From 22d83bc9f655d5ae7a1b49709c4c1b663725daf5 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Mon, 3 Oct 2016 16:33:42 -0300 Subject: [PATCH 001/223] Begining of GLES3 renderer: -Most 2D drawing is implemented -Missing shaders -Missing all 3D -Editor needs to be set on update always to be used, otherwise it does not refresh -Large parts of editor not working --- SConstruct | 3 + bin/tests/test_detailer.cpp | 217 - bin/tests/test_detailer.h | 44 - bin/tests/test_gui.cpp | 4 +- bin/tests/test_main.cpp | 34 +- bin/tests/test_misc.cpp | 499 -- bin/tests/test_misc.h | 40 - bin/tests/test_particles.cpp | 121 - bin/tests/test_particles.h | 43 - bin/tests/test_physics.cpp | 28 +- bin/tests/test_physics_2d.cpp | 10 +- bin/tests/test_python.cpp | 56 - bin/tests/test_python.h | 43 - bin/tests/test_render.cpp | 8 +- bin/tests/test_shader_lang.cpp | 18 +- core/globals.cpp | 47 +- core/image.cpp | 1255 ++--- core/image.h | 200 +- core/image_quantize.cpp | 365 -- core/io/marshalls.cpp | 2 +- core/io/resource_format_binary.cpp | 90 +- core/io/resource_format_xml.cpp | 61 +- core/rid.cpp | 18 +- core/rid.h | 226 +- core/variant_call.cpp | 69 +- core/variant_parser.cpp | 78 +- doc/base/classes.xml | 22 +- drivers/SCsub | 2 +- drivers/dds/texture_loader_dds.cpp | 29 +- drivers/etc1/image_etc.cpp | 16 +- drivers/gles2/rasterizer_gles2.cpp | 60 +- drivers/gles2/rasterizer_gles2.h | 2 +- drivers/gles2/shaders/canvas.glsl | 6 +- drivers/gles3/SCsub | 5 + drivers/gles3/rasterizer_canvas_gles3.cpp | 1461 ++++++ drivers/gles3/rasterizer_canvas_gles3.h | 102 + drivers/gles3/rasterizer_gles3.cpp | 266 + drivers/gles3/rasterizer_gles3.h | 38 + drivers/gles3/rasterizer_storage_gles3.cpp | 2045 ++++++++ drivers/gles3/rasterizer_storage_gles3.h | 445 ++ drivers/gles3/shader_gles3.cpp | 754 +++ drivers/gles3/shader_gles3.h | 377 ++ drivers/gles3/shaders/SCsub | 7 + drivers/gles3/shaders/canvas.glsl | 428 ++ drivers/gles3/shaders/canvas_shadow.glsl | 49 + drivers/gles3/shaders/copy.glsl | 52 + drivers/jpegd/image_loader_jpegd.cpp | 4 +- drivers/png/image_loader_png.cpp | 91 +- drivers/png/resource_saver_png.cpp | 16 +- drivers/pvr/texture_loader_pvr.cpp | 38 +- drivers/squish/image_compress_squish.cpp | 18 +- drivers/theora/video_stream_theora.cpp | 10 +- drivers/webp/image_loader_webp.cpp | 10 +- methods.py | 46 +- modules/gridmap/config.py | 2 +- platform/iphone/rasterizer_iphone.cpp | 18 +- platform/iphone/rasterizer_iphone.h | 2 +- platform/osx/export/export.cpp | 2 +- platform/osx/os_osx.mm | 2 +- platform/osx/platform_config.h | 2 + platform/server/os_server.cpp | 10 +- platform/server/os_server.h | 2 +- platform/windows/os_windows.cpp | 4 +- platform/windows/platform_config.h | 1 + platform/x11/context_gl_x11.cpp | 1 + platform/x11/detect.py | 6 +- platform/x11/os_x11.cpp | 19 +- platform/x11/os_x11.h | 4 +- platform/x11/platform_config.h | 1 + scene/2d/canvas_item.cpp | 151 +- scene/2d/canvas_item.h | 38 +- scene/2d/light_2d.cpp | 60 +- scene/2d/light_2d.h | 29 +- scene/2d/sprite.cpp | 3 +- scene/2d/sprite.h | 2 + scene/2d/tile_map.cpp | 15 +- scene/3d/baked_light_instance.cpp | 2 + scene/3d/baked_light_instance.h | 3 +- scene/3d/camera.cpp | 107 +- scene/3d/camera.h | 6 +- scene/3d/light.cpp | 507 +- scene/3d/light.h | 104 +- scene/3d/particles.cpp | 2 + scene/3d/particles.h | 3 +- scene/3d/portal.cpp | 44 +- scene/3d/portal.h | 2 +- scene/3d/position_3d.cpp | 31 - scene/3d/position_3d.h | 1 - scene/3d/quad.cpp | 2 +- scene/3d/room_instance.cpp | 77 +- scene/3d/room_instance.h | 5 +- scene/3d/skeleton.cpp | 47 +- scene/3d/skeleton.h | 1 - scene/3d/spatial_player.cpp | 140 - scene/3d/spatial_player.h | 2 +- scene/3d/visual_instance.cpp | 108 +- scene/3d/visual_instance.h | 21 +- scene/gui/color_picker.cpp | 76 +- scene/gui/patch_9_frame.cpp | 2 +- scene/main/canvas_layer.cpp | 10 + scene/main/canvas_layer.h | 5 + scene/main/node.cpp | 10 +- scene/main/scene_main_loop.cpp | 35 +- scene/main/viewport.cpp | 264 +- scene/main/viewport.h | 74 +- scene/register_scene_types.cpp | 25 +- scene/resources/baked_light.cpp | 5 +- scene/resources/baked_light.h | 5 +- scene/resources/bit_mask.cpp | 7 +- .../default_theme/color_picker_hue.png | Bin 0 -> 132 bytes .../default_theme/color_picker_main.png | Bin 0 -> 310 bytes .../default_theme/color_picker_sample.png | Bin 0 -> 194 bytes .../resources/default_theme/default_theme.cpp | 18 +- scene/resources/default_theme/theme_data.h | 15 + scene/resources/dynamic_font.cpp | 2 +- scene/resources/dynamic_font_stb.cpp | 2 +- scene/resources/environment.cpp | 245 +- scene/resources/environment.h | 97 +- scene/resources/material.cpp | 552 +-- scene/resources/material.h | 188 +- scene/resources/mesh.cpp | 38 +- scene/resources/mesh.h | 5 +- scene/resources/multimesh.cpp | 42 +- scene/resources/multimesh.h | 29 +- scene/resources/room.cpp | 38 +- scene/resources/room.h | 8 +- scene/resources/shader.cpp | 334 +- scene/resources/shader.h | 19 +- scene/resources/shader_graph.cpp | 7 +- scene/resources/shader_graph.h | 4 +- scene/resources/style_box.cpp | 126 +- scene/resources/style_box.h | 30 - scene/resources/texture.cpp | 18 +- scene/resources/texture.h | 2 +- servers/audio/audio_server_sw.h | 4 +- servers/audio/sample_manager_sw.h | 2 +- servers/physics/broad_phase_octree.cpp | 2 +- servers/physics/constraint_sw.h | 2 +- servers/physics/shape_sw.h | 4 +- servers/physics/space_sw.h | 2 +- servers/physics_2d/broad_phase_2d_basic.cpp | 2 +- servers/physics_2d/constraint_2d_sw.h | 2 +- servers/physics_2d/shape_2d_sw.h | 4 +- servers/physics_2d/space_2d_sw.h | 2 +- .../spatial_sound/spatial_sound_server_sw.h | 8 +- .../spatial_sound_2d_server_sw.h | 8 +- servers/visual/particle_system_sw.cpp | 412 -- servers/visual/particle_system_sw.h | 131 - servers/visual/rasterizer.cpp | 19 + servers/visual/rasterizer.h | 760 ++- servers/visual/rasterizer_dummy.cpp | 1961 -------- servers/visual/rasterizer_dummy.h | 791 --- servers/visual/shader_language.cpp | 4322 +++++++++-------- servers/visual/shader_language.h | 389 +- servers/visual/visual_server_canvas.cpp | 1268 +++++ servers/visual/visual_server_canvas.h | 214 + servers/visual/visual_server_global.cpp | 10 + servers/visual/visual_server_global.h | 26 + servers/visual/visual_server_raster.cpp | 1329 ++++- servers/visual/visual_server_raster.h | 596 +-- servers/visual/visual_server_viewport.cpp | 489 ++ servers/visual/visual_server_viewport.h | 118 + servers/visual/visual_server_wrap_mt.cpp | 212 - servers/visual/visual_server_wrap_mt.h | 739 --- servers/visual_server.cpp | 439 +- servers/visual_server.h | 764 +-- tools/editor/code_editor.cpp | 2 +- tools/editor/editor_autoload_settings.cpp | 2 +- tools/editor/editor_help.cpp | 2 +- tools/editor/editor_node.cpp | 35 +- tools/editor/editor_profiler.cpp | 2 +- tools/editor/icons/SCsub | 2 +- .../io_plugins/editor_font_import_plugin.cpp | 10 +- .../io_plugins/editor_import_collada.cpp | 50 +- .../io_plugins/editor_scene_import_plugin.cpp | 58 +- .../editor_scene_importer_fbxconv.cpp | 3 +- .../editor_scene_importer_fbxconv.h | 2 + .../editor_texture_import_plugin.cpp | 72 +- tools/editor/plugins/baked_light_baker.cpp | 11 +- tools/editor/plugins/baked_light_baker.h | 3 + .../plugins/baked_light_editor_plugin.cpp | 5 +- .../plugins/baked_light_editor_plugin.h | 4 +- .../collision_polygon_editor_plugin.cpp | 3 + .../plugins/collision_polygon_editor_plugin.h | 4 +- .../plugins/cube_grid_theme_editor_plugin.cpp | 3 +- .../plugins/cube_grid_theme_editor_plugin.h | 3 +- .../editor/plugins/editor_preview_plugins.cpp | 17 +- tools/editor/plugins/editor_preview_plugins.h | 3 +- .../editor/plugins/material_editor_plugin.cpp | 3 + tools/editor/plugins/material_editor_plugin.h | 3 +- tools/editor/plugins/mesh_editor_plugin.cpp | 2 + tools/editor/plugins/mesh_editor_plugin.h | 3 + .../plugins/particles_2d_editor_plugin.cpp | 4 +- .../plugins/particles_editor_plugin.cpp | 3 + .../editor/plugins/particles_editor_plugin.h | 3 +- tools/editor/plugins/path_editor_plugin.cpp | 2 + tools/editor/plugins/path_editor_plugin.h | 3 +- .../plugins/polygon_2d_editor_plugin.cpp | 2 +- tools/editor/plugins/sample_editor_plugin.cpp | 4 +- .../plugins/sample_library_editor_plugin.cpp | 2 +- tools/editor/plugins/shader_editor_plugin.cpp | 3 +- tools/editor/plugins/shader_editor_plugin.h | 4 +- .../plugins/shader_graph_editor_plugin.cpp | 4 +- .../plugins/shader_graph_editor_plugin.h | 4 +- .../editor/plugins/spatial_editor_plugin.cpp | 86 +- tools/editor/plugins/spatial_editor_plugin.h | 2 +- tools/editor/project_manager.cpp | 4 +- tools/editor/pvrtc_compress.cpp | 2 +- tools/editor/scene_tree_editor.cpp | 2 +- tools/editor/spatial_editor_gizmos.cpp | 3 + tools/editor/spatial_editor_gizmos.h | 4 +- 211 files changed, 15188 insertions(+), 14195 deletions(-) delete mode 100644 bin/tests/test_detailer.cpp delete mode 100644 bin/tests/test_detailer.h delete mode 100644 bin/tests/test_misc.cpp delete mode 100644 bin/tests/test_misc.h delete mode 100644 bin/tests/test_particles.cpp delete mode 100644 bin/tests/test_particles.h delete mode 100644 bin/tests/test_python.cpp delete mode 100644 bin/tests/test_python.h create mode 100644 drivers/gles3/SCsub create mode 100644 drivers/gles3/rasterizer_canvas_gles3.cpp create mode 100644 drivers/gles3/rasterizer_canvas_gles3.h create mode 100644 drivers/gles3/rasterizer_gles3.cpp create mode 100644 drivers/gles3/rasterizer_gles3.h create mode 100644 drivers/gles3/rasterizer_storage_gles3.cpp create mode 100644 drivers/gles3/rasterizer_storage_gles3.h create mode 100644 drivers/gles3/shader_gles3.cpp create mode 100644 drivers/gles3/shader_gles3.h create mode 100644 drivers/gles3/shaders/SCsub create mode 100644 drivers/gles3/shaders/canvas.glsl create mode 100644 drivers/gles3/shaders/canvas_shadow.glsl create mode 100644 drivers/gles3/shaders/copy.glsl create mode 100644 scene/resources/default_theme/color_picker_hue.png create mode 100644 scene/resources/default_theme/color_picker_main.png create mode 100644 scene/resources/default_theme/color_picker_sample.png delete mode 100644 servers/visual/particle_system_sw.cpp delete mode 100644 servers/visual/particle_system_sw.h delete mode 100644 servers/visual/rasterizer_dummy.cpp delete mode 100644 servers/visual/rasterizer_dummy.h create mode 100644 servers/visual/visual_server_canvas.cpp create mode 100644 servers/visual/visual_server_canvas.h create mode 100644 servers/visual/visual_server_global.cpp create mode 100644 servers/visual/visual_server_global.h create mode 100644 servers/visual/visual_server_viewport.cpp create mode 100644 servers/visual/visual_server_viewport.h delete mode 100644 servers/visual/visual_server_wrap_mt.cpp delete mode 100644 servers/visual/visual_server_wrap_mt.h diff --git a/SConstruct b/SConstruct index 06524140884..4fd0330b92f 100644 --- a/SConstruct +++ b/SConstruct @@ -386,6 +386,9 @@ if selected_platform in platform_list: if (env['etc1']=='yes'): env.Append(CPPFLAGS=['-DETC1_ENABLED']) + if (True): # detect GLES3 + env.Append( BUILDERS = { 'GLES3_GLSL' : env.Builder(action = methods.build_gles3_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + Export('env') #build subdirs, the build order is dependent on link order. diff --git a/bin/tests/test_detailer.cpp b/bin/tests/test_detailer.cpp deleted file mode 100644 index 5dba7c3f724..00000000000 --- a/bin/tests/test_detailer.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/*************************************************************************/ -/* test_detailer.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "test_detailer.h" -#include "servers/visual_server.h" -#include "os/main_loop.h" -#include "math_funcs.h" -#include "print_string.h" -#include "geometry.h" -#include "quick_hull.h" -namespace TestMultiMesh { - - -class TestMainLoop : public MainLoop { - - RID instance; - RID camera; - RID viewport; - RID light; - RID mesh; - RID scenario; - -#define MULTIMESH_COUNT 1500 - - float ofs_x,ofs_y; - bool quit; -public: - - - virtual void _update_qh() { - - VisualServer *vs=VisualServer::get_singleton(); - Vector vts; -/* - - static const int s = 20; - for(int i=0;i convex_planes = Geometry::build_cylinder_planes(0.5,0.7,4,Vector3::AXIS_Z); - Geometry::MeshData convex_data = Geometry::build_convex_mesh(convex_planes); - vts=convex_data.vertices; - - Geometry::MeshData md; - Error err = QuickHull::build(vts,md); - print_line("ERR: "+itos(err)); - - vs->mesh_remove_surface(mesh,0); - vs->mesh_add_surface_from_mesh_data(mesh,md); - - - - //vs->scenario_set_debug(scenario,VS::SCENARIO_DEBUG_WIREFRAME); - - /* - RID sm = vs->shader_create(); - //vs->shader_set_fragment_code(sm,"OUT_ALPHA=mod(TIME,1);"); - //vs->shader_set_vertex_code(sm,"OUT_VERTEX=IN_VERTEX*mod(TIME,1);"); - vs->shader_set_fragment_code(sm,"OUT_DIFFUSE=vec3(1,0,1);OUT_GLOW=abs(sin(TIME));"); - RID tcmat = vs->mesh_surface_get_material(test_cube,0); - vs->material_set_shader(tcmat,sm); - */ - - } - - virtual void input_event(const InputEvent& p_event) { - - if (p_event.type==InputEvent::MOUSE_MOTION && p_event.mouse_motion.button_mask&4) { - - ofs_x+=p_event.mouse_motion.relative_y/200.0; - ofs_y+=p_event.mouse_motion.relative_x/200.0; - } - if (p_event.type==InputEvent::MOUSE_BUTTON && p_event.mouse_button.pressed && p_event.mouse_button.button_index==1) { - - QuickHull::debug_stop_after++; - _update_qh(); - } - if (p_event.type==InputEvent::MOUSE_BUTTON && p_event.mouse_button.pressed && p_event.mouse_button.button_index==2) { - - if (QuickHull::debug_stop_after>0) - QuickHull::debug_stop_after--; - _update_qh(); - } - - - } - - virtual void request_quit() { - - quit=true; - } - - - - virtual void init() { - - VisualServer *vs=VisualServer::get_singleton(); - - - mesh = vs->mesh_create(); - - scenario = vs->scenario_create(); - - QuickHull::debug_stop_after=0; - _update_qh(); - - instance = vs->instance_create2(mesh,scenario); - - camera = vs->camera_create(); - - - vs->camera_set_perspective( camera, 60.0,0.1, 100.0 ); - viewport = vs->viewport_create(); - vs->viewport_attach_camera( viewport, camera ); - vs->viewport_attach_to_screen(viewport); - vs->viewport_set_scenario( viewport, scenario ); - - vs->camera_set_transform(camera, Transform( Matrix3(), Vector3(0,0,2 ) ) ); - - RID lightaux = vs->light_create( VisualServer::LIGHT_DIRECTIONAL ); - //vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_AMBIENT, Color(0.3,0.3,0.3) ); - light = vs->instance_create2( lightaux,scenario ); - vs->instance_set_transform(light,Transform(Matrix3(Vector3(0.1,0.4,0.7).normalized(),0.9))); - - ofs_x=0; - ofs_y=0; - quit=false; - } - - virtual bool idle(float p_time) { - return false; - } - - virtual bool iteration(float p_time) { - - VisualServer *vs=VisualServer::get_singleton(); - - Transform tr_camera; - tr_camera.rotate( Vector3(0,1,0), ofs_y ); - tr_camera.rotate( Vector3(1,0,0),ofs_x ); - tr_camera.translate(0,0,10); - - vs->camera_set_transform( camera, tr_camera ); - - return quit; - } - virtual void finish() { - - } - -}; - -MainLoop* test() { - - return memnew(TestMainLoop); - -} - -} diff --git a/bin/tests/test_detailer.h b/bin/tests/test_detailer.h deleted file mode 100644 index 597e088caf2..00000000000 --- a/bin/tests/test_detailer.h +++ /dev/null @@ -1,44 +0,0 @@ -/*************************************************************************/ -/* test_detailer.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef TEST_MULTIMESH_H -#define TEST_MULTIMESH_H - -/** - @author Juan Linietsky -*/ -#include "os/main_loop.h" - -namespace TestMultiMesh { - -MainLoop* test(); - -} - - -#endif diff --git a/bin/tests/test_gui.cpp b/bin/tests/test_gui.cpp index f4341fd7b3e..bfce03d6638 100644 --- a/bin/tests/test_gui.cpp +++ b/bin/tests/test_gui.cpp @@ -370,7 +370,7 @@ public: tabc->set_size( Point2( 180,250 ) ); - Ref text = memnew( ImageTexture ); + /*Ref text = memnew( ImageTexture ); text->load("test_data/concave.png"); Sprite* sprite = memnew(Sprite); @@ -383,7 +383,7 @@ public: sprite->set_texture(text); sprite->add_child(sprite2); sprite2->set_pos(Point2(50, 50)); - sprite2->show(); + sprite2->show();*/ } diff --git a/bin/tests/test_main.cpp b/bin/tests/test_main.cpp index 363aede96e4..1f7f2d7dda6 100644 --- a/bin/tests/test_main.cpp +++ b/bin/tests/test_main.cpp @@ -37,13 +37,10 @@ #include "test_gui.h" #include "test_render.h" #include "test_sound.h" -#include "test_misc.h" #include "test_physics.h" #include "test_physics_2d.h" -#include "test_python.h" + #include "test_io.h" -#include "test_particles.h" -#include "test_detailer.h" #include "test_shader_lang.h" #include "test_gdscript.h" #include "test_image.h" @@ -56,7 +53,6 @@ const char ** tests_get_names() { "containers", "math", "render", - "particles", "multimesh", "gui", "io", @@ -96,11 +92,6 @@ MainLoop* test_main(String p_test,const List& p_args) { return TestPhysics2D::test(); } - if (p_test=="misc") { - - return TestMisc::test(); - } - if (p_test=="render") { return TestRender::test(); @@ -123,16 +114,6 @@ MainLoop* test_main(String p_test,const List& p_args) { return TestIO::test(); } - if (p_test=="particles") { - - return TestParticles::test(); - } - - if (p_test=="multimesh") { - - return TestMultiMesh::test(); - } - if (p_test=="shaderlang") { return TestShaderLang::test(); @@ -163,19 +144,6 @@ MainLoop* test_main(String p_test,const List& p_args) { return TestImage::test(); } - if (p_test=="detailer") { - - return TestMultiMesh::test(); - } - -#ifdef PYTHON_ENABLED - - if (p_test=="python") { - - return TestPython::test(); - } -#endif - return NULL; } diff --git a/bin/tests/test_misc.cpp b/bin/tests/test_misc.cpp deleted file mode 100644 index 9d7adc35731..00000000000 --- a/bin/tests/test_misc.cpp +++ /dev/null @@ -1,499 +0,0 @@ -/*************************************************************************/ -/* test_misc.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "test_misc.h" -#include "servers/visual_server.h" -#include "os/main_loop.h" -#include "math_funcs.h" -#include "print_string.h" - - -namespace TestMisc { - -struct ConvexTestResult -{ - - Vector3 edgeA[2]; - Vector3 edgeB[2]; - bool valid; - Vector3 contactA; - Vector3 contactB; - Vector3 contactNormal; - float depth; - - /* - Vector3 contactA; - Vector3 contactB; - Vector3 contactNormal; - Vector3 contactX; - Vector3 contactY; - Vector3 edgeA[2]; - Vector3 edgeB[2]; - float depth; - bool valid; - bool isEdgeEdge; - bool needTransform; - neBool ComputerEdgeContactPoint(ConvexTestResult & res); - neBool ComputerEdgeContactPoint2(float & au, float & bu); - void Reverse() - { - neSwap(contactA, contactB); - contactNormal *= -1.0f; - }*/ - bool ComputerEdgeContactPoint2(float & au, float & bu); -}; - - - -bool ConvexTestResult::ComputerEdgeContactPoint2(float & au, float & bu) -{ - float d1343, d4321, d1321, d4343, d2121; - float numer, denom; - - Vector3 p13; - Vector3 p43; - Vector3 p21; - Vector3 diff; - - p13 = (edgeA[0]) - (edgeB[0]); - p43 = (edgeB[1]) - (edgeB[0]); - - if ( p43.length_squared() < CMP_EPSILON2 ) - { - valid = false; - goto ComputerEdgeContactPoint2_Exit; - } - - p21 = (edgeA[1]) - (edgeA[0]); - - if ( p21.length_squared()= 1.0f) - { - valid = false; - } - else if (bu < 0.0f || bu >= 1.0f) - { - valid = false; - } - else - { - valid = true; - } - { - Vector3 tmpv; - - tmpv = p21 * au; - contactA = (edgeA[0]) + tmpv; - - tmpv = p43 * bu; - contactB = (edgeB[0]) + tmpv; - } - - diff = contactA - contactB; - - depth = Math::sqrt(diff.dot(diff)); - - return true; - -ComputerEdgeContactPoint2_Exit: - - return false; -} - -struct neCollisionResult { - - float depth; - bool penetrate; - Matrix3 collisionFrame; - Vector3 contactA; - Vector3 contactB; -}; - - -struct TConvex { - - float radius; - float half_height; - float CylinderRadius() const { return radius; } - float CylinderHalfHeight() const { return half_height; } -}; - -float GetDistanceFromLine2(Vector3 v, Vector3 & project, const Vector3 & pointA, const Vector3 & pointB) -{ - Vector3 ba = pointB - pointA; - - float len = ba.length(); - - if (lenCMP_EPSILON) - { - float au, bu; - - cr.ComputerEdgeContactPoint2(au, bu); - - if (cr.valid) - { - float depth = cA.CylinderRadius() + cB.CylinderRadius() - cr.depth; - - if (depth <= 0.0f) - return; - - result.depth = depth; - - result.penetrate = true; - - result.collisionFrame.set_axis(2, (cr.contactA - cr.contactB)*(1.0f / cr.depth)); - - result.contactA = cr.contactA - result.collisionFrame.get_axis(2) * cA.CylinderRadius(); - - result.contactB = cr.contactB + result.collisionFrame.get_axis(2) * cB.CylinderRadius(); - - return; - } - } - result.depth = -1.0e6f; - - int i; - - for (i = 0; i < 2; i++) - { - //project onto edge b - - Vector3 diff = cr.edgeA[i] - cr.edgeB[1]; - - float dot = diff.dot(transB.basis.get_axis(1)); - - if (dot < 0.0f) - { - TestCylinderVertVert(result, cr.edgeA[i], cr.edgeB[1], cA, cB, transA, transB); - } - else if (dot > (2.0f * cB.CylinderHalfHeight())) - { - TestCylinderVertVert(result, cr.edgeA[i], cr.edgeB[0], cA, cB, transA, transB); - } - else - { - TestCylinderVertEdge(result, cr.edgeB[0], cr.edgeB[1], cr.edgeA[i], cB, cA, transB, transA, true); - } - } - for (i = 0; i < 2; i++) - { - //project onto edge b - - Vector3 diff = cr.edgeB[i] - cr.edgeA[1]; - - float dot = diff.dot(transA.basis.get_axis(1)); - - if (dot < 0.0f) - { - TestCylinderVertVert(result, cr.edgeB[i], cr.edgeA[1], cA, cB, transA, transB); - } - else if (dot > (2.0f * cB.CylinderHalfHeight())) - { - TestCylinderVertVert(result, cr.edgeB[i], cr.edgeA[0], cA, cB, transA, transB); - } - else - { - TestCylinderVertEdge(result, cr.edgeA[0], cr.edgeA[1], cr.edgeB[i], cA, cB, transA, transB, false); - } - } -} - - -class TestMainLoop : public MainLoop { - - RID meshA; - RID meshB; - RID poly; - RID instance; - RID camera; - RID viewport; - RID boxA; - RID boxB; - RID scenario; - - Transform rot_a; - Transform rot_b; - - bool quit; -public: - virtual void input_event(const InputEvent& p_event) { - - if (p_event.type==InputEvent::MOUSE_MOTION && p_event.mouse_motion.button_mask&BUTTON_MASK_LEFT) { - - rot_b.origin.y+=-p_event.mouse_motion.relative_y/100.0; - rot_b.origin.x+=p_event.mouse_motion.relative_x/100.0; - } - if (p_event.type==InputEvent::MOUSE_MOTION && p_event.mouse_motion.button_mask&BUTTON_MASK_MIDDLE) { - - //rot_b.origin.x+=-p_event.mouse_motion.relative_y/100.0; - rot_b.origin.z+=p_event.mouse_motion.relative_x/100.0; - } - if (p_event.type==InputEvent::MOUSE_MOTION && p_event.mouse_motion.button_mask&BUTTON_MASK_RIGHT) { - - float rot_x=-p_event.mouse_motion.relative_y/100.0; - float rot_y=p_event.mouse_motion.relative_x/100.0; - rot_b.basis = rot_b.basis * Matrix3(Vector3(1,0,0),rot_x) * Matrix3(Vector3(0,1,0),rot_y); - } - - } - virtual void request_quit() { - - quit=true; - } - virtual void init() { - - VisualServer *vs=VisualServer::get_singleton(); - - camera = vs->camera_create(); - - viewport = vs->viewport_create(); - vs->viewport_attach_to_screen(viewport); - vs->viewport_attach_camera( viewport, camera ); - vs->camera_set_transform(camera, Transform( Matrix3(), Vector3(0,0,3 ) ) ); - - /* CONVEX SHAPE */ - - DVector cylinder_planes = Geometry::build_cylinder_planes(0.5,2,9,Vector3::AXIS_Y); - RID cylinder_material = vs->fixed_material_create(); - vs->fixed_material_set_param( cylinder_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(0.8,0.2,0.9)); - vs->material_set_flag( cylinder_material, VisualServer::MATERIAL_FLAG_ONTOP,true); - //vs->material_set_flag( cylinder_material, VisualServer::MATERIAL_FLAG_WIREFRAME,true); - vs->material_set_flag( cylinder_material, VisualServer::MATERIAL_FLAG_DOUBLE_SIDED,true); - vs->material_set_flag( cylinder_material, VisualServer::MATERIAL_FLAG_UNSHADED,true); - - RID cylinder_mesh = vs->mesh_create(); - Geometry::MeshData cylinder_data = Geometry::build_convex_mesh(cylinder_planes); - vs->mesh_add_surface_from_mesh_data(cylinder_mesh,cylinder_data); - vs->mesh_surface_set_material( cylinder_mesh, 0, cylinder_material ); - - meshA=vs->instance_create2(cylinder_mesh,scenario); - meshB=vs->instance_create2(cylinder_mesh,scenario); - boxA=vs->instance_create2(vs->get_test_cube(),scenario); - boxB=vs->instance_create2(vs->get_test_cube(),scenario); - - /* - RID lightaux = vs->light_create( VisualServer::LIGHT_OMNI ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_RADIUS, 80 ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_ATTENUATION, 1 ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_ENERGY, 1.5 ); - light = vs->instance_create2( lightaux ); - */ - RID lightaux = vs->light_create( VisualServer::LIGHT_DIRECTIONAL ); - //vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_AMBIENT, Color(0.0,0.0,0.0) ); - //vs->light_set_shadow( lightaux, true ); - vs->instance_create2( lightaux,scenario ); - - //rot_a=Transform(Matrix3(Vector3(1,0,0),Math_PI/2.0),Vector3()); - rot_b=Transform(Matrix3(),Vector3(2,0,0)); - - //rot_x=0; - //rot_y=0; - quit=false; - } - virtual bool idle(float p_time) { - - VisualServer *vs=VisualServer::get_singleton(); - - vs->instance_set_transform(meshA,rot_a); - vs->instance_set_transform(meshB,rot_b); - - - neCollisionResult res; - TConvex a; - a.radius=0.5; - a.half_height=1; - Cylinder2CylinderTest(res,a,rot_a,a,rot_b); - if (res.penetrate) { - - Matrix3 scale; - scale.scale(Vector3(0.1,0.1,0.1)); - vs->instance_set_transform(boxA,Transform(scale,res.contactA)); - vs->instance_set_transform(boxB,Transform(scale,res.contactB)); - print_line("depth: "+rtos(res.depth)); - } else { - - Matrix3 scale; - scale.scale(Vector3()); - vs->instance_set_transform(boxA,Transform(scale,res.contactA)); - vs->instance_set_transform(boxB,Transform(scale,res.contactB)); - - } - print_line("collided: "+itos(res.penetrate)); - - return false; - } - - - virtual bool iteration(float p_time) { - - - - return quit; - } - virtual void finish() { - - } - -}; - - -MainLoop* test() { - - return memnew( TestMainLoop ); - -} - -} - - - diff --git a/bin/tests/test_misc.h b/bin/tests/test_misc.h deleted file mode 100644 index 55608f6a0e4..00000000000 --- a/bin/tests/test_misc.h +++ /dev/null @@ -1,40 +0,0 @@ -/*************************************************************************/ -/* test_misc.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef TEST_MISC_H -#define TEST_MISC_H - -#include "os/main_loop.h" - -namespace TestMisc { - -MainLoop* test(); - -} - -#endif diff --git a/bin/tests/test_particles.cpp b/bin/tests/test_particles.cpp deleted file mode 100644 index 23a4b9e6359..00000000000 --- a/bin/tests/test_particles.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/*************************************************************************/ -/* test_particles.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "test_particles.h" -#include "servers/visual_server.h" -#include "os/main_loop.h" -#include "math_funcs.h" -#include "print_string.h" - -namespace TestParticles { - - -class TestMainLoop : public MainLoop { - - RID particles; - RID instance; - RID camera; - RID viewport; - RID light; - RID scenario; - - struct InstanceInfo { - - RID instance; - Transform base; - Vector3 rot_axis; - }; - - List instances; - - float ofs; - bool quit; -public: - virtual void input_event(const InputEvent& p_event) { - - - } - virtual void request_quit() { - - quit=true; - } - virtual void init() { - - VisualServer *vs=VisualServer::get_singleton(); - particles = vs->particles_create(); - vs->particles_set_amount(particles,1000); - - instance = vs->instance_create2(particles,scenario); - - - camera = vs->camera_create(); - -// vs->camera_set_perspective( camera, 60.0,0.1, 100.0 ); - viewport = vs->viewport_create(); - vs->viewport_attach_camera( viewport, camera ); - vs->camera_set_transform(camera, Transform( Matrix3(), Vector3(0,0,20 ) ) ); - /* - RID lightaux = vs->light_create( VisualServer::LIGHT_OMNI ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_RADIUS, 80 ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_ATTENUATION, 1 ); - vs->light_set_var( lightaux, VisualServer::LIGHT_VAR_ENERGY, 1.5 ); - light = vs->instance_create2( lightaux ); - */ - RID lightaux = vs->light_create( VisualServer::LIGHT_DIRECTIONAL ); - // vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_AMBIENT, Color(0.0,0.0,0.0) ); - light = vs->instance_create2( lightaux, scenario ); - - ofs=0; - quit=false; - } - virtual bool idle(float p_time) { - return false; - } - - - virtual bool iteration(float p_time) { - -// VisualServer *vs=VisualServer::get_singleton(); - - ofs+=p_time; - return quit; - } - virtual void finish() { - - } - -}; - - -MainLoop* test() { - - return memnew( TestMainLoop ); - -} - -} diff --git a/bin/tests/test_particles.h b/bin/tests/test_particles.h deleted file mode 100644 index e95637a4e6e..00000000000 --- a/bin/tests/test_particles.h +++ /dev/null @@ -1,43 +0,0 @@ -/*************************************************************************/ -/* test_particles.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef TEST_PARTICLES_H -#define TEST_PARTICLES_H - -/** - @author Juan Linietsky -*/ -#include "os/main_loop.h" - -namespace TestParticles { - -MainLoop* test(); - -} - -#endif diff --git a/bin/tests/test_physics.cpp b/bin/tests/test_physics.cpp index f202d0dda1b..3a8ca657349 100644 --- a/bin/tests/test_physics.cpp +++ b/bin/tests/test_physics.cpp @@ -138,10 +138,6 @@ protected: /* SPHERE SHAPE */ RID sphere_mesh = vs->make_sphere_mesh(10,20,0.5); - RID sphere_material = vs->fixed_material_create(); - //vs->material_set_flag( sphere_material, VisualServer::MATERIAL_FLAG_WIREFRAME, true ); - vs->fixed_material_set_param( sphere_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(0.7,0.8,3.0) ); - vs->mesh_surface_set_material( sphere_mesh, 0, sphere_material ); type_mesh_map[PhysicsServer::SHAPE_SPHERE]=sphere_mesh; RID sphere_shape=ps->shape_create(PhysicsServer::SHAPE_SPHERE); @@ -151,12 +147,9 @@ protected: /* BOX SHAPE */ DVector box_planes = Geometry::build_box_planes(Vector3(0.5,0.5,0.5)); - RID box_material = vs->fixed_material_create(); - vs->fixed_material_set_param( box_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(1.0,0.2,0.2) ); RID box_mesh = vs->mesh_create(); Geometry::MeshData box_data = Geometry::build_convex_mesh(box_planes); vs->mesh_add_surface_from_mesh_data(box_mesh,box_data); - vs->mesh_surface_set_material( box_mesh, 0, box_material ); type_mesh_map[PhysicsServer::SHAPE_BOX]=box_mesh; RID box_shape=ps->shape_create(PhysicsServer::SHAPE_BOX); @@ -167,13 +160,11 @@ protected: /* CAPSULE SHAPE */ DVector capsule_planes = Geometry::build_capsule_planes(0.5,0.7,12,Vector3::AXIS_Z); - RID capsule_material = vs->fixed_material_create(); - vs->fixed_material_set_param( capsule_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(0.3,0.4,1.0) ); RID capsule_mesh = vs->mesh_create(); Geometry::MeshData capsule_data = Geometry::build_convex_mesh(capsule_planes); vs->mesh_add_surface_from_mesh_data(capsule_mesh,capsule_data); - vs->mesh_surface_set_material( capsule_mesh, 0, capsule_material ); + type_mesh_map[PhysicsServer::SHAPE_CAPSULE]=capsule_mesh; RID capsule_shape=ps->shape_create(PhysicsServer::SHAPE_CAPSULE); @@ -186,14 +177,12 @@ protected: /* CONVEX SHAPE */ DVector convex_planes = Geometry::build_cylinder_planes(0.5,0.7,5,Vector3::AXIS_Z); - RID convex_material = vs->fixed_material_create(); - vs->fixed_material_set_param( convex_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(0.8,0.2,0.9)); RID convex_mesh = vs->mesh_create(); Geometry::MeshData convex_data = Geometry::build_convex_mesh(convex_planes); QuickHull::build(convex_data.vertices,convex_data); vs->mesh_add_surface_from_mesh_data(convex_mesh,convex_data); - vs->mesh_surface_set_material( convex_mesh, 0, convex_material ); + type_mesh_map[PhysicsServer::SHAPE_CONVEX_POLYGON]=convex_mesh; RID convex_shape=ps->shape_create(PhysicsServer::SHAPE_CONVEX_POLYGON); @@ -223,11 +212,9 @@ protected: d.resize(VS::ARRAY_MAX); d[VS::ARRAY_VERTEX]=p_faces; d[VS::ARRAY_NORMAL]=normals; - vs->mesh_add_surface(trimesh_mesh, VS::PRIMITIVE_TRIANGLES, d ); - RID trimesh_mat = vs->fixed_material_create(); - vs->fixed_material_set_param( trimesh_mat, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(1.0,0.5,0.8)); + vs->mesh_add_surface_from_arrays(trimesh_mesh, VS::PRIMITIVE_TRIANGLES, d ); //vs->material_set_flag( trimesh_mat, VisualServer::MATERIAL_FLAG_UNSHADED,true); - vs->mesh_surface_set_material( trimesh_mesh, 0, trimesh_mat ); + RID triins = vs->instance_create2(trimesh_mesh,scenario); @@ -464,7 +451,7 @@ public: } virtual bool iteration(float p_time) { - if (mover) { + if (mover.is_valid()) { static float joy_speed = 10; PhysicsServer * ps = PhysicsServer::get_singleton(); Transform t = ps->body_get_state(mover,PhysicsServer::BODY_STATE_TRANSFORM); @@ -548,15 +535,10 @@ public: DVector capsule_planes = Geometry::build_capsule_planes(0.5,1,12,5,Vector3::AXIS_Y); - RID capsule_material = vs->fixed_material_create(); - - vs->fixed_material_set_param( capsule_material, VisualServer::FIXED_MATERIAL_PARAM_DIFFUSE, Color(1,1,1) ); - RID capsule_mesh = vs->mesh_create(); Geometry::MeshData capsule_data = Geometry::build_convex_mesh(capsule_planes); vs->mesh_add_surface_from_mesh_data(capsule_mesh,capsule_data); - vs->mesh_surface_set_material( capsule_mesh, 0, capsule_material ); type_mesh_map[PhysicsServer::SHAPE_CAPSULE]=capsule_mesh; RID capsule_shape=ps->shape_create(PhysicsServer::SHAPE_CAPSULE); diff --git a/bin/tests/test_physics_2d.cpp b/bin/tests/test_physics_2d.cpp index 845e20b6c38..f369c361d3e 100644 --- a/bin/tests/test_physics_2d.cpp +++ b/bin/tests/test_physics_2d.cpp @@ -85,7 +85,7 @@ class TestPhysics2DMainLoop : public MainLoop { } } - Image image(32,2,0,Image::FORMAT_GRAYSCALE_ALPHA,pixels); + Image image(32,2,0,Image::FORMAT_LA8,pixels); body_shape_data[Physics2DServer::SHAPE_SEGMENT].image=vs->texture_create_from_image(image); @@ -113,7 +113,7 @@ class TestPhysics2DMainLoop : public MainLoop { } } - Image image(32,32,0,Image::FORMAT_GRAYSCALE_ALPHA,pixels); + Image image(32,32,0,Image::FORMAT_LA8,pixels); body_shape_data[Physics2DServer::SHAPE_CIRCLE].image=vs->texture_create_from_image(image); @@ -141,7 +141,7 @@ class TestPhysics2DMainLoop : public MainLoop { } } - Image image(32,32,0,Image::FORMAT_GRAYSCALE_ALPHA,pixels); + Image image(32,32,0,Image::FORMAT_LA8,pixels); body_shape_data[Physics2DServer::SHAPE_RECTANGLE].image=vs->texture_create_from_image(image); @@ -173,7 +173,7 @@ class TestPhysics2DMainLoop : public MainLoop { } } - Image image(32,64,0,Image::FORMAT_GRAYSCALE_ALPHA,pixels); + Image image(32,64,0,Image::FORMAT_LA8,pixels); body_shape_data[Physics2DServer::SHAPE_CAPSULE].image=vs->texture_create_from_image(image); @@ -381,7 +381,7 @@ public: RID vp = vs->viewport_create(); canvas = vs->canvas_create(); vs->viewport_attach_canvas(vp,canvas); - vs->viewport_attach_to_screen(vp); + vs->viewport_attach_to_screen(vp,Rect2(Vector2(),OS::get_singleton()->get_window_size())); Matrix32 smaller; //smaller.scale(Vector2(0.6,0.6)); //smaller.elements[2]=Vector2(100,0); diff --git a/bin/tests/test_python.cpp b/bin/tests/test_python.cpp deleted file mode 100644 index f4a3d7a3a2a..00000000000 --- a/bin/tests/test_python.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/*************************************************************************/ -/* test_python.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "test_python.h" - -#ifdef PYTHON_ENABLED - -#include "Python.h" -#include "print_string.h" - -namespace TestPython { - -void test() { - - print_line("testing python"); - PyRun_SimpleString("import engine\n"); - PyRun_SimpleString("def test(self):\n\tprint(\"noway\")\n"); - PyRun_SimpleString("a=engine.ObjectPtr()\n"); - PyRun_SimpleString("a.noway(22,'hello')\n"); - PyRun_SimpleString("a.normalize()\n"); - PyRun_SimpleString("class Moch(engine.ObjectPtr):\n\tdef mooch(self):\n\t\tprint('muchi')\n"); - PyRun_SimpleString("b=Moch();\n"); - PyRun_SimpleString("b.mooch();\n"); - PyRun_SimpleString("b.meis();\n"); - - -} - -} - -#endif diff --git a/bin/tests/test_python.h b/bin/tests/test_python.h deleted file mode 100644 index 77e9603fe27..00000000000 --- a/bin/tests/test_python.h +++ /dev/null @@ -1,43 +0,0 @@ -/*************************************************************************/ -/* test_python.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef TEST_PYTHON_H -#define TEST_PYTHON_H - -#ifdef PYTHON_ENABLED -/** - @author Juan Linietsky -*/ -namespace TestPython { - -void test(); - -} - -#endif -#endif diff --git a/bin/tests/test_render.cpp b/bin/tests/test_render.cpp index 9c3a287afaa..7bf833c8a72 100644 --- a/bin/tests/test_render.cpp +++ b/bin/tests/test_render.cpp @@ -173,7 +173,7 @@ public: // vs->camera_set_perspective( camera, 60.0,0.1, 100.0 ); viewport = vs->viewport_create(); - vs->viewport_attach_to_screen(viewport); + vs->viewport_attach_to_screen(viewport,Rect2(Vector2(),OS::get_singleton()->get_window_size())); vs->viewport_attach_camera( viewport, camera ); vs->viewport_set_scenario( viewport, scenario ); vs->camera_set_transform(camera, Transform( Matrix3(), Vector3(0,3,30 ) ) ); @@ -192,7 +192,7 @@ public: //* lightaux = vs->light_create( VisualServer::LIGHT_DIRECTIONAL ); //vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_AMBIENT, Color(0.0,0.0,0.0) ); - vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_DIFFUSE, Color(1.0,1.0,1.0) ); + vs->light_set_color( lightaux, Color(1.0,1.0,1.0) ); //vs->light_set_shadow( lightaux, true ); light = vs->instance_create2( lightaux, scenario ); Transform lla; @@ -205,8 +205,8 @@ public: //* lightaux = vs->light_create( VisualServer::LIGHT_OMNI ); // vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_AMBIENT, Color(0.0,0.0,1.0) ); - vs->light_set_color( lightaux, VisualServer::LIGHT_COLOR_DIFFUSE, Color(1.0,1.0,0.0) ); - vs->light_set_param( lightaux, VisualServer::LIGHT_PARAM_RADIUS, 4 ); + vs->light_set_color( lightaux, Color(1.0,1.0,0.0) ); + vs->light_set_param( lightaux, VisualServer::LIGHT_PARAM_RANGE, 4 ); vs->light_set_param( lightaux, VisualServer::LIGHT_PARAM_ENERGY, 8 ); //vs->light_set_shadow( lightaux, true ); //light = vs->instance_create( lightaux ); diff --git a/bin/tests/test_shader_lang.cpp b/bin/tests/test_shader_lang.cpp index 9c0075c47d1..6c8132122ce 100644 --- a/bin/tests/test_shader_lang.cpp +++ b/bin/tests/test_shader_lang.cpp @@ -37,9 +37,9 @@ #include "scene/gui/text_edit.h" #include "print_string.h" #include "servers/visual/shader_language.h" -#include "drivers/gles2/shader_compiler_gles2.h" - +//#include "drivers/gles2/shader_compiler_gles2.h" +#if 0 typedef ShaderLanguage SL; namespace TestShaderLang { @@ -323,7 +323,7 @@ MainLoop* test() { if (!err) { print_line(rcode); } - +#if 0 ShaderCompilerGLES2 comp; String codeline,globalsline; SL::VarInfo vi; @@ -333,6 +333,18 @@ MainLoop* test() { ShaderCompilerGLES2::Flags fl; comp.compile(code,ShaderLanguage::SHADER_MATERIAL_FRAGMENT,codeline,globalsline,fl); +#endif + return NULL; +} + +} +#endif + +typedef ShaderLanguage SL; + +namespace TestShaderLang { + +MainLoop* test() { return NULL; } diff --git a/core/globals.cpp b/core/globals.cpp index b822f52f157..6fe53c74128 100644 --- a/core/globals.cpp +++ b/core/globals.cpp @@ -657,37 +657,37 @@ static Variant _decode_variant(const String& p_string) { String format=params[0].strip_edges(); Image::Format imgformat; - +/* if (format=="grayscale") { - imgformat=Image::FORMAT_GRAYSCALE; + imgformat=Image::FORMAT_L8; } else if (format=="intensity") { imgformat=Image::FORMAT_INTENSITY; } else if (format=="grayscale_alpha") { - imgformat=Image::FORMAT_GRAYSCALE_ALPHA; + imgformat=Image::FORMAT_LA8; } else if (format=="rgb") { - imgformat=Image::FORMAT_RGB; + imgformat=Image::FORMAT_RGB8; } else if (format=="rgba") { - imgformat=Image::FORMAT_RGBA; + imgformat=Image::FORMAT_RGBA8; } else if (format=="indexed") { imgformat=Image::FORMAT_INDEXED; } else if (format=="indexed_alpha") { imgformat=Image::FORMAT_INDEXED_ALPHA; } else if (format=="bc1") { - imgformat=Image::FORMAT_BC1; + imgformat=Image::FORMAT_DXT1; } else if (format=="bc2") { - imgformat=Image::FORMAT_BC2; + imgformat=Image::FORMAT_DXT3; } else if (format=="bc3") { - imgformat=Image::FORMAT_BC3; + imgformat=Image::FORMAT_DXT5; } else if (format=="bc4") { - imgformat=Image::FORMAT_BC4; + imgformat=Image::FORMAT_ATI1; } else if (format=="bc5") { - imgformat=Image::FORMAT_BC5; + imgformat=Image::FORMAT_ATI2; } else if (format=="custom") { imgformat=Image::FORMAT_CUSTOM; } else { ERR_FAIL_V( Image() ); - } + }*/ int mipmaps=params[1].to_int(); int w=params[2].to_int(); @@ -974,26 +974,30 @@ static String _encode_variant(const Variant& p_variant) { if (!img.empty()) { String format; + + /* switch(img.get_format()) { - case Image::FORMAT_GRAYSCALE: format="grayscale"; break; + case Image::FORMAT_L8: format="grayscale"; break; case Image::FORMAT_INTENSITY: format="intensity"; break; - case Image::FORMAT_GRAYSCALE_ALPHA: format="grayscale_alpha"; break; - case Image::FORMAT_RGB: format="rgb"; break; - case Image::FORMAT_RGBA: format="rgba"; break; + case Image::FORMAT_LA8: format="grayscale_alpha"; break; + case Image::FORMAT_RGB8: format="rgb"; break; + case Image::FORMAT_RGBA8: format="rgba"; break; case Image::FORMAT_INDEXED : format="indexed"; break; case Image::FORMAT_INDEXED_ALPHA: format="indexed_alpha"; break; - case Image::FORMAT_BC1: format="bc1"; break; - case Image::FORMAT_BC2: format="bc2"; break; - case Image::FORMAT_BC3: format="bc3"; break; - case Image::FORMAT_BC4: format="bc4"; break; - case Image::FORMAT_BC5: format="bc5"; break; + case Image::FORMAT_DXT1: format="bc1"; break; + case Image::FORMAT_DXT3: format="bc2"; break; + case Image::FORMAT_DXT5: format="bc3"; break; + case Image::FORMAT_ATI1: format="bc4"; break; + case Image::FORMAT_ATI2: format="bc5"; break; case Image::FORMAT_CUSTOM: format="custom custom_size="+itos(img.get_data().size())+""; break; default: {} } + */ + str+=format+", "; - str+=itos(img.get_mipmaps())+", "; + str+=itos(img.has_mipmaps())+", "; str+=itos(img.get_width())+", "; str+=itos(img.get_height())+", "; DVector data = img.get_data(); @@ -1399,6 +1403,7 @@ void Globals::set_custom_property_info(const String& p_prop,const PropertyInfo& ERR_FAIL_COND(!props.has(p_prop)); custom_prop_info[p_prop]=p_info; + custom_prop_info[p_prop].name=p_prop; } diff --git a/core/image.cpp b/core/image.cpp index 90051d7d0d4..73a2ab80132 100644 --- a/core/image.cpp +++ b/core/image.cpp @@ -36,86 +36,186 @@ const char* Image::format_names[Image::FORMAT_MAX]={ - "Grayscale", - "Intensity", - "GrayscaleAlpha", - "RGB", - "RGBA", - "Indexed", - "IndexedAlpha", - "YUV422", - "YUV444", - "BC1", - "BC2", - "BC3", - "BC4", - "BC5", - "PVRTC2", - "PVRTC2Alpha", + "Lum8", //luminance + "LumAlpha8", //luminance-alpha + "Red8", + "RedGreen", + "RGB8", + "RGBA8", + "RGB565", //16 bit + "RGBA4444", + "RGBA5551", + "RFloat", //float + "RGFloat", + "RGBFloat", + "RGBAFloat", + "RHalf", //half float + "RGHalf", + "RGBHalf", + "RGBAHalf", + "DXT1", //s3tc + "DXT3", + "DXT5", + "ATI1", + "ATI2", + "BPTC_RGBA", + "BPTC_RGBF", + "BPTC_RGBFU", + "PVRTC2", //pvrtc + "PVRTC2A", "PVRTC4", - "PVRTC4Alpha", - "ETC", - "ATC", - "ATCAlphaExp", - "ATCAlphaInterp", + "PVRTC4A", + "ETC", //etc1 + "ETC2_R11", //etc2 + "ETC2_R11S", //signed", NOT srgb. + "ETC2_RG11", + "ETC2_RG11S", + "ETC2_RGB8", + "ETC2_RGBA8", + "ETC2_RGB8A1", }; SavePNGFunc Image::save_png_func = NULL; -void Image::_put_pixel(int p_x,int p_y, const BColor& p_color, unsigned char *p_data) { - _put_pixelw(p_x,p_y,width,p_color,p_data); +void Image::_put_pixelb(int p_x,int p_y, uint32_t p_pixelsize,uint8_t *p_dst,const uint8_t *p_src) { + + uint32_t ofs=(p_y*width+p_x)*p_pixelsize; + + for(uint32_t i=0;i::Write wp = data.write(); - unsigned char *data_ptr=wp.ptr(); - - _put_pixelw(p_x,p_y,w,BColor(p_color.r*255,p_color.g*255,p_color.b*255,p_color.a*255),&data_ptr[ofs]); - -} - - -Image::BColor Image::_get_pixel(int p_x,int p_y,const unsigned char *p_data,int p_data_size) const{ - - return _get_pixelw(p_x,p_y,width,p_data,p_data_size); -} -Image::BColor Image::_get_pixelw(int p_x,int p_y,int p_width,const unsigned char *p_data,int p_data_size) const{ - - int ofs=p_y*p_width+p_x; - BColor result(0,0,0,0); - switch(format) { - - case FORMAT_GRAYSCALE: { - - result=BColor(p_data[ofs],p_data[ofs],p_data[ofs],255.0); - } break; - case FORMAT_INTENSITY: { - - result=BColor(255,255,255,p_data[ofs]); - } break; - case FORMAT_GRAYSCALE_ALPHA: { - - result=BColor(p_data[ofs*2],p_data[ofs*2],p_data[ofs*2],p_data[ofs*2+1]); - - } break; - case FORMAT_RGB: { - - result=BColor(p_data[ofs*3],p_data[ofs*3+1],p_data[ofs*3+2]); - - } break; - case FORMAT_RGBA: { - - result=BColor(p_data[ofs*4],p_data[ofs*4+1],p_data[ofs*4+2],p_data[ofs*4+3]); - } break; - case FORMAT_INDEXED_ALPHA: { - - int pitch = 4; - const uint8_t* pal = &p_data[ p_data_size - pitch * 256 ]; - int idx = p_data[ofs]; - result=BColor(pal[idx * pitch + 0] , pal[idx * pitch + 1] , pal[idx * pitch + 2] , pal[idx * pitch + 3] ); - - } break; - case FORMAT_INDEXED: { - - int pitch = 3; - const uint8_t* pal = &p_data[ p_data_size - pitch * 256 ]; - int idx = p_data[ofs]; - result=BColor(pal[idx * pitch + 0] , pal[idx * pitch + 1] , pal[idx * pitch + 2] ,255); - } break; - case FORMAT_YUV_422: { - - int y, u, v; - if (p_x % 2) { - const uint8_t* yp = &p_data[p_width * 2 * p_y + p_x * 2]; - u = *(yp-1); - y = yp[0]; - v = yp[1]; - } else { - - const uint8_t* yp = &p_data[p_width * 2 * p_y + p_x * 2]; - y = yp[0]; - u = yp[1]; - v = yp[3]; - }; - - int32_t r = 1.164 * (y - 16) + 1.596 * (v - 128); - int32_t g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128); - int32_t b = 1.164 * (y - 16) + 2.018 * (u - 128); - result = BColor(CLAMP(r, 0, 255), CLAMP(g, 0, 255), CLAMP(b, 0, 255)); - } break; - case FORMAT_YUV_444: { - - uint8_t y, u, v; - const uint8_t* yp = &p_data[p_width * 3 * p_y + p_x * 3]; - y = yp[0]; - u = yp[1]; - v = yp[2]; - - int32_t r = 1.164 * (y - 16) + 1.596 * (v - 128); - int32_t g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128); - int32_t b = 1.164 * (y - 16) + 2.018 * (u - 128); - result = BColor(CLAMP(r, 0, 255), CLAMP(g, 0, 255), CLAMP(b, 0, 255)); - } break; - default:{} - - } - - return result; - -} - -void Image::put_indexed_pixel(int p_x, int p_y, uint8_t p_idx,int p_mipmap) { - - ERR_FAIL_COND(format != FORMAT_INDEXED && format != FORMAT_INDEXED_ALPHA); - ERR_FAIL_INDEX(p_mipmap,mipmaps+1); - int ofs,w,h; - _get_mipmap_offset_and_size(p_mipmap,ofs,w,h); - ERR_FAIL_INDEX(p_x,w); - ERR_FAIL_INDEX(p_y,h); - - data.set(ofs + p_y * w + p_x, p_idx); -}; - -uint8_t Image::get_indexed_pixel(int p_x, int p_y,int p_mipmap) const { - - ERR_FAIL_COND_V(format != FORMAT_INDEXED && format != FORMAT_INDEXED_ALPHA, 0); - - ERR_FAIL_INDEX_V(p_mipmap,mipmaps+1,0); - int ofs,w,h; - _get_mipmap_offset_and_size(p_mipmap,ofs,w,h); - ERR_FAIL_INDEX_V(p_x,w,0); - ERR_FAIL_INDEX_V(p_y,h,0); - - - return data[ofs + p_y * w + p_x]; -}; - -void Image::set_pallete(const DVector& p_data) { - - - int len = p_data.size(); - - ERR_FAIL_COND(format != FORMAT_INDEXED && format != FORMAT_INDEXED_ALPHA); - ERR_FAIL_COND(format == FORMAT_INDEXED && len!=(256*3)); - ERR_FAIL_COND(format == FORMAT_INDEXED_ALPHA && len!=(256*4)); - - int ofs,w,h; - _get_mipmap_offset_and_size(mipmaps+1,ofs,w,h); - - int pal_ofs = ofs; - data.resize(pal_ofs + p_data.size()); - - DVector::Write wp = data.write(); - unsigned char *dst=wp.ptr() + pal_ofs; - - DVector::Read r = p_data.read(); - const unsigned char *src=r.ptr(); - - copymem(dst, src, len); -}; - int Image::get_width() const { return width; } + int Image::get_height() const{ return height; } -int Image::get_mipmaps() const { - +bool Image::has_mipmaps() const { return mipmaps; } -Color Image::get_pixel(int p_x,int p_y,int p_mipmap) const { +int Image::get_mipmap_count() const { + + if (mipmaps) + return get_image_required_mipmaps(width,height,format); + else + return 0; +} - ERR_FAIL_INDEX_V(p_mipmap,mipmaps+1,Color()); - int ofs,w,h; - _get_mipmap_offset_and_size(p_mipmap,ofs,w,h); - ERR_FAIL_INDEX_V(p_x,w,Color()); - ERR_FAIL_INDEX_V(p_y,h,Color()); +//using template generates perfectly optimized code due to constant expression reduction and unused variable removal present in all compilers +template +static void _convert( int p_width,int p_height,const uint8_t* p_src,uint8_t* p_dst ){ - int len = data.size(); - DVector::Read r = data.read(); - const unsigned char*data_ptr=r.ptr(); - BColor c = _get_pixelw(p_x,p_y,w,&data_ptr[ofs],len); - return Color( c.r/255.0,c.g/255.0,c.b/255.0,c.a/255.0 ); + + for(int y=0;y=FORMAT_BC1 || p_new_format>=FORMAT_BC1) { - ERR_EXPLAIN("Cannot convert to <-> from compressed/custom image formats (for now)."); + if (format>=FORMAT_RGB565 || p_new_format>=FORMAT_RGB565) { + + ERR_EXPLAIN("Cannot convert to <-> from non byte formats."); ERR_FAIL(); } - if (p_new_format==FORMAT_INDEXED || p_new_format==FORMAT_INDEXED_ALPHA) { - - - return; - } - Image new_img(width,height,0,p_new_format); - int len=data.size(); +// int len=data.size(); DVector::Read r = data.read(); DVector::Write w = new_img.data.write(); @@ -391,35 +371,56 @@ void Image::convert( Format p_new_format ){ const uint8_t *rptr = r.ptr(); uint8_t *wptr = w.ptr(); - if (p_new_format==FORMAT_RGBA && format==FORMAT_INDEXED_ALPHA) { + int conversion_type = format | p_new_format<<8; - //optimized unquantized form - int dataend = len-256*4; - const uint32_t *palpos = (const uint32_t*)&rptr[dataend]; - uint32_t *dst32 = (uint32_t *)wptr; + switch(conversion_type) { - for(int i=0;i( width, height,rptr, wptr ); break; + case FORMAT_L8|(FORMAT_R8<<8): _convert<1,false,1,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_L8|(FORMAT_RG8<<8): _convert<1,false,2,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_L8|(FORMAT_RGB8<<8): _convert<1,false,3,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_L8|(FORMAT_RGBA8<<8): _convert<1,false,3,true,true,false>( width, height,rptr, wptr ); break; + case FORMAT_LA8|(FORMAT_L8<<8): _convert<1,true,1,false,true,true>( width, height,rptr, wptr ); break; + case FORMAT_LA8|(FORMAT_R8<<8): _convert<1,true,1,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_LA8|(FORMAT_RG8<<8): _convert<1,true,2,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_LA8|(FORMAT_RGB8<<8): _convert<1,true,3,false,true,false>( width, height,rptr, wptr ); break; + case FORMAT_LA8|(FORMAT_RGBA8<<8): _convert<1,true,3,true,true,false>( width, height,rptr, wptr ); break; + case FORMAT_R8|(FORMAT_L8<<8): _convert<1,false,1,false,false,true>( width, height,rptr, wptr ); break; + case FORMAT_R8|(FORMAT_LA8<<8): _convert<1,false,1,true,false,true>( width, height,rptr, wptr ); break; + case FORMAT_R8|(FORMAT_RG8<<8): _convert<1,false,2,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_R8|(FORMAT_RGB8<<8): _convert<1,false,3,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_R8|(FORMAT_RGBA8<<8): _convert<1,false,3,true,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RG8|(FORMAT_L8<<8): _convert<1,false,1,false,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RG8|(FORMAT_LA8<<8): _convert<1,false,1,true,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RG8|(FORMAT_R8<<8): _convert<1,false,1,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RG8|(FORMAT_RGB8<<8): _convert<1,false,3,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RG8|(FORMAT_RGBA8<<8): _convert<1,false,3,true,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGB8|(FORMAT_L8<<8): _convert<2,false,1,false,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RGB8|(FORMAT_LA8<<8): _convert<2,false,1,true,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RGB8|(FORMAT_R8<<8): _convert<2,false,1,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGB8|(FORMAT_RG8<<8): _convert<2,false,2,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGB8|(FORMAT_RGBA8<<8): _convert<2,false,3,true,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGBA8|(FORMAT_L8<<8): _convert<3,true,1,false,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RGBA8|(FORMAT_LA8<<8): _convert<3,true,1,true,false,true>( width, height,rptr, wptr ); break; + case FORMAT_RGBA8|(FORMAT_R8<<8): _convert<3,true,1,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGBA8|(FORMAT_RG8<<8): _convert<3,true,2,false,false,false>( width, height,rptr, wptr ); break; + case FORMAT_RGBA8|(FORMAT_RGB8<<8): _convert<3,true,3,true,false,false>( width, height,rptr, wptr ); break; - } else { - - //this is temporary, must find a faster way to do it. - for(int i=0;i::Read(); w = DVector::Write(); - bool gen_mipmaps=mipmaps>0; + bool gen_mipmaps=mipmaps; + + mipmaps=false; *this=new_img; if (gen_mipmaps) generate_mipmaps(); - } Image::Format Image::get_format() const{ @@ -460,13 +461,13 @@ static void _scale_cubic(const uint8_t* p_src, uint8_t* p_dst, uint32_t p_src_wi int xmax = width - 1; // temporary pointer - for ( int y = 0; y < p_dst_height; y++ ) { + for ( uint32_t y = 0; y < p_dst_height; y++ ) { // Y coordinates oy = (double) y * yfac - 0.5f; oy1 = (int) oy; dy = oy - (double) oy1; - for ( int x = 0; x < p_dst_width; x++ ) { + for ( uint32_t x = 0; x < p_dst_width; x++ ) { // X coordinates ox = (double) x * xfac - 0.5f; ox1 = (int) ox; @@ -650,10 +651,6 @@ void Image::resize( int p_width, int p_height, Interpolation p_interpolation ) { Image dst( p_width, p_height, 0, format ); - if (format==FORMAT_INDEXED) - p_interpolation=INTERPOLATE_NEAREST; - - DVector::Read r = data.read(); const unsigned char*r_ptr=r.ptr(); @@ -722,18 +719,33 @@ void Image::crop( int p_width, int p_height ) { if (p_width==width && p_height==height) return; + uint8_t pdata[16]; //largest is 16 + uint32_t pixel_size = get_format_pixel_size(format); + Image dst( p_width, p_height,0, format ); + { + DVector::Read r = data.read(); + DVector::Write w = dst.data.write(); - for (int y=0;y=width || y>=height)? Color() : get_pixel(x,y); - dst.put_pixel(x,y,col); + if ((x>=width || y>=height)) { + for(uint32_t i=0;i0) dst.generate_mipmaps(); *this=dst; @@ -754,18 +766,28 @@ void Image::flip_y() { + { + DVector::Write w = data.write(); + uint8_t up[16]; + uint8_t down[16]; + uint32_t pixel_size = get_format_pixel_size(format); - for (int y=0;y<(height/2);y++) { + for (int y=0;y::Write w = data.write(); + uint8_t up[16]; + uint8_t down[16]; + uint32_t pixel_size = get_format_pixel_size(format); - Color up = get_pixel(x,y); - Color down = get_pixel(width-x-1,y); + for (int y=0;y @@ -903,16 +914,16 @@ static void _generate_po2_mipmap(const uint8_t* p_src, uint8_t* p_dst, uint32_t void Image::expand_x2_hq2x() { - ERR_FAIL_COND(format>=FORMAT_INDEXED); + ERR_FAIL_COND(!_can_modify(format)); Format current = format; - bool mipmaps=get_mipmaps(); - if (mipmaps) { + bool mm=has_mipmaps(); + if (mm) { clear_mipmaps(); } - if (current!=FORMAT_RGBA) - convert(FORMAT_RGBA); + if (current!=FORMAT_RGBA8) + convert(FORMAT_RGBA8); DVector dest; dest.resize(width*2*height*2*4); @@ -930,7 +941,7 @@ void Image::expand_x2_hq2x() { data=dest; - if (current!=FORMAT_RGBA) + if (current!=FORMAT_RGBA8) convert(current); if (mipmaps) { @@ -941,7 +952,6 @@ void Image::expand_x2_hq2x() { void Image::shrink_x2() { - ERR_FAIL_COND(format==FORMAT_INDEXED || format==FORMAT_INDEXED_ALPHA); ERR_FAIL_COND( data.size()==0 ); @@ -964,7 +974,6 @@ void Image::shrink_x2() { copymem(w.ptr(),&r[ofs],new_size); } - mipmaps--; width/=2; height/=2; data=new_img; @@ -973,7 +982,7 @@ void Image::shrink_x2() { DVector new_img; - ERR_FAIL_COND( format>=FORMAT_INDEXED ); + ERR_FAIL_COND( !_can_modify(format) ); int ps = get_format_pixel_size(format); new_img.resize((width/2)*(height/2)*ps); @@ -983,11 +992,12 @@ void Image::shrink_x2() { switch(format) { - case FORMAT_GRAYSCALE: - case FORMAT_INTENSITY: _generate_po2_mipmap<1>(r.ptr(), w.ptr(), width,height); break; - case FORMAT_GRAYSCALE_ALPHA: _generate_po2_mipmap<2>(r.ptr(), w.ptr(), width,height); break; - case FORMAT_RGB: _generate_po2_mipmap<3>(r.ptr(), w.ptr(), width,height); break; - case FORMAT_RGBA: _generate_po2_mipmap<4>(r.ptr(), w.ptr(), width,height); break; + case FORMAT_L8: + case FORMAT_R8: _generate_po2_mipmap<1>(r.ptr(), w.ptr(), width,height); break; + case FORMAT_LA8: _generate_po2_mipmap<2>(r.ptr(), w.ptr(), width,height); break; + case FORMAT_RG8: _generate_po2_mipmap<2>(r.ptr(), w.ptr(), width,height); break; + case FORMAT_RGB8: _generate_po2_mipmap<3>(r.ptr(), w.ptr(), width,height); break; + case FORMAT_RGBA8: _generate_po2_mipmap<4>(r.ptr(), w.ptr(), width,height); break; default: {} } } @@ -999,7 +1009,7 @@ void Image::shrink_x2() { } } -Error Image::generate_mipmaps(int p_mipmaps,bool p_keep_existing) { +Error Image::generate_mipmaps(bool p_keep_existing) { if (!_can_modify(format)) { ERR_EXPLAIN("Cannot generate mipmaps in indexed, compressed or custom image formats."); @@ -1007,11 +1017,13 @@ Error Image::generate_mipmaps(int p_mipmaps,bool p_keep_existing) { } + int mmcount = get_mipmap_count(); + int from_mm=1; if (p_keep_existing) { - from_mm=mipmaps+1; + from_mm=mmcount+1; } - int size = _get_dst_image_size(width,height,format,mipmaps,p_mipmaps); + int size = _get_dst_image_size(width,height,format,mmcount); data.resize(size); @@ -1023,7 +1035,7 @@ Error Image::generate_mipmaps(int p_mipmaps,bool p_keep_existing) { int prev_h=height; int prev_w=width; - for(int i=1;i(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; - case FORMAT_GRAYSCALE_ALPHA: _generate_po2_mipmap<2>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; - case FORMAT_RGB: _generate_po2_mipmap<3>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; - case FORMAT_RGBA: _generate_po2_mipmap<4>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; + case FORMAT_L8: + case FORMAT_R8: _generate_po2_mipmap<1>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; + case FORMAT_LA8: + case FORMAT_RG8: _generate_po2_mipmap<2>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; + case FORMAT_RGB8: _generate_po2_mipmap<3>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; + case FORMAT_RGBA8: _generate_po2_mipmap<4>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h); break; default: {} } } @@ -1056,7 +1069,7 @@ Error Image::generate_mipmaps(int p_mipmaps,bool p_keep_existing) { int prev_h=height; int prev_w=width; - for(int i=1;i(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; - case FORMAT_GRAYSCALE_ALPHA: _scale_bilinear<2>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; - case FORMAT_RGB: _scale_bilinear<3>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; - case FORMAT_RGBA: _scale_bilinear<4>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; + case FORMAT_L8: + case FORMAT_R8: _scale_bilinear<1>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; + case FORMAT_LA8: + case FORMAT_RG8: _scale_bilinear<2>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; + case FORMAT_RGB8:_scale_bilinear<3>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; + case FORMAT_RGBA8: _scale_bilinear<4>(&wp[prev_ofs], &wp[ofs], prev_w,prev_h,w,h); break; default: {} } } @@ -1083,85 +1097,25 @@ Error Image::generate_mipmaps(int p_mipmaps,bool p_keep_existing) { } - - return OK; } void Image::clear_mipmaps() { - if (mipmaps==0) + if (!mipmaps) return; - if (format==FORMAT_CUSTOM) { - ERR_EXPLAIN("Cannot clear mipmaps in indexed, compressed or custom image formats."); - ERR_FAIL(); - - } - if (empty()) return; int ofs,w,h; _get_mipmap_offset_and_size(1,ofs,w,h); - int palsize = get_format_pallete_size(format); - DVector pallete; - ERR_FAIL_COND(ofs+palsize > data.size()); //bug? - if (palsize) { + data.resize(ofs); - pallete.resize(palsize); - DVector::Read r = data.read(); - DVector::Write w = pallete.write(); - - copymem(&w[0],&r[data.size()-palsize],palsize); - } - - data.resize(ofs+palsize); - - if (palsize) { - - DVector::Read r = pallete.read(); - DVector::Write w = data.write(); - - copymem(&w[ofs],&r[0],palsize); - } - - mipmaps=0; + mipmaps=false; } -void Image::make_normalmap(float p_height_scale) { - - if (!_can_modify(format)) { - ERR_EXPLAIN("Cannot crop in indexed, compressed or custom image formats."); - ERR_FAIL(); - } - - ERR_FAIL_COND( empty() ); - - Image normalmap(width,height,0, FORMAT_RGB); - /* - for (int y=0;y0)?get_pixel(x,y-1).gray()/255.0:center; - float down=(y<(height-1))?get_pixel(x,y+1).gray()/255.0:center; - float left=(x>0)?get_pixel(x-1,y).gray()/255.0:center; - float right=(x<(width-1))?get_pixel(x+1,y).gray()/255.0:center; - - - // uhm, how do i do this? .... - - Color result( (uint8_t)((normal.x+1.0)*127.0), (uint8_t)((normal.y+1.0)*127.0), (uint8_t)((normal.z+1.0)*127.0) ); - - normalmap.put_pixel( x, y, result ); - } - - } - */ - *this=normalmap; -} bool Image::empty() const { @@ -1186,32 +1140,30 @@ void Image::create(int p_width, int p_height, bool p_use_mipmaps,Format p_format width=p_width; height=p_height; - mipmaps=mm; + mipmaps=p_use_mipmaps; format=p_format; } -void Image::create(int p_width, int p_height, int p_mipmaps, Format p_format, const DVector& p_data) { +void Image::create(int p_width, int p_height, bool p_use_mipmaps, Format p_format, const DVector& p_data) { ERR_FAIL_INDEX(p_width-1,MAX_WIDTH); ERR_FAIL_INDEX(p_height-1,MAX_HEIGHT); - if (p_format < FORMAT_CUSTOM) { - int mm; - int size = _get_dst_image_size(p_width,p_height,p_format,mm,p_mipmaps); + int mm; + int size = _get_dst_image_size(p_width,p_height,p_format,mm,p_use_mipmaps); - if (size!=p_data.size()) { - ERR_EXPLAIN("Expected data size of "+itos(size)+" in Image::create()"); - ERR_FAIL_COND(p_data.size()!=size); - } - }; + if (size!=p_data.size()) { + ERR_EXPLAIN("Expected data size of "+itos(size)+" in Image::create()"); + ERR_FAIL_COND(p_data.size()!=size); + } height=p_height; width=p_width; format=p_format; data=p_data; - mipmaps=p_mipmaps; + mipmaps=p_use_mipmaps; } @@ -1220,7 +1172,7 @@ void Image::create( const char ** p_xpm ) { int size_width,size_height; int pixelchars=0; - mipmaps=0; + mipmaps=false; bool has_alpha=false; enum Status { @@ -1235,6 +1187,8 @@ void Image::create( const char ** p_xpm ) { HashMap colormap; int colormap_size; + uint32_t pixel_size; + DVector::Write w; while (status!=DONE) { @@ -1327,7 +1281,9 @@ void Image::create( const char ** p_xpm ) { if (line==colormap_size) { status=READING_PIXELS; - create(size_width,size_height,0,has_alpha?FORMAT_RGBA:FORMAT_RGB); + create(size_width,size_height,0,has_alpha?FORMAT_RGBA8:FORMAT_RGB8); + w=data.write(); + pixel_size=has_alpha?4:3; } } break; case READING_PIXELS: { @@ -1341,7 +1297,11 @@ void Image::create( const char ** p_xpm ) { Color *colorptr = colormap.getptr(pixelstr); ERR_FAIL_COND(!colorptr); - put_pixel(x,y,*colorptr); + uint8_t pixel[4]; + for(uint32_t i=0;i= FORMAT_YUV_422 && format <= FORMAT_YUV_444) - return false; int w,h; _get_mipmap_offset_and_size(1,len,w,h); @@ -1404,13 +1361,8 @@ bool Image::is_invisible() const { bool detected=false; switch(format) { - case FORMAT_INTENSITY: { - for(int i=0;i>1);i++) { @@ -1418,25 +1370,18 @@ bool Image::is_invisible() const { } } break; - case FORMAT_RGBA: { + case FORMAT_RGBA8: { for(int i=0;i<(len>>2);i++) { DETECT_NON_ALPHA(data_ptr[(i<<2)+3]) } } break; - case FORMAT_INDEXED: { - return false; - } break; - case FORMAT_INDEXED_ALPHA: { - - return false; - } break; - case FORMAT_PVRTC2_ALPHA: - case FORMAT_PVRTC4_ALPHA: - case FORMAT_BC2: - case FORMAT_BC3: { + case FORMAT_PVRTC2A: + case FORMAT_PVRTC4A: + case FORMAT_DXT3: + case FORMAT_DXT5: { detected=true; } break; default: {} @@ -1447,19 +1392,12 @@ bool Image::is_invisible() const { Image::AlphaMode Image::detect_alpha() const { - if (format==FORMAT_GRAYSCALE || - format==FORMAT_RGB || - format==FORMAT_INDEXED) - return ALPHA_NONE; int len = data.size(); if (len==0) return ALPHA_NONE; - if (format >= FORMAT_YUV_422 && format <= FORMAT_YUV_444) - return ALPHA_NONE; - int w,h; _get_mipmap_offset_and_size(1,len,w,h); @@ -1470,13 +1408,8 @@ Image::AlphaMode Image::detect_alpha() const { bool detected=false; switch(format) { - case FORMAT_INTENSITY: { - for(int i=0;i>1);i++) { @@ -1484,25 +1417,17 @@ Image::AlphaMode Image::detect_alpha() const { } } break; - case FORMAT_RGBA: { + case FORMAT_RGBA8: { for(int i=0;i<(len>>2);i++) { DETECT_ALPHA(data_ptr[(i<<2)+3]) } - } break; - case FORMAT_INDEXED: { - - return ALPHA_NONE; - } break; - case FORMAT_INDEXED_ALPHA: { - - return ALPHA_BLEND; - } break; - case FORMAT_PVRTC2_ALPHA: - case FORMAT_PVRTC4_ALPHA: - case FORMAT_BC2: - case FORMAT_BC3: { + } break; + case FORMAT_PVRTC2A: + case FORMAT_PVRTC4A: + case FORMAT_DXT3: + case FORMAT_DXT5: { detected=true; } break; default: {} @@ -1528,7 +1453,7 @@ Error Image::save_png(const String& p_path) { return ERR_UNAVAILABLE; return save_png_func(p_path, *this); -}; +} bool Image::operator==(const Image& p_image) const { @@ -1541,84 +1466,7 @@ bool Image::operator==(const Image& p_image) const { } -int Image::get_format_pixel_size(Format p_format) { - switch(p_format) { - case FORMAT_GRAYSCALE: { - - return 1; - } break; - case FORMAT_INTENSITY: { - - return 1; - } break; - case FORMAT_GRAYSCALE_ALPHA: { - - return 2; - } break; - case FORMAT_RGB: { - - return 3; - } break; - case FORMAT_RGBA: { - - return 4; - } break; - case FORMAT_INDEXED: { - - return 1; - } break; - case FORMAT_INDEXED_ALPHA: { - - return 1; - } break; - case FORMAT_BC1: - case FORMAT_BC2: - case FORMAT_BC3: - case FORMAT_BC4: - case FORMAT_BC5: { - - return 1; - } break; - case FORMAT_PVRTC2: - case FORMAT_PVRTC2_ALPHA: { - - return 1; - } break; - case FORMAT_PVRTC4: - case FORMAT_PVRTC4_ALPHA: { - - return 1; - } break; - case FORMAT_ATC: - case FORMAT_ATC_ALPHA_EXPLICIT: - case FORMAT_ATC_ALPHA_INTERPOLATED: { - - return 1; - } break; - case FORMAT_ETC: { - - return 1; - } break; - case FORMAT_YUV_422: { - return 2; - }; - case FORMAT_YUV_444: { - return 3; - } break; - case FORMAT_CUSTOM: { - - ERR_EXPLAIN("pixel size requested for custom image format, and it's unknown obviously"); - ERR_FAIL_V(1); - } break; - default:{ - ERR_EXPLAIN("Cannot obtain pixel size from this format"); - ERR_FAIL_V(1); - - } - } - return 0; -} int Image::get_image_data_size(int p_width, int p_height, Format p_format,int p_mipmaps) { @@ -1635,105 +1483,12 @@ int Image::get_image_required_mipmaps(int p_width, int p_height, Format p_format } -void Image::_get_format_min_data_size(Format p_format,int &r_w, int &r_h) { - - - switch(p_format) { - case FORMAT_BC1: - case FORMAT_BC2: - case FORMAT_BC3: - case FORMAT_BC4: - case FORMAT_BC5: { - r_w=4; - r_h=4; - } break; - case FORMAT_PVRTC2: - case FORMAT_PVRTC2_ALPHA: { - - r_w=16; - r_h=8; - } break; - case FORMAT_PVRTC4_ALPHA: - case FORMAT_PVRTC4: { - - r_w=8; - r_h=8; - } break; - case FORMAT_ATC: - case FORMAT_ATC_ALPHA_EXPLICIT: - case FORMAT_ATC_ALPHA_INTERPOLATED: { - - r_w=8; - r_h=8; - - } break; - - case FORMAT_ETC: { - - r_w=4; - r_h=4; - } break; - default: { - r_w=1; - r_h=1; - } break; - } - -} - - -int Image::get_format_pixel_rshift(Format p_format) { - - if (p_format==FORMAT_BC1 || p_format==FORMAT_BC4 || p_format==FORMAT_ATC || p_format==FORMAT_PVRTC4 || p_format==FORMAT_PVRTC4_ALPHA || p_format==FORMAT_ETC) - return 1; - else if (p_format==FORMAT_PVRTC2 || p_format==FORMAT_PVRTC2_ALPHA) - return 2; - else - return 0; -} - -int Image::get_format_pallete_size(Format p_format) { - - switch(p_format) { - case FORMAT_GRAYSCALE: { - - return 0; - } break; - case FORMAT_INTENSITY: { - - return 0; - } break; - case FORMAT_GRAYSCALE_ALPHA: { - - return 0; - } break; - case FORMAT_RGB: { - - return 0; - } break; - case FORMAT_RGBA: { - - return 0; - } break; - case FORMAT_INDEXED: { - - return 3*256; - } break; - case FORMAT_INDEXED_ALPHA: { - - return 4*256; - } break; - default:{} - } - return 0; -} Error Image::_decompress_bc() { - print_line("decompressing bc"); int wd=width,ht=height; if (wd%4!=0) { @@ -1745,7 +1500,7 @@ Error Image::_decompress_bc() { int mm; - int size = _get_dst_image_size(wd,ht,FORMAT_RGBA,mm,mipmaps); + int size = _get_dst_image_size(wd,ht,FORMAT_RGBA8,mm); DVector newdata; newdata.resize(size); @@ -1762,7 +1517,7 @@ Error Image::_decompress_bc() { switch(format) { - case FORMAT_BC1: { + case FORMAT_DXT1: { int len = (wd*ht)/16; uint8_t* dst=&w[wofs]; @@ -1788,8 +1543,8 @@ Error Image::_decompress_bc() { col_b|=src[2]; uint8_t table[4][4]={ - { (col_a>>11)<<3, ((col_a>>5)&0x3f)<<2, ((col_a)&0x1f)<<3, 255 }, - { (col_b>>11)<<3, ((col_b>>5)&0x3f)<<2, ((col_b)&0x1f)<<3, 255 }, + { uint8_t((col_a>>11)<<3), uint8_t(((col_a>>5)&0x3f)<<2),uint8_t(((col_a)&0x1f)<<3), 255 }, + { uint8_t((col_b>>11)<<3), uint8_t(((col_b>>5)&0x3f)<<2),uint8_t(((col_b)&0x1f)<<3), 255 }, {0,0,0,255}, {0,0,0,255} }; @@ -1841,7 +1596,7 @@ Error Image::_decompress_bc() { ht/=2; } break; - case FORMAT_BC2: { + case FORMAT_DXT3: { int len = (wd*ht)/16; uint8_t* dst=&w[wofs]; @@ -1885,8 +1640,9 @@ Error Image::_decompress_bc() { col_b|=src[8+2]; uint8_t table[4][4]={ - { (col_a>>11)<<3, ((col_a>>5)&0x3f)<<2, ((col_a)&0x1f)<<3, 255 }, - { (col_b>>11)<<3, ((col_b>>5)&0x3f)<<2, ((col_b)&0x1f)<<3, 255 }, + { uint8_t((col_a>>11)<<3), uint8_t(((col_a>>5)&0x3f)<<2),uint8_t(((col_a)&0x1f)<<3), 255 }, + { uint8_t((col_b>>11)<<3), uint8_t(((col_b>>5)&0x3f)<<2),uint8_t(((col_b)&0x1f)<<3), 255 }, + {0,0,0,255}, {0,0,0,255} }; @@ -1933,7 +1689,7 @@ Error Image::_decompress_bc() { ht/=2; } break; - case FORMAT_BC3: { + case FORMAT_DXT5: { int len = (wd*ht)/16; uint8_t* dst=&w[wofs]; @@ -2001,9 +1757,10 @@ Error Image::_decompress_bc() { col_b<<=8; col_b|=src[8+2]; - uint8_t table[4][4]={ - { (col_a>>11)<<3, ((col_a>>5)&0x3f)<<2, ((col_a)&0x1f)<<3, 255 }, - { (col_b>>11)<<3, ((col_b>>5)&0x3f)<<2, ((col_b)&0x1f)<<3, 255 }, + uint8_t table[4][4]={ + { uint8_t((col_a>>11)<<3), uint8_t(((col_a>>5)&0x3f)<<2),uint8_t(((col_a)&0x1f)<<3), 255 }, + { uint8_t((col_b>>11)<<3), uint8_t(((col_b>>5)&0x3f)<<2),uint8_t(((col_b)&0x1f)<<3), 255 }, + {0,0,0,255}, {0,0,0,255} }; @@ -2061,18 +1818,19 @@ Error Image::_decompress_bc() { r=DVector::Read(); data=newdata; - format=FORMAT_RGBA; + format=FORMAT_RGBA8; if (wd!=width || ht!=height) { - //todo, crop - width=wd; - height=ht; + + SWAP(width,wd); + SWAP(height,ht); + crop(wd,ht); } return OK; } bool Image::is_compressed() const { - return format>=FORMAT_BC1; + return format>=FORMAT_RGB565; } @@ -2085,12 +1843,14 @@ Image Image::decompressed() const { Error Image::decompress() { - if (format>=FORMAT_BC1 && format<=FORMAT_BC5 ) + if (format>=FORMAT_DXT1 && format<=FORMAT_ATI2 ) _decompress_bc();//_image_decompress_bc(this); - else if (format>=FORMAT_PVRTC2 && format<=FORMAT_PVRTC4_ALPHA && _image_decompress_pvrtc) + else if (format>=FORMAT_PVRTC2 && format<=FORMAT_PVRTC4A&& _image_decompress_pvrtc) _image_decompress_pvrtc(this); else if (format==FORMAT_ETC && _image_decompress_etc) _image_decompress_etc(this); + else if (format>=FORMAT_ETC2_R11 && format<=FORMAT_ETC2_RGB8A1 && _image_decompress_etc) + _image_decompress_etc2(this); else return ERR_UNAVAILABLE; return OK; @@ -2101,7 +1861,12 @@ Error Image::compress(CompressMode p_mode) { switch(p_mode) { - case COMPRESS_BC: { + case COMPRESS_16BIT: { + + //ERR_FAIL_COND_V(!_image_compress_bc_func, ERR_UNAVAILABLE); + //_image_compress_bc_func(this); + } break; + case COMPRESS_S3TC: { ERR_FAIL_COND_V(!_image_compress_bc_func, ERR_UNAVAILABLE); _image_compress_bc_func(this); @@ -2118,6 +1883,11 @@ Error Image::compress(CompressMode p_mode) { } break; case COMPRESS_ETC: { + ERR_FAIL_COND_V(!_image_compress_etc_func, ERR_UNAVAILABLE); + _image_compress_etc_func(this); + } break; + case COMPRESS_ETC2: { + ERR_FAIL_COND_V(!_image_compress_etc_func, ERR_UNAVAILABLE); _image_compress_etc_func(this); } break; @@ -2133,14 +1903,14 @@ Image Image::compressed(int p_mode) { ret.compress((Image::CompressMode)p_mode); return ret; -}; +} Image::Image(const char **p_xpm) { width=0; height=0; - mipmaps=0; - format=FORMAT_GRAYSCALE; + mipmaps=false; + format=FORMAT_L8; create(p_xpm); } @@ -2150,37 +1920,28 @@ Image::Image(int p_width, int p_height,bool p_use_mipmaps, Format p_format) { width=0; height=0; - mipmaps=0; - format=FORMAT_GRAYSCALE; + mipmaps=p_use_mipmaps; + format=FORMAT_L8; create(p_width,p_height,p_use_mipmaps,p_format); } -Image::Image(int p_width, int p_height, int p_mipmaps, Format p_format, const DVector& p_data) { +Image::Image(int p_width, int p_height, bool p_mipmaps, Format p_format, const DVector& p_data) { width=0; height=0; - mipmaps=0; - format=FORMAT_GRAYSCALE; + mipmaps=p_mipmaps; + format=FORMAT_L8; create(p_width,p_height,p_mipmaps,p_format,p_data); } -Image Image::brushed(const Image& p_src, const Image& p_brush, const Point2& p_dest) const { - - Image img = *this; - img.brush_transfer(p_src,p_brush,p_dest); - return img; -} - Rect2 Image::get_used_rect() const { - if (format==FORMAT_GRAYSCALE || - format==FORMAT_RGB || - format==FORMAT_INDEXED || format>FORMAT_INDEXED_ALPHA) + if (format!=FORMAT_LA8 && format!=FORMAT_RGBA8) return Rect2(Point2(),Size2(width,height)); int len = data.size(); @@ -2188,16 +1949,18 @@ Rect2 Image::get_used_rect() const { if (len==0) return Rect2(); - int data_size = len; + //int data_size = len; DVector::Read r = data.read(); const unsigned char *rptr=r.ptr(); + int ps = format==FORMAT_LA8?2:4; int minx=0xFFFFFF,miny=0xFFFFFFF; int maxx=-1,maxy=-1; - for(int i=0;i2; + + bool opaque = rptr[(j*width+i)*ps+(ps-1)]>2; if (!opaque) continue; if (i>maxx) @@ -2225,101 +1988,47 @@ Image Image::get_rect(const Rect2& p_area) const { img.blit_rect(*this, p_area, Point2(0, 0)); return img; -}; - -void Image::brush_transfer(const Image& p_src, const Image& p_brush, const Point2& p_dest) { - - - ERR_FAIL_COND( width != p_src.width || height !=p_src.height); - - int dst_data_size = data.size(); - DVector::Write wp = data.write(); - unsigned char *dst_data_ptr=wp.ptr(); - - - int src_data_size = p_src.data.size(); - DVector::Read rp = p_src.data.read(); - const unsigned char *src_data_ptr=rp.ptr(); - - int brush_data_size = p_brush.data.size(); - DVector::Read bp = p_brush.data.read(); - const unsigned char *src_brush_ptr=bp.ptr(); - - int bw = p_brush.get_width(); - int bh = p_brush.get_height(); - int dx=p_dest.x; - int dy=p_dest.y; - - for(int i=dy;i= height) - continue; - for(int j=dx;j=width) - continue; - - BColor src = p_src._get_pixel(j,i,src_data_ptr,src_data_size); - BColor dst = _get_pixel(j,i,dst_data_ptr,dst_data_size); - BColor brush = p_brush._get_pixel(j-dx,i-dy,src_brush_ptr,brush_data_size); - uint32_t mult = brush.r; - dst.r = dst.r + (((int32_t(src.r)-int32_t(dst.r))*mult)>>8); - dst.g = dst.g + (((int32_t(src.g)-int32_t(dst.g))*mult)>>8); - dst.b = dst.b + (((int32_t(src.b)-int32_t(dst.b))*mult)>>8); - dst.a = dst.a + (((int32_t(src.a)-int32_t(dst.a))*mult)>>8); - _put_pixel(j,i,dst,dst_data_ptr); - } - } } - void Image::blit_rect(const Image& p_src, const Rect2& p_src_rect,const Point2& p_dest) { int dsize=data.size(); int srcdsize=p_src.data.size(); ERR_FAIL_COND( dsize==0 ); ERR_FAIL_COND( srcdsize==0 ); + ERR_FAIL_COND( format!=p_src.format ); + Rect2i local_src_rect = Rect2i(0,0,width,height).clip( Rect2i(p_dest+p_src_rect.pos,p_src_rect.size) ); - Rect2 rrect = Rect2(0,0,p_src.width,p_src.height).clip(p_src_rect); + if (local_src_rect.size.x<=0 || local_src_rect.size.y<=0) + return; + Rect2i src_rect( p_src_rect.pos + ( local_src_rect.pos - p_dest), local_src_rect.size ); DVector::Write wp = data.write(); - unsigned char *dst_data_ptr=wp.ptr(); + uint8_t *dst_data_ptr=wp.ptr(); DVector::Read rp = p_src.data.read(); - const unsigned char *src_data_ptr=rp.ptr(); + const uint8_t *src_data_ptr=rp.ptr(); - if ((format==FORMAT_INDEXED || format == FORMAT_INDEXED_ALPHA) && (p_src.format==FORMAT_INDEXED || p_src.format == FORMAT_INDEXED_ALPHA)) { + int pixel_size=get_format_pixel_size(format); - Point2i desti(p_dest.x, p_dest.y); - Point2i srci(rrect.pos.x, rrect.pos.y); + for(int i=0;i= height) - continue; - for(int j=0;j=width) - continue; + int src_x = src_rect.pos.x+j; + int src_y = src_rect.pos.y+i; - dst_data_ptr[width * (desti.y + i) + desti.x + j] = src_data_ptr[p_src.width * (srci.y+i) + srci.x+j]; - } - } + int dst_x = local_src_rect.pos.x+j; + int dst_y = local_src_rect.pos.y+i; - } else { + const uint8_t *src = &src_data_ptr[ (src_y*p_src.width+src_x)*pixel_size]; + uint8_t *dst = &dst_data_ptr[ (dst_y*width+dst_x)*pixel_size]; - for(int i=0;i= height) - continue; - for(int j=0;j=width) - continue; - - _put_pixel(p_dest.x+j,p_dest.y+i,p_src._get_pixel(rrect.pos.x+j,rrect.pos.y+i,src_data_ptr,srcdsize),dst_data_ptr); + for(int k=0;k (*Image::lossy_packer)(const Image& ,float )=NULL; Image (*Image::lossy_unpacker)(const DVector& )=NULL; @@ -2352,7 +2063,7 @@ void Image::set_compress_bc_func(void (*p_compress_func)(Image *)) { void Image::normalmap_to_xy() { - convert(Image::FORMAT_RGBA); + convert(Image::FORMAT_RGBA8); { int len = data.size()/4; @@ -2367,7 +2078,7 @@ void Image::normalmap_to_xy() { } } - convert(Image::FORMAT_GRAYSCALE_ALPHA); + convert(Image::FORMAT_LA8); } void Image::srgb_to_linear() { @@ -2378,9 +2089,9 @@ void Image::srgb_to_linear() { static const uint8_t srgb2lin[256]={0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15, 15, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 22, 22, 23, 23, 24, 24, 25, 26, 26, 27, 27, 28, 29, 29, 30, 31, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 38, 39, 40, 41, 42, 42, 43, 44, 45, 46, 47, 47, 48, 49, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 87, 88, 89, 90, 92, 93, 94, 95, 97, 98, 99, 101, 102, 103, 105, 106, 107, 109, 110, 112, 113, 114, 116, 117, 119, 120, 122, 123, 125, 126, 128, 129, 131, 132, 134, 135, 137, 139, 140, 142, 144, 145, 147, 148, 150, 152, 153, 155, 157, 159, 160, 162, 164, 166, 167, 169, 171, 173, 175, 176, 178, 180, 182, 184, 186, 188, 190, 192, 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 218, 220, 222, 224, 226, 228, 230, 232, 235, 237, 239, 241, 243, 245, 248, 250, 252}; - ERR_FAIL_COND( format!=FORMAT_RGB && format!=FORMAT_RGBA ); + ERR_FAIL_COND( format!=FORMAT_RGB8 && format!=FORMAT_RGBA8 ); - if (format==FORMAT_RGBA) { + if (format==FORMAT_RGBA8) { int len = data.size()/4; DVector::Write wp = data.write(); @@ -2393,7 +2104,7 @@ void Image::srgb_to_linear() { data_ptr[(i<<2)+2]=srgb2lin[ data_ptr[(i<<2)+2] ]; } - } else if (format==FORMAT_RGB) { + } else if (format==FORMAT_RGB8) { int len = data.size()/3; DVector::Write wp = data.write(); @@ -2414,7 +2125,7 @@ void Image::premultiply_alpha() { if (data.size()==0) return; - if (format!=FORMAT_RGBA) + if (format!=FORMAT_RGBA8) return; //not needed DVector::Write wp = data.write(); @@ -2424,11 +2135,11 @@ void Image::premultiply_alpha() { for(int i=0;i>8; - bc.g=(int(bc.g)*int(bc.a))>>8; - bc.b=(int(bc.b)*int(bc.a))>>8; - _put_pixel(j,i,bc,data_ptr); + uint8_t *ptr = &data_ptr[(i*width+j)*4]; + + ptr[0]=(uint16_t(ptr[0])*uint16_t(ptr[3]))>>8; + ptr[1]=(uint16_t(ptr[1])*uint16_t(ptr[3]))>>8; + ptr[2]=(uint16_t(ptr[2])*uint16_t(ptr[3]))>>8; } } } @@ -2438,12 +2149,12 @@ void Image::fix_alpha_edges() { if (data.size()==0) return; - if (format!=FORMAT_RGBA) + if (format!=FORMAT_RGBA8) return; //not needed DVector dcopy = data; DVector::Read rp = data.read(); - const uint8_t *rptr=rp.ptr(); + const uint8_t *srcptr=rp.ptr(); DVector::Write wp = data.write(); unsigned char *data_ptr=wp.ptr(); @@ -2455,13 +2166,16 @@ void Image::fix_alpha_edges() { for(int i=0;i=alpha_treshold) + const uint8_t *rptr = &srcptr[(i*width+j)*4]; + uint8_t *wptr = &data_ptr[(i*width+j)*4]; + + if (rptr[3]>=alpha_treshold) continue; int closest_dist=max_dist; - BColor closest_color; - closest_color.a=bc.a; + uint8_t closest_color[3]; + + int from_x = MAX(0,j-max_radius); int to_x = MIN(width-1,j+max_radius); int from_y = MAX(0,i-max_radius); @@ -2476,22 +2190,25 @@ void Image::fix_alpha_edges() { if (dist>=closest_dist) continue; - const uint8_t * rp = &rptr[(k*width+l)<<2]; + const uint8_t * rp = &srcptr[(k*width+l)<<2]; if (rp[3]& p_buffer); private: - //internal byte based color - struct BColor { - union { - uint8_t col[4]; - struct { - uint8_t r,g,b,a; - }; - }; - - bool operator==(const BColor& p_color) const { for(int i=0;i<4;i++) {if (col[i]!=p_color.col[i]) return false; } return true; } - _FORCE_INLINE_ uint8_t gray() const { return (uint16_t(col[0])+uint16_t(col[1])+uint16_t(col[2]))/3; } - _FORCE_INLINE_ BColor() {} - BColor(uint8_t p_r,uint8_t p_g,uint8_t p_b,uint8_t p_a=255) { col[0]=p_r; col[1]=p_g; col[2]=p_b; col[3]=p_a; } - }; - - //median cut classes - - struct BColorPos { - - uint32_t index; - BColor color; - struct SortR { - - bool operator()(const BColorPos& ca, const BColorPos& cb) const { return ca.color.r < cb.color.r; } - }; - - struct SortG { - - bool operator()(const BColorPos& ca, const BColorPos& cb) const { return ca.color.g < cb.color.g; } - }; - - struct SortB { - - bool operator()(const BColorPos& ca, const BColorPos& cb) const { return ca.color.b < cb.color.b; } - }; - - struct SortA { - - bool operator()(const BColorPos& ca, const BColorPos& cb) const { return ca.color.a < cb.color.a; } - }; - }; - - struct SPTree { - - bool leaf; - uint8_t split_plane; - uint8_t split_value; - union { - int left; - int color; - }; - int right; - SPTree() { leaf=true; left=-1; right=-1;} - }; - - struct MCBlock { - - BColorPos min_color,max_color; - BColorPos *colors; - int sp_idx; - int color_count; - int get_longest_axis_index() const; - int get_longest_axis_length() const; - bool operator<(const MCBlock& p_block) const; - void shrink(); - MCBlock(); - MCBlock(BColorPos *p_colors,int p_color_count); - }; - Format format; DVector data; - int width,height,mipmaps; + int width,height; + bool mipmaps; - - - _FORCE_INLINE_ BColor _get_pixel(int p_x,int p_y,const unsigned char *p_data,int p_data_size) const; - _FORCE_INLINE_ BColor _get_pixelw(int p_x,int p_y,int p_width,const unsigned char *p_data,int p_data_size) const; - _FORCE_INLINE_ void _put_pixelw(int p_x,int p_y, int p_width, const BColor& p_color, unsigned char *p_data); - _FORCE_INLINE_ void _put_pixel(int p_x,int p_y, const BColor& p_color, unsigned char *p_data); _FORCE_INLINE_ void _get_mipmap_offset_and_size(int p_mipmap,int &r_offset, int &r_width, int &r_height) const; //get where the mipmap begins in data - _FORCE_INLINE_ static void _get_format_min_data_size(Format p_format,int &r_w, int &r_h); static int _get_dst_image_size(int p_width, int p_height, Format p_format,int &r_mipmaps,int p_mipmaps=-1); bool _can_modify(Format p_format) const; + _FORCE_INLINE_ void _put_pixelb(int p_x,int p_y, uint32_t p_pixelsize,uint8_t *p_dst,const uint8_t *p_src); + _FORCE_INLINE_ void _get_pixelb(int p_x,int p_y, uint32_t p_pixelsize,const uint8_t *p_src,uint8_t *p_dst); public: @@ -207,20 +149,11 @@ public: int get_width() const; ///< Get image width int get_height() const; ///< Get image height - int get_mipmaps() const; + bool has_mipmaps() const; + int get_mipmap_count() const; /** - * Get a pixel from the image. for grayscale or indexed formats, use Color::gray to obtain the actual - * value. - */ - Color get_pixel(int p_x,int p_y,int p_mipmap=0) const; - /** - * Set a pixel into the image. for grayscale or indexed formats, a suitable Color constructor. - */ - void put_pixel(int p_x,int p_y, const Color& p_color,int p_mipmap=0); /* alpha and index are averaged */ - - /** - * Convert the image to another format, as close as it can be done. + * Convert the image to another format, conversion only to raw byte format */ void convert( Format p_new_format ); @@ -259,25 +192,21 @@ public: void flip_x(); void flip_y(); + /** * Generate a mipmap to an image (creates an image 1/4 the size, with averaging of 4->1) */ - Error generate_mipmaps(int p_amount=-1,bool p_keep_existing=false); + Error generate_mipmaps(bool p_keep_existing=false); void clear_mipmaps(); - /** - * Generate a normal map from a grayscale image - */ - - void make_normalmap(float p_height_scale=1.0); /** * Create a new image of a given size and format. Current image will be lost */ void create(int p_width, int p_height, bool p_use_mipmaps, Format p_format); - void create(int p_width, int p_height, int p_mipmaps, Format p_format, const DVector& p_data); + void create(int p_width, int p_height, bool p_use_mipmaps, Format p_format, const DVector& p_data); void create( const char ** p_xpm ); /** @@ -301,7 +230,7 @@ public: /** * import an image of a specific size and format from a pointer */ - Image(int p_width, int p_height, int p_mipmaps, Format p_format, const DVector& p_data); + Image(int p_width, int p_height, bool p_mipmaps, Format p_format, const DVector& p_data); enum AlphaMode { ALPHA_NONE, @@ -312,32 +241,27 @@ public: AlphaMode detect_alpha() const; bool is_invisible() const; - void put_indexed_pixel(int p_x, int p_y, uint8_t p_idx,int p_mipmap=0); - uint8_t get_indexed_pixel(int p_x, int p_y,int p_mipmap=0) const; - void set_pallete(const DVector& p_data); - static int get_format_pixel_size(Format p_format); static int get_format_pixel_rshift(Format p_format); - static int get_format_pallete_size(Format p_format); + static void get_format_min_pixel_size(Format p_format,int &r_w, int &r_h); + static int get_image_data_size(int p_width, int p_height, Format p_format,int p_mipmaps=0); static int get_image_required_mipmaps(int p_width, int p_height, Format p_format); - - bool operator==(const Image& p_image) const; - void quantize(); - enum CompressMode { - COMPRESS_BC, + COMPRESS_16BIT, + COMPRESS_S3TC, COMPRESS_PVRTC2, COMPRESS_PVRTC4, - COMPRESS_ETC + COMPRESS_ETC, + COMPRESS_ETC2 }; - Error compress(CompressMode p_mode=COMPRESS_BC); + Error compress(CompressMode p_mode=COMPRESS_S3TC); Image compressed(int p_mode); /* from the Image::CompressMode enum */ Error decompress(); Image decompressed() const; @@ -349,8 +273,6 @@ public: void normalmap_to_xy(); void blit_rect(const Image& p_src, const Rect2& p_src_rect,const Point2& p_dest); - void brush_transfer(const Image& p_src, const Image& p_brush, const Point2& p_dest); - Image brushed(const Image& p_src, const Image& p_brush, const Point2& p_dest) const; Rect2 get_used_rect() const; Image get_rect(const Rect2& p_area) const; diff --git a/core/image_quantize.cpp b/core/image_quantize.cpp index f6fe7a88a03..e69de29bb2d 100644 --- a/core/image_quantize.cpp +++ b/core/image_quantize.cpp @@ -1,365 +0,0 @@ -/*************************************************************************/ -/* image_quantize.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "image.h" -#include -#include "print_string.h" -#ifdef TOOLS_ENABLED -#include "set.h" -#include "sort.h" -#include "os/os.h" - -//#define QUANTIZE_SPEED_OVER_QUALITY - - -Image::MCBlock::MCBlock() { - - -} - -Image::MCBlock::MCBlock(BColorPos *p_colors,int p_color_count) { - - colors=p_colors; - color_count=p_color_count; - min_color.color=BColor(255,255,255,255); - max_color.color=BColor(0,0,0,0); - shrink(); -} - -int Image::MCBlock::get_longest_axis_index() const { - - int max_dist=-1; - int max_index=0; - - for(int i=0;i<4;i++) { - - int d = max_color.color.col[i]-min_color.color.col[i]; - if (d>max_dist) { - max_index=i; - max_dist=d; - } - } - - return max_index; -} -int Image::MCBlock::get_longest_axis_length() const { - - int max_dist=-1; - - for(int i=0;i<4;i++) { - - int d = max_color.color.col[i]-min_color.color.col[i]; - if (d>max_dist) { - max_dist=d; - } - } - - return max_dist; -} - -bool Image::MCBlock::operator<(const MCBlock& p_block) const { - - int alen = get_longest_axis_length(); - int blen = p_block.get_longest_axis_length(); - if (alen==blen) { - - return colors < p_block.colors; - } else - return alen < blen; - -} - -void Image::MCBlock::shrink() { - - min_color=colors[0]; - max_color=colors[0]; - - for(int i=1;ihas_environment("QUANTIZE_FAST"); - - convert(FORMAT_RGBA); - - ERR_FAIL_COND( format!=FORMAT_RGBA ); - - DVector indexed_data; - - - { - int color_count = data.size()/4; - - ERR_FAIL_COND(color_count==0); - - Set block_queue; - - DVector data_colors; - data_colors.resize(color_count); - - DVector::Write dcw=data_colors.write(); - - DVector::Read dr = data.read(); - const BColor * drptr=(const BColor*)&dr[0]; - BColorPos *bcptr=&dcw[0]; - - - - { - for(int i=0;iget().color_count > 1 ) { - - MCBlock longest = block_queue.back()->get(); - //printf("longest: %i (%i)\n",longest.get_longest_axis_index(),longest.get_longest_axis_length()); - - block_queue.erase(block_queue.back()); - - BColorPos *first = longest.colors; - BColorPos *median = longest.colors + (longest.color_count+1)/2; - BColorPos *end = longest.colors + longest.color_count; - -#if 0 - int lai =longest.get_longest_axis_index(); - switch(lai) { -#if 0 - case 0: { SortArray sort; sort.sort(first,end-first); } break; - case 1: { SortArray sort; sort.sort(first,end-first); } break; - case 2: { SortArray sort; sort.sort(first,end-first); } break; - case 3: { SortArray sort; sort.sort(first,end-first); } break; -#else - case 0: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 1: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 2: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 3: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; -#endif - - } - - //avoid same color from being split in 2 - //search forward and flip - BColorPos *median_end=median; - BColorPos *p=median_end+1; - - while(p!=end) { - if (median_end->color==p->color) { - SWAP(*(median_end+1),*p); - median_end++; - } - p++; - } - - //search backward and flip - BColorPos *median_begin=median; - p=median_begin-1; - - while(p!=(first-1)) { - if (median_begin->color==p->color) { - SWAP(*(median_begin-1),*p); - median_begin--; - } - p--; - } - - - if (first < median_begin) { - median=median_begin; - } else if (median_end < end-1) { - median=median_end+1; - } else { - break; //shouldn't have arrived here, since it means all pixels are equal, but wathever - } - - MCBlock left(first,median-first); - MCBlock right(median,end-median); - - block_queue.insert(left); - block_queue.insert(right); - -#else - switch(longest.get_longest_axis_index()) { - case 0: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 1: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 2: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - case 3: { SortArray sort; sort.nth_element(0,end-first,median-first,first); } break; - - } - - MCBlock left(first,median-first); - MCBlock right(median,end-median); - - block_queue.insert(left); - block_queue.insert(right); - - -#endif - - - } - - while(block_queue.size() > 256) { - - block_queue.erase(block_queue.front());// erase least significant - } - - int res_colors=0; - - int comp_size = (has_alpha?4:3); - indexed_data.resize(color_count + 256*comp_size); - - DVector::Write iw = indexed_data.write(); - uint8_t *iwptr=&iw[0]; - BColor pallete[256]; - - // print_line("applying quantization - res colors "+itos(block_queue.size())); - - while(block_queue.size()) { - - const MCBlock &b = block_queue.back()->get(); - - uint64_t sum[4]={0,0,0,0}; - - for(int i=0;i::Write(); - //dr = DVector::Read(); - //wb = DVector::Write(); - } - - print_line(itos(indexed_data.size())); - data=indexed_data; - format=has_alpha?FORMAT_INDEXED_ALPHA:FORMAT_INDEXED; - - -} //do none - - - -#else - - -void Image::quantize() {} //do none - - -#endif diff --git a/core/io/marshalls.cpp b/core/io/marshalls.cpp index c9bd38c6542..13817b762de 100644 --- a/core/io/marshalls.cpp +++ b/core/io/marshalls.cpp @@ -1060,7 +1060,7 @@ Error encode_variant(const Variant& p_variant, uint8_t *r_buffer, int &r_len) { if (buf) { encode_uint32(image.get_format(),&buf[0]); - encode_uint32(image.get_mipmaps(),&buf[4]); + encode_uint32(image.has_mipmaps(),&buf[4]); encode_uint32(image.get_width(),&buf[8]); encode_uint32(image.get_height(),&buf[12]); int ds=data.size(); diff --git a/core/io/resource_format_binary.cpp b/core/io/resource_format_binary.cpp index 0544fd6ba8b..8db06bf3d9c 100644 --- a/core/io/resource_format_binary.cpp +++ b/core/io/resource_format_binary.cpp @@ -74,29 +74,6 @@ enum { IMAGE_ENCODING_LOSSLESS=2, IMAGE_ENCODING_LOSSY=3, - IMAGE_FORMAT_GRAYSCALE=0, - IMAGE_FORMAT_INTENSITY=1, - IMAGE_FORMAT_GRAYSCALE_ALPHA=2, - IMAGE_FORMAT_RGB=3, - IMAGE_FORMAT_RGBA=4, - IMAGE_FORMAT_INDEXED=5, - IMAGE_FORMAT_INDEXED_ALPHA=6, - IMAGE_FORMAT_BC1=7, - IMAGE_FORMAT_BC2=8, - IMAGE_FORMAT_BC3=9, - IMAGE_FORMAT_BC4=10, - IMAGE_FORMAT_BC5=11, - IMAGE_FORMAT_PVRTC2=12, - IMAGE_FORMAT_PVRTC2_ALPHA=13, - IMAGE_FORMAT_PVRTC4=14, - IMAGE_FORMAT_PVRTC4_ALPHA=15, - IMAGE_FORMAT_ETC=16, - IMAGE_FORMAT_ATC=17, - IMAGE_FORMAT_ATC_ALPHA_EXPLICIT=18, - IMAGE_FORMAT_ATC_ALPHA_INTERPOLATED=19, - IMAGE_FORMAT_CUSTOM=30, - - OBJECT_EMPTY=0, OBJECT_EXTERNAL_RESOURCE=1, OBJECT_INTERNAL_RESOURCE=2, @@ -269,38 +246,22 @@ Error ResourceInteractiveLoaderBinary::parse_variant(Variant& r_v) { uint32_t height = f->get_32(); uint32_t mipmaps = f->get_32(); uint32_t format = f->get_32(); - Image::Format fmt; - switch(format) { + const uint32_t format_version_shift=24; + const uint32_t format_version_mask=format_version_shift-1; - case IMAGE_FORMAT_GRAYSCALE: { fmt=Image::FORMAT_GRAYSCALE; } break; - case IMAGE_FORMAT_INTENSITY: { fmt=Image::FORMAT_INTENSITY; } break; - case IMAGE_FORMAT_GRAYSCALE_ALPHA: { fmt=Image::FORMAT_GRAYSCALE_ALPHA; } break; - case IMAGE_FORMAT_RGB: { fmt=Image::FORMAT_RGB; } break; - case IMAGE_FORMAT_RGBA: { fmt=Image::FORMAT_RGBA; } break; - case IMAGE_FORMAT_INDEXED: { fmt=Image::FORMAT_INDEXED; } break; - case IMAGE_FORMAT_INDEXED_ALPHA: { fmt=Image::FORMAT_INDEXED_ALPHA; } break; - case IMAGE_FORMAT_BC1: { fmt=Image::FORMAT_BC1; } break; - case IMAGE_FORMAT_BC2: { fmt=Image::FORMAT_BC2; } break; - case IMAGE_FORMAT_BC3: { fmt=Image::FORMAT_BC3; } break; - case IMAGE_FORMAT_BC4: { fmt=Image::FORMAT_BC4; } break; - case IMAGE_FORMAT_BC5: { fmt=Image::FORMAT_BC5; } break; - case IMAGE_FORMAT_PVRTC2: { fmt=Image::FORMAT_PVRTC2; } break; - case IMAGE_FORMAT_PVRTC2_ALPHA: { fmt=Image::FORMAT_PVRTC2_ALPHA; } break; - case IMAGE_FORMAT_PVRTC4: { fmt=Image::FORMAT_PVRTC4; } break; - case IMAGE_FORMAT_PVRTC4_ALPHA: { fmt=Image::FORMAT_PVRTC4_ALPHA; } break; - case IMAGE_FORMAT_ETC: { fmt=Image::FORMAT_ETC; } break; - case IMAGE_FORMAT_ATC: { fmt=Image::FORMAT_ATC; } break; - case IMAGE_FORMAT_ATC_ALPHA_EXPLICIT: { fmt=Image::FORMAT_ATC_ALPHA_EXPLICIT; } break; - case IMAGE_FORMAT_ATC_ALPHA_INTERPOLATED: { fmt=Image::FORMAT_ATC_ALPHA_INTERPOLATED; } break; - case IMAGE_FORMAT_CUSTOM: { fmt=Image::FORMAT_CUSTOM; } break; - default: { + uint32_t format_version = format>>format_version_shift; - ERR_FAIL_V(ERR_FILE_CORRUPT); - } + const uint32_t current_version = 0; + if (format_version>current_version) { + ERR_PRINT("Format version for encoded binary image is too new"); + return ERR_PARSE_ERROR; } + Image::Format fmt=Image::Format(format&format_version_mask); //if format changes, we can add a compatibility bit on top + + uint32_t datalen = f->get_32(); DVector imgdata; @@ -1599,7 +1560,7 @@ void ResourceFormatSaverBinaryInstance::write_variant(const Variant& p_property, int encoding=IMAGE_ENCODING_RAW; float quality=0.7; - if (val.get_format() <= Image::FORMAT_INDEXED_ALPHA) { + if (!val.is_compressed()) { //can only compress uncompressed stuff if (p_hint.hint==PROPERTY_HINT_IMAGE_COMPRESS_LOSSY && Image::lossy_packer) { @@ -1621,33 +1582,8 @@ void ResourceFormatSaverBinaryInstance::write_variant(const Variant& p_property, f->store_32(val.get_width()); f->store_32(val.get_height()); - f->store_32(val.get_mipmaps()); - switch(val.get_format()) { - - case Image::FORMAT_GRAYSCALE: f->store_32(IMAGE_FORMAT_GRAYSCALE ); break; ///< one byte per pixel: f->store_32(IMAGE_FORMAT_ ); break; 0-255 - case Image::FORMAT_INTENSITY: f->store_32(IMAGE_FORMAT_INTENSITY ); break; ///< one byte per pixel: f->store_32(IMAGE_FORMAT_ ); break; 0-255 - case Image::FORMAT_GRAYSCALE_ALPHA: f->store_32(IMAGE_FORMAT_GRAYSCALE_ALPHA ); break; ///< two bytes per pixel: f->store_32(IMAGE_FORMAT_ ); break; 0-255. alpha 0-255 - case Image::FORMAT_RGB: f->store_32(IMAGE_FORMAT_RGB ); break; ///< one byte R: f->store_32(IMAGE_FORMAT_ ); break; one byte G: f->store_32(IMAGE_FORMAT_ ); break; one byte B - case Image::FORMAT_RGBA: f->store_32(IMAGE_FORMAT_RGBA ); break; ///< one byte R: f->store_32(IMAGE_FORMAT_ ); break; one byte G: f->store_32(IMAGE_FORMAT_ ); break; one byte B: f->store_32(IMAGE_FORMAT_ ); break; one byte A - case Image::FORMAT_INDEXED: f->store_32(IMAGE_FORMAT_INDEXED ); break; ///< index byte 0-256: f->store_32(IMAGE_FORMAT_ ); break; and after image end: f->store_32(IMAGE_FORMAT_ ); break; 256*3 bytes of palette - case Image::FORMAT_INDEXED_ALPHA: f->store_32(IMAGE_FORMAT_INDEXED_ALPHA ); break; ///< index byte 0-256: f->store_32(IMAGE_FORMAT_ ); break; and after image end: f->store_32(IMAGE_FORMAT_ ); break; 256*4 bytes of palette (alpha) - case Image::FORMAT_BC1: f->store_32(IMAGE_FORMAT_BC1 ); break; // DXT1 - case Image::FORMAT_BC2: f->store_32(IMAGE_FORMAT_BC2 ); break; // DXT3 - case Image::FORMAT_BC3: f->store_32(IMAGE_FORMAT_BC3 ); break; // DXT5 - case Image::FORMAT_BC4: f->store_32(IMAGE_FORMAT_BC4 ); break; // ATI1 - case Image::FORMAT_BC5: f->store_32(IMAGE_FORMAT_BC5 ); break; // ATI2 - case Image::FORMAT_PVRTC2: f->store_32(IMAGE_FORMAT_PVRTC2 ); break; - case Image::FORMAT_PVRTC2_ALPHA: f->store_32(IMAGE_FORMAT_PVRTC2_ALPHA ); break; - case Image::FORMAT_PVRTC4: f->store_32(IMAGE_FORMAT_PVRTC4 ); break; - case Image::FORMAT_PVRTC4_ALPHA: f->store_32(IMAGE_FORMAT_PVRTC4_ALPHA ); break; - case Image::FORMAT_ETC: f->store_32(IMAGE_FORMAT_ETC); break; - case Image::FORMAT_ATC: f->store_32(IMAGE_FORMAT_ATC); break; - case Image::FORMAT_ATC_ALPHA_EXPLICIT: f->store_32(IMAGE_FORMAT_ATC_ALPHA_EXPLICIT); break; - case Image::FORMAT_ATC_ALPHA_INTERPOLATED: f->store_32(IMAGE_FORMAT_ATC_ALPHA_INTERPOLATED); break; - case Image::FORMAT_CUSTOM: f->store_32(IMAGE_FORMAT_CUSTOM ); break; - default: {} - - } + f->store_32(val.has_mipmaps()); + f->store_32(val.get_format()); //if format changes we can add a compatibility version bit int dlen = val.get_data().size(); f->store_32(dlen); diff --git a/core/io/resource_format_xml.cpp b/core/io/resource_format_xml.cpp index 44fbaf02acc..bbd395f21af 100644 --- a/core/io/resource_format_xml.cpp +++ b/core/io/resource_format_xml.cpp @@ -553,39 +553,39 @@ Error ResourceInteractiveLoaderXML::parse_property(Variant& r_v, String &r_name) Image::Format imgformat; - +/* if (format=="grayscale") { - imgformat=Image::FORMAT_GRAYSCALE; + imgformat=Image::FORMAT_L8; } else if (format=="intensity") { imgformat=Image::FORMAT_INTENSITY; } else if (format=="grayscale_alpha") { - imgformat=Image::FORMAT_GRAYSCALE_ALPHA; + imgformat=Image::FORMAT_LA8; } else if (format=="rgb") { - imgformat=Image::FORMAT_RGB; + imgformat=Image::FORMAT_RGB8; } else if (format=="rgba") { - imgformat=Image::FORMAT_RGBA; + imgformat=Image::FORMAT_RGBA8; } else if (format=="indexed") { imgformat=Image::FORMAT_INDEXED; } else if (format=="indexed_alpha") { imgformat=Image::FORMAT_INDEXED_ALPHA; } else if (format=="bc1") { - imgformat=Image::FORMAT_BC1; + imgformat=Image::FORMAT_DXT1; } else if (format=="bc2") { - imgformat=Image::FORMAT_BC2; + imgformat=Image::FORMAT_DXT3; } else if (format=="bc3") { - imgformat=Image::FORMAT_BC3; + imgformat=Image::FORMAT_DXT5; } else if (format=="bc4") { - imgformat=Image::FORMAT_BC4; + imgformat=Image::FORMAT_ATI1; } else if (format=="bc5") { - imgformat=Image::FORMAT_BC5; + imgformat=Image::FORMAT_ATI2; } else if (format=="pvrtc2") { imgformat=Image::FORMAT_PVRTC2; } else if (format=="pvrtc2a") { - imgformat=Image::FORMAT_PVRTC2_ALPHA; + imgformat=Image::FORMAT_PVRTC2A; } else if (format=="pvrtc4") { imgformat=Image::FORMAT_PVRTC4; } else if (format=="pvrtc4a") { - imgformat=Image::FORMAT_PVRTC4_ALPHA; + imgformat=Image::FORMAT_PVRTC4A; } else if (format=="etc") { imgformat=Image::FORMAT_ETC; } else if (format=="atc") { @@ -599,7 +599,7 @@ Error ResourceInteractiveLoaderXML::parse_property(Variant& r_v, String &r_name) } else { ERR_FAIL_V( ERR_FILE_CORRUPT ); - } + }*/ int datasize; @@ -614,13 +614,6 @@ Error ResourceInteractiveLoaderXML::parse_property(Variant& r_v, String &r_name) return OK; }; - if (imgformat==Image::FORMAT_CUSTOM) { - - datasize=custom_size; - } else { - - datasize = Image::get_image_data_size(h,w,imgformat,mipmaps); - } if (datasize==0) { //r_v = Image(w, h, imgformat); @@ -2186,33 +2179,33 @@ void ResourceFormatSaverXMLInstance::write_property(const String& p_name,const V params+="encoding=\"raw\""; params+=" width=\""+itos(img.get_width())+"\""; params+=" height=\""+itos(img.get_height())+"\""; - params+=" mipmaps=\""+itos(img.get_mipmaps())+"\""; - + params+=" mipmaps=\""+itos(img.has_mipmaps())+"\""; +/* switch(img.get_format()) { - case Image::FORMAT_GRAYSCALE: params+=" format=\"grayscale\""; break; + case Image::FORMAT_L8: params+=" format=\"grayscale\""; break; case Image::FORMAT_INTENSITY: params+=" format=\"intensity\""; break; - case Image::FORMAT_GRAYSCALE_ALPHA: params+=" format=\"grayscale_alpha\""; break; - case Image::FORMAT_RGB: params+=" format=\"rgb\""; break; - case Image::FORMAT_RGBA: params+=" format=\"rgba\""; break; + case Image::FORMAT_LA8: params+=" format=\"grayscale_alpha\""; break; + case Image::FORMAT_RGB8: params+=" format=\"rgb\""; break; + case Image::FORMAT_RGBA8: params+=" format=\"rgba\""; break; case Image::FORMAT_INDEXED : params+=" format=\"indexed\""; break; case Image::FORMAT_INDEXED_ALPHA: params+=" format=\"indexed_alpha\""; break; - case Image::FORMAT_BC1: params+=" format=\"bc1\""; break; - case Image::FORMAT_BC2: params+=" format=\"bc2\""; break; - case Image::FORMAT_BC3: params+=" format=\"bc3\""; break; - case Image::FORMAT_BC4: params+=" format=\"bc4\""; break; - case Image::FORMAT_BC5: params+=" format=\"bc5\""; break; + case Image::FORMAT_DXT1: params+=" format=\"bc1\""; break; + case Image::FORMAT_DXT3: params+=" format=\"bc2\""; break; + case Image::FORMAT_DXT5: params+=" format=\"bc3\""; break; + case Image::FORMAT_ATI1: params+=" format=\"bc4\""; break; + case Image::FORMAT_ATI2: params+=" format=\"bc5\""; break; case Image::FORMAT_PVRTC2: params+=" format=\"pvrtc2\""; break; - case Image::FORMAT_PVRTC2_ALPHA: params+=" format=\"pvrtc2a\""; break; + case Image::FORMAT_PVRTC2A: params+=" format=\"pvrtc2a\""; break; case Image::FORMAT_PVRTC4: params+=" format=\"pvrtc4\""; break; - case Image::FORMAT_PVRTC4_ALPHA: params+=" format=\"pvrtc4a\""; break; + case Image::FORMAT_PVRTC4A: params+=" format=\"pvrtc4a\""; break; case Image::FORMAT_ETC: params+=" format=\"etc\""; break; case Image::FORMAT_ATC: params+=" format=\"atc\""; break; case Image::FORMAT_ATC_ALPHA_EXPLICIT: params+=" format=\"atcae\""; break; case Image::FORMAT_ATC_ALPHA_INTERPOLATED: params+=" format=\"atcai\""; break; case Image::FORMAT_CUSTOM: params+=" format=\"custom\" custom_size=\""+itos(img.get_data().size())+"\""; break; default: {} - } + }*/ } break; case Variant::NODE_PATH: type="node_path"; break; case Variant::OBJECT: { diff --git a/core/rid.cpp b/core/rid.cpp index 219c2f0e69d..1b7a58313a3 100644 --- a/core/rid.cpp +++ b/core/rid.cpp @@ -28,18 +28,16 @@ /*************************************************************************/ #include "rid.h" -static SafeRefCount current_id; +RID_Data::~RID_Data() { + +} + + +SafeRefCount RID_OwnerBase::refcount; + void RID_OwnerBase::init_rid() { - current_id.init(1); + refcount.init(); } - -ID RID_OwnerBase::new_ID() { - - - ID id = current_id.refval(); - return id; -} - diff --git a/core/rid.h b/core/rid.h index 2de6956096b..85a69ac0efa 100644 --- a/core/rid.h +++ b/core/rid.h @@ -33,183 +33,189 @@ #include "safe_refcount.h" #include "typedefs.h" #include "os/memory.h" -#include "hash_map.h" +#include "set.h" #include "list.h" /** @author Juan Linietsky */ + class RID_OwnerBase; -typedef uint32_t ID; +class RID_Data { + +friend class RID_OwnerBase; + +#ifndef DEBUG_ENABLED + RID_OwnerBase *_owner; +#endif + uint32_t _id; +public: + _FORCE_INLINE_ uint32_t get_id() const { return _id; } + + virtual ~RID_Data(); +}; + class RID { friend class RID_OwnerBase; - ID _id; - RID_OwnerBase *owner; + + mutable RID_Data *_data; + public: - _FORCE_INLINE_ ID get_id() const { return _id; } - bool operator==(const RID& p_rid) const { + _FORCE_INLINE_ RID_Data *get_data() const { return _data; } - return _id==p_rid._id; + _FORCE_INLINE_ bool operator==(const RID& p_rid) const { + + return _data==p_rid._data; } _FORCE_INLINE_ bool operator<(const RID& p_rid) const { - return _id < p_rid._id; + return _data < p_rid._data; } _FORCE_INLINE_ bool operator<=(const RID& p_rid) const { - return _id <= p_rid._id; + return _data <= p_rid._data; } _FORCE_INLINE_ bool operator>(const RID& p_rid) const { - return _id > p_rid._id; + return _data > p_rid._data; } - bool operator!=(const RID& p_rid) const { + _FORCE_INLINE_ bool operator!=(const RID& p_rid) const { - return _id!=p_rid._id; + return _data!=p_rid._data; } - _FORCE_INLINE_ bool is_valid() const { return _id>0; } + _FORCE_INLINE_ bool is_valid() const { return _data!=NULL; } - operator const void*() const { - return is_valid() ? this : 0; - }; + _FORCE_INLINE_ uint32_t get_id() const { return _data?_data->get_id():0; } _FORCE_INLINE_ RID() { - _id = 0; - owner=0; + _data=NULL; } + }; class RID_OwnerBase { protected: -friend class RID; - void set_id(RID& p_rid, ID p_id) const { p_rid._id=p_id; } - void set_ownage(RID& p_rid) const { p_rid.owner=const_cast(this); } - ID new_ID(); + + static SafeRefCount refcount; + _FORCE_INLINE_ void _set_data(RID& p_rid, RID_Data* p_data) { + p_rid._data=p_data; + refcount.ref(); + p_data->_id=refcount.get(); +#ifndef DEBUG_ENABLED + p_data->_owner=this; +#endif + } + +#ifndef DEBUG_ENABLED + + _FORCE_INLINE_ bool _is_owner(RID& p_rid) const { + + return this==p_rid._owner; + + } + + _FORCE_INLINE_ void _remove_owner(RID& p_rid) { + + return p_rid._owner=NULL; + + } +# +#endif + + public: - virtual bool owns(const RID& p_rid) const=0; - virtual void get_owned_list(List *p_owned) const=0; + + virtual void get_owned_list(List *p_owned)=0; static void init_rid(); - virtual ~RID_OwnerBase() {} }; -template +template class RID_Owner : public RID_OwnerBase { public: - - typedef void (*ReleaseNotifyFunc)(void*user,T *p_data); -private: - - Mutex *mutex; - mutable HashMap id_map; - +#ifdef DEBUG_ENABLED + mutable Set id_map; +#endif public: - RID make_rid(T * p_data) { + _FORCE_INLINE_ RID make_rid(T * p_data) { - if (thread_safe) { - mutex->lock(); - } - ID id = new_ID(); - id_map[id]=p_data; RID rid; - set_id(rid,id); - set_ownage(rid); + _set_data(rid,p_data); - if (thread_safe) { - mutex->unlock(); - } +#ifdef DEBUG_ENABLED + id_map.insert(p_data) ; +#endif return rid; } _FORCE_INLINE_ T * get(const RID& p_rid) { - if (thread_safe) { - mutex->lock(); - } +#ifdef DEBUG_ENABLED - T**elem = id_map.getptr(p_rid.get_id()); - - if (thread_safe) { - mutex->unlock(); - } - - ERR_FAIL_COND_V(!elem,NULL); - - return *elem; + ERR_FAIL_COND_V(!p_rid.is_valid(),NULL); + ERR_FAIL_COND_V(!id_map.has(p_rid.get_data()),NULL); +#endif + return static_cast(p_rid.get_data()); } - virtual bool owns(const RID& p_rid) const { + _FORCE_INLINE_ T * getornull(const RID& p_rid) { - if (thread_safe) { - mutex->lock(); - } - - T**elem = id_map.getptr(p_rid.get_id()); - - if (thread_safe) { - mutex->lock(); - } - - return elem!=NULL; - } - - virtual void free(RID p_rid) { - - if (thread_safe) { - mutex->lock(); - } - ERR_FAIL_COND(!owns(p_rid)); - id_map.erase(p_rid.get_id()); - } - virtual void get_owned_list(List *p_owned) const { - - if (thread_safe) { - mutex->lock(); - } - - const ID*id=NULL; - while((id=id_map.next(id))) { - - RID rid; - set_id(rid,*id); - set_ownage(rid); - p_owned->push_back(rid); - - } - - if (thread_safe) { - mutex->lock(); - } - - } - RID_Owner() { - - if (thread_safe) { - - mutex = Mutex::create(); +#ifdef DEBUG_ENABLED + + if (p_rid.get_data()) { + ERR_FAIL_COND_V(!id_map.has(p_rid.get_data()),NULL); } +#endif + return static_cast(p_rid.get_data()); } + _FORCE_INLINE_ bool owns(const RID& p_rid) const { - ~RID_Owner() { - - if (thread_safe) { - - memdelete(mutex); - } + if (p_rid.get_data()==NULL) + return false; +#ifdef DEBUG_ENABLED + return id_map.has(p_rid.get_data()); +#else + return _is_owner(p_rid); +#endif } + + void free(RID p_rid) { + +#ifdef DEBUG_ENABLED + id_map.erase(p_rid.get_data()); +#else + _remove_owner(p_rid); +#endif + } + + void get_owned_list(List *p_owned) { + + + +#ifdef DEBUG_ENABLED + + for (typename Set::Element *E=id_map.front();E;E=E->next()) { + RID r; + _set_data(r,static_cast(E->get())); + p_owned->push_back(r); + } +#endif + + } + }; diff --git a/core/variant_call.cpp b/core/variant_call.cpp index 51cd4c23995..bfbb95085ec 100644 --- a/core/variant_call.cpp +++ b/core/variant_call.cpp @@ -617,13 +617,9 @@ static void _call_##m_type##_##m_method(Variant& r_ret,Variant& p_self,const Var VCALL_PTR0R(Image,get_width); VCALL_PTR0R(Image,get_height); VCALL_PTR0R(Image,empty); - VCALL_PTR3R(Image,get_pixel); - VCALL_PTR4(Image, put_pixel); VCALL_PTR0R(Image,get_used_rect); - VCALL_PTR3R(Image,brushed); VCALL_PTR1R(Image,load); VCALL_PTR1R(Image,save_png); - VCALL_PTR3(Image,brush_transfer); VCALL_PTR1R(Image,get_rect); VCALL_PTR1R(Image,compressed); VCALL_PTR0R(Image,decompressed); @@ -1526,12 +1522,8 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl ADDFUNC0(IMAGE, INT, Image, get_width, varray()); ADDFUNC0(IMAGE, INT, Image, get_height, varray()); ADDFUNC0(IMAGE, BOOL, Image, empty, varray()); - ADDFUNC3(IMAGE, COLOR, Image, get_pixel, INT, "x", INT, "y", INT, "mipmap_level", varray(0)); - ADDFUNC4(IMAGE, NIL, Image, put_pixel, INT, "x", INT, "y", COLOR, "color", INT, "mipmap_level", varray(0)); - ADDFUNC3(IMAGE, IMAGE, Image, brushed, IMAGE, "src", IMAGE, "brush", VECTOR2, "pos", varray(0)); ADDFUNC1(IMAGE, INT, Image, load, STRING, "path", varray(0)); ADDFUNC1(IMAGE, INT, Image, save_png, STRING, "path", varray(0)); - ADDFUNC3(IMAGE, NIL, Image, brush_transfer, IMAGE, "src", IMAGE, "brush", VECTOR2, "pos", varray(0)); ADDFUNC0(IMAGE, RECT2, Image, get_used_rect, varray(0)); ADDFUNC1(IMAGE, IMAGE, Image, get_rect, RECT2, "area", varray(0)); ADDFUNC1(IMAGE, IMAGE, Image, compressed, INT, "format", varray(0)); @@ -1788,34 +1780,53 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl _VariantCall::add_constant(Variant::INPUT_EVENT,"ACTION",InputEvent::ACTION); - _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_BC",Image::COMPRESS_BC); + _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_16BIT",Image::COMPRESS_16BIT); + _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_S3TC",Image::COMPRESS_S3TC); _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_PVRTC2",Image::COMPRESS_PVRTC2); _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_PVRTC4",Image::COMPRESS_PVRTC4); _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_ETC",Image::COMPRESS_ETC); + _VariantCall::add_constant(Variant::IMAGE,"COMPRESS_ETC2",Image::COMPRESS_ETC2); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_GRAYSCALE",Image::FORMAT_GRAYSCALE); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_INTENSITY",Image::FORMAT_INTENSITY); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_GRAYSCALE_ALPHA",Image::FORMAT_GRAYSCALE_ALPHA); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGB",Image::FORMAT_RGB); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBA",Image::FORMAT_RGBA); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_INDEXED",Image::FORMAT_INDEXED); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_INDEXED_ALPHA",Image::FORMAT_INDEXED_ALPHA); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_YUV_422",Image::FORMAT_YUV_422); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_YUV_444",Image::FORMAT_YUV_444); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BC1",Image::FORMAT_BC1); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BC2",Image::FORMAT_BC2); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BC3",Image::FORMAT_BC3); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BC4",Image::FORMAT_BC4); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BC5",Image::FORMAT_BC5); + + + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_L8",Image::FORMAT_L8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_LA8",Image::FORMAT_LA8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_R8",Image::FORMAT_R8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RG8",Image::FORMAT_RG8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGB8",Image::FORMAT_RGB8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBA8",Image::FORMAT_RGBA8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGB565",Image::FORMAT_RGB565); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBA4444",Image::FORMAT_RGBA4444); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBA5551",Image::FORMAT_DXT1); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RF",Image::FORMAT_RF); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGF",Image::FORMAT_RGF); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBF",Image::FORMAT_RGBF); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBAF",Image::FORMAT_RGBAF); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RH",Image::FORMAT_RH); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGH",Image::FORMAT_RGH); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBH",Image::FORMAT_RGBH); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_RGBAH",Image::FORMAT_RGBAH); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_DXT1",Image::FORMAT_DXT1); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_DXT3",Image::FORMAT_DXT3); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_DXT5",Image::FORMAT_DXT5); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ATI1",Image::FORMAT_ATI1); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ATI2",Image::FORMAT_ATI2); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BPTC_RGBA",Image::FORMAT_BPTC_RGBA); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BPTC_RGBF",Image::FORMAT_BPTC_RGBF); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_BPTC_RGBFU",Image::FORMAT_BPTC_RGBFU); _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC2",Image::FORMAT_PVRTC2); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC2_ALPHA",Image::FORMAT_PVRTC2_ALPHA); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC2A",Image::FORMAT_PVRTC2A); _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC4",Image::FORMAT_PVRTC4); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC4_ALPHA",Image::FORMAT_PVRTC4_ALPHA); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_PVRTC4A",Image::FORMAT_PVRTC4A); _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC",Image::FORMAT_ETC); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ATC",Image::FORMAT_ATC); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ATC_ALPHA_EXPLICIT",Image::FORMAT_ATC_ALPHA_EXPLICIT); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ATC_ALPHA_INTERPOLATED",Image::FORMAT_ATC_ALPHA_INTERPOLATED); - _VariantCall::add_constant(Variant::IMAGE,"FORMAT_CUSTOM",Image::FORMAT_CUSTOM); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_R11",Image::FORMAT_ETC2_R11); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_R11S",Image::FORMAT_ETC2_R11S); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_RG11",Image::FORMAT_ETC2_RG11); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_RG11S",Image::FORMAT_ETC2_RG11S); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_RGB8",Image::FORMAT_ETC2_RGB8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_RGBA8",Image::FORMAT_ETC2_RGBA8); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_ETC2_RGB8A1",Image::FORMAT_ETC2_RGB8A1); + _VariantCall::add_constant(Variant::IMAGE,"FORMAT_MAX",Image::FORMAT_MAX); _VariantCall::add_constant(Variant::IMAGE,"INTERPOLATE_NEAREST",Image::INTERPOLATE_NEAREST); _VariantCall::add_constant(Variant::IMAGE,"INTERPOLATE_BILINEAR",Image::INTERPOLATE_BILINEAR); diff --git a/core/variant_parser.cpp b/core/variant_parser.cpp index 6b3828a5726..32358e1180e 100644 --- a/core/variant_parser.cpp +++ b/core/variant_parser.cpp @@ -755,8 +755,17 @@ Error VariantParser::parse_value(Token& token,Variant &value,Stream *p_stream,in } get_token(p_stream,token,line,r_err_str); - if (token.type!=TK_NUMBER) { - r_err_str="Expected number (mipmaps)"; + + bool has_mipmaps=false; + + if (token.type==TK_NUMBER) { + has_mipmaps=bool(token.value); + } else if (token.type==TK_IDENTIFIER && String(token.value)=="true") { + has_mipmaps=true; + } else if (token.type==TK_IDENTIFIER && String(token.value)=="false") { + has_mipmaps=false; + } else { + r_err_str="Expected number/true/false (mipmaps)"; return ERR_PARSE_ERROR; } @@ -778,32 +787,18 @@ Error VariantParser::parse_value(Token& token,Variant &value,Stream *p_stream,in String sformat=token.value; - Image::Format format; + Image::Format format=Image::FORMAT_MAX; - if (sformat=="GRAYSCALE") format=Image::FORMAT_GRAYSCALE; - else if (sformat=="INTENSITY") format=Image::FORMAT_INTENSITY; - else if (sformat=="GRAYSCALE_ALPHA") format=Image::FORMAT_GRAYSCALE_ALPHA; - else if (sformat=="RGB") format=Image::FORMAT_RGB; - else if (sformat=="RGBA") format=Image::FORMAT_RGBA; - else if (sformat=="INDEXED") format=Image::FORMAT_INDEXED; - else if (sformat=="INDEXED_ALPHA") format=Image::FORMAT_INDEXED_ALPHA; - else if (sformat=="BC1") format=Image::FORMAT_BC1; - else if (sformat=="BC2") format=Image::FORMAT_BC2; - else if (sformat=="BC3") format=Image::FORMAT_BC3; - else if (sformat=="BC4") format=Image::FORMAT_BC4; - else if (sformat=="BC5") format=Image::FORMAT_BC5; - else if (sformat=="PVRTC2") format=Image::FORMAT_PVRTC2; - else if (sformat=="PVRTC2_ALPHA") format=Image::FORMAT_PVRTC2_ALPHA; - else if (sformat=="PVRTC4") format=Image::FORMAT_PVRTC4; - else if (sformat=="PVRTC4_ALPHA") format=Image::FORMAT_PVRTC4_ALPHA; - else if (sformat=="ATC") format=Image::FORMAT_ATC; - else if (sformat=="ATC_ALPHA_EXPLICIT") format=Image::FORMAT_ATC_ALPHA_EXPLICIT; - else if (sformat=="ATC_ALPHA_INTERPOLATED") format=Image::FORMAT_ATC_ALPHA_INTERPOLATED; - else if (sformat=="CUSTOM") format=Image::FORMAT_CUSTOM; - else { - r_err_str="Invalid image format: '"+sformat+"'"; + for(int i=0;i - + - + - + - + @@ -16342,23 +16342,23 @@ - + - + - + - + - + - + - + diff --git a/drivers/SCsub b/drivers/SCsub index 8243483e176..7420d576043 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -12,7 +12,7 @@ SConscript('unix/SCsub'); SConscript('alsa/SCsub'); SConscript('pulseaudio/SCsub'); SConscript('windows/SCsub'); -SConscript('gles2/SCsub'); +SConscript('gles3/SCsub'); SConscript('gl_context/SCsub'); SConscript('pnm/SCsub'); diff --git a/drivers/dds/texture_loader_dds.cpp b/drivers/dds/texture_loader_dds.cpp index 0cc84f02f7b..23fb9f48f73 100644 --- a/drivers/dds/texture_loader_dds.cpp +++ b/drivers/dds/texture_loader_dds.cpp @@ -74,21 +74,20 @@ struct DDSFormatInfo { static const DDSFormatInfo dds_format_info[DDS_MAX]={ - {"DXT1",true,false,4,8,Image::FORMAT_BC1}, - {"DXT3",true,false,4,16,Image::FORMAT_BC2}, - {"DXT5",true,false,4,16,Image::FORMAT_BC3}, - {"ATI1",true,false,4,8,Image::FORMAT_BC4}, - {"ATI2",true,false,4,16,Image::FORMAT_BC5}, - {"BGRA8",false,false,1,4,Image::FORMAT_RGBA}, - {"BGR8",false,false,1,3,Image::FORMAT_RGB}, - {"RGBA8",false,false,1,4,Image::FORMAT_RGBA}, - {"RGB8",false,false,1,3,Image::FORMAT_RGB}, - {"BGR5A1",false,false,1,2,Image::FORMAT_RGBA}, - {"BGR565",false,false,1,2,Image::FORMAT_RGB}, - {"BGR10A2",false,false,1,4,Image::FORMAT_RGBA}, - {"INDEXED",false,true,1,1,Image::FORMAT_INDEXED}, - {"GRAYSCALE",false,false,1,1,Image::FORMAT_GRAYSCALE}, - {"GRAYSCALE_ALPHA",false,false,1,2,Image::FORMAT_GRAYSCALE_ALPHA} + {"DXT1",true,false,4,8,Image::FORMAT_DXT1}, + {"DXT3",true,false,4,16,Image::FORMAT_DXT3}, + {"DXT5",true,false,4,16,Image::FORMAT_DXT5}, + {"ATI1",true,false,4,8,Image::FORMAT_ATI1}, + {"ATI2",true,false,4,16,Image::FORMAT_ATI2}, + {"BGRA8",false,false,1,4,Image::FORMAT_RGBA8}, + {"BGR8",false,false,1,3,Image::FORMAT_RGB8}, + {"RGBA8",false,false,1,4,Image::FORMAT_RGBA8}, + {"RGB8",false,false,1,3,Image::FORMAT_RGB8}, + {"BGR5A1",false,false,1,2,Image::FORMAT_RGBA8}, + {"BGR565",false,false,1,2,Image::FORMAT_RGB8}, + {"BGR10A2",false,false,1,4,Image::FORMAT_RGBA8}, + {"GRAYSCALE",false,false,1,1,Image::FORMAT_L8}, + {"GRAYSCALE_ALPHA",false,false,1,2,Image::FORMAT_LA8} }; diff --git a/drivers/etc1/image_etc.cpp b/drivers/etc1/image_etc.cpp index cf2384240b9..176abc356c4 100644 --- a/drivers/etc1/image_etc.cpp +++ b/drivers/etc1/image_etc.cpp @@ -42,7 +42,7 @@ static void _decompress_etc(Image *p_img) { DVector::Read r = src.read(); - int mmc=p_img->get_mipmaps(); + int mmc=p_img->get_mipmap_count(); for(int i=0;i<=mmc;i++) { @@ -93,9 +93,9 @@ static void _decompress_etc(Image *p_img) { r=DVector::Read(); //print_line("Re Creating ETC into regular image: w "+itos(p_img->get_width())+" h "+itos(p_img->get_height())+" mm "+itos(p_img->get_mipmaps())); - *p_img=Image(p_img->get_width(),p_img->get_height(),p_img->get_mipmaps(),Image::FORMAT_RGB,dst); - if (p_img->get_mipmaps()) - p_img->generate_mipmaps(-1,true); + *p_img=Image(p_img->get_width(),p_img->get_height(),p_img->has_mipmaps(),Image::FORMAT_RGB8,dst); + if (p_img->has_mipmaps()) + p_img->generate_mipmaps(true); } @@ -108,11 +108,11 @@ static void _compress_etc(Image *p_img) { ERR_FAIL_COND( nearest_power_of_2(imgw)!=imgw || nearest_power_of_2(imgh)!=imgh ); - if (img.get_format()!=Image::FORMAT_RGB) - img.convert(Image::FORMAT_RGB); + if (img.get_format()!=Image::FORMAT_RGB8) + img.convert(Image::FORMAT_RGB8); - int mmc=img.get_mipmaps(); + int mmc=img.get_mipmap_count(); if (mmc==0) img.generate_mipmaps(); // force mipmaps, so it works on most hardware @@ -186,7 +186,7 @@ static void _compress_etc(Image *p_img) { } - *p_img=Image(p_img->get_width(),p_img->get_height(),mc-1,Image::FORMAT_ETC,dst_data); + *p_img=Image(p_img->get_width(),p_img->get_height(),(mc-1)?true:false,Image::FORMAT_ETC,dst_data); } diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp index a7edc8d935a..db874d1287b 100644 --- a/drivers/gles2/rasterizer_gles2.cpp +++ b/drivers/gles2/rasterizer_gles2.cpp @@ -376,7 +376,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For switch(p_format) { - case Image::FORMAT_GRAYSCALE: { + case Image::FORMAT_L8: { r_gl_components=1; r_gl_format=GL_LUMINANCE; r_gl_internal_format=(srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_SLUMINANCE_NV:GL_LUMINANCE; @@ -385,15 +385,15 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For case Image::FORMAT_INTENSITY: { if (!image.empty()) - image.convert(Image::FORMAT_RGBA); + image.convert(Image::FORMAT_RGBA8); r_gl_components=4; r_gl_format=GL_RGBA; r_gl_internal_format=(srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_GL_SRGB_ALPHA_EXT:GL_RGBA; r_has_alpha_cache=true; } break; - case Image::FORMAT_GRAYSCALE_ALPHA: { + case Image::FORMAT_LA8: { - //image.convert(Image::FORMAT_RGBA); + //image.convert(Image::FORMAT_RGBA8); r_gl_components=2; r_gl_format=GL_LUMINANCE_ALPHA; r_gl_internal_format=(srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_SLUMINANCE_ALPHA_NV:GL_LUMINANCE_ALPHA; @@ -403,7 +403,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For case Image::FORMAT_INDEXED: { if (!image.empty()) - image.convert(Image::FORMAT_RGB); + image.convert(Image::FORMAT_RGB8); r_gl_components=3; r_gl_format=GL_RGB; r_gl_internal_format=(srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_GL_SRGB_EXT:GL_RGB; @@ -413,7 +413,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For case Image::FORMAT_INDEXED_ALPHA: { if (!image.empty()) - image.convert(Image::FORMAT_RGBA); + image.convert(Image::FORMAT_RGBA8); r_gl_components=4; if (p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { @@ -432,7 +432,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For r_has_alpha_cache=true; } break; - case Image::FORMAT_RGB: { + case Image::FORMAT_RGB8: { r_gl_components=3; @@ -450,7 +450,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For r_gl_internal_format=GL_RGB; } } break; - case Image::FORMAT_RGBA: { + case Image::FORMAT_RGBA8: { r_gl_components=4; if (p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { @@ -470,7 +470,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For r_has_alpha_cache=true; } break; - case Image::FORMAT_BC1: { + case Image::FORMAT_DXT1: { if (!s3tc_supported || (!s3tc_srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { @@ -501,7 +501,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For }; } break; - case Image::FORMAT_BC2: { + case Image::FORMAT_DXT3: { if (!s3tc_supported || (!s3tc_srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { @@ -533,7 +533,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For }; } break; - case Image::FORMAT_BC3: { + case Image::FORMAT_DXT5: { if (!s3tc_supported || (!s3tc_srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { @@ -564,7 +564,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For }; } break; - case Image::FORMAT_BC4: { + case Image::FORMAT_ATI1: { if (!latc_supported) { @@ -595,7 +595,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For }; } break; - case Image::FORMAT_BC5: { + case Image::FORMAT_ATI2: { if (!latc_supported ) { @@ -657,7 +657,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For } } break; - case Image::FORMAT_PVRTC2_ALPHA: { + case Image::FORMAT_PVRTC2A: { if (!pvr_supported || (!pvr_srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { @@ -719,7 +719,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For } } break; - case Image::FORMAT_PVRTC4_ALPHA: { + case Image::FORMAT_PVRTC4A: { if (!pvr_supported || (!pvr_srgb_supported && p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { @@ -841,7 +841,7 @@ Image RasterizerGLES2::_get_gl_image_and_format(const Image& p_image, Image::For case Image::FORMAT_YUV_444: { if (!image.empty()) - image.convert(Image::FORMAT_RGB); + image.convert(Image::FORMAT_RGB8); r_gl_internal_format=GL_RGB; r_gl_components=3; @@ -1145,7 +1145,7 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid switch(texture->format) { - case Image::FORMAT_GRAYSCALE: { + case Image::FORMAT_L8: { format=GL_LUMINANCE; type=GL_UNSIGNED_BYTE; @@ -1156,19 +1156,19 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid case Image::FORMAT_INTENSITY: { return Image(); } break; - case Image::FORMAT_GRAYSCALE_ALPHA: { + case Image::FORMAT_LA8: { format=GL_LUMINANCE_ALPHA; type=GL_UNSIGNED_BYTE; pixelsize=2; } break; - case Image::FORMAT_RGB: { + case Image::FORMAT_RGB8: { format=GL_RGB; type=GL_UNSIGNED_BYTE; pixelsize=3; } break; - case Image::FORMAT_RGBA: { + case Image::FORMAT_RGBA8: { format=GL_RGBA; type=GL_UNSIGNED_BYTE; @@ -1178,18 +1178,18 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid format=GL_RGB; type=GL_UNSIGNED_BYTE; - fmt=Image::FORMAT_RGB; + fmt=Image::FORMAT_RGB8; pixelsize=3; } break; case Image::FORMAT_INDEXED_ALPHA: { format=GL_RGBA; type=GL_UNSIGNED_BYTE; - fmt=Image::FORMAT_RGBA; + fmt=Image::FORMAT_RGBA8; pixelsize=4; } break; - case Image::FORMAT_BC1: { + case Image::FORMAT_DXT1: { pixelsize=1; //doesn't matter much format=GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; @@ -1198,14 +1198,14 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid minw=minh=4; } break; - case Image::FORMAT_BC2: { + case Image::FORMAT_DXT3: { pixelsize=1; //doesn't matter much format=GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; compressed=true; minw=minh=4; } break; - case Image::FORMAT_BC3: { + case Image::FORMAT_DXT5: { pixelsize=1; //doesn't matter much format=GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; @@ -1213,7 +1213,7 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid minw=minh=4; } break; - case Image::FORMAT_BC4: { + case Image::FORMAT_ATI1: { format=GL_COMPRESSED_RED_RGTC1; pixelsize=1; //doesn't matter much @@ -1222,7 +1222,7 @@ Image RasterizerGLES2::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_sid minw=minh=4; } break; - case Image::FORMAT_BC5: { + case Image::FORMAT_ATI2: { format=GL_COMPRESSED_RG_RGTC2; pixelsize=1; //doesn't matter much @@ -1359,7 +1359,7 @@ Image::Format RasterizerGLES2::texture_get_format(RID p_texture) const { Texture * texture = texture_owner.get(p_texture); - ERR_FAIL_COND_V(!texture,Image::FORMAT_GRAYSCALE); + ERR_FAIL_COND_V(!texture,Image::FORMAT_L8); return texture->format; } @@ -4320,7 +4320,7 @@ void RasterizerGLES2::capture_viewport(Image* r_capture) { w=DVector::Write(); - r_capture->create(viewport.width,viewport.height,0,Image::FORMAT_RGB,pixels); + r_capture->create(viewport.width,viewport.height,0,Image::FORMAT_RGB8,pixels); #else @@ -4360,7 +4360,7 @@ void RasterizerGLES2::capture_viewport(Image* r_capture) { } w=DVector::Write(); - r_capture->create(viewport.width,viewport.height,0,Image::FORMAT_RGBA,pixels); + r_capture->create(viewport.width,viewport.height,0,Image::FORMAT_RGBA8,pixels); //r_capture->flip_y(); diff --git a/drivers/gles2/rasterizer_gles2.h b/drivers/gles2/rasterizer_gles2.h index b18f89d8e75..23f7cb6a1fc 100644 --- a/drivers/gles2/rasterizer_gles2.h +++ b/drivers/gles2/rasterizer_gles2.h @@ -154,7 +154,7 @@ class RasterizerGLES2 : public Rasterizer { flags=width=height=0; tex_id=0; data_size=0; - format=Image::FORMAT_GRAYSCALE; + format=Image::FORMAT_L8; gl_components_cache=0; format_has_alpha=false; has_alpha=false; diff --git a/drivers/gles2/shaders/canvas.glsl b/drivers/gles2/shaders/canvas.glsl index 5f4767940d4..38c579c15c2 100644 --- a/drivers/gles2/shaders/canvas.glsl +++ b/drivers/gles2/shaders/canvas.glsl @@ -105,8 +105,8 @@ precision mediump float; precision mediump int; #endif - // texunit:0 -uniform sampler2D texture; + +uniform sampler2D texture; // texunit:0 varying vec2 uv_interp; varying vec4 color_interp; @@ -319,7 +319,7 @@ LIGHT_SHADER_CODE #ifdef USE_DEPTH_SHADOWS -#define SHADOW_DEPTH(m_tex,m_uv) (texture2D((m_tex),(m_uv)).z) +#define SHADOW_DEPTH(m_tex,m_uv) (texture2D((m_tex),(m_uv)).r) #else diff --git a/drivers/gles3/SCsub b/drivers/gles3/SCsub new file mode 100644 index 00000000000..a17335b41b3 --- /dev/null +++ b/drivers/gles3/SCsub @@ -0,0 +1,5 @@ +Import('env') + +env.add_source_files(env.drivers_sources,"*.cpp") + +SConscript("shaders/SCsub") diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp new file mode 100644 index 00000000000..f24560763f3 --- /dev/null +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -0,0 +1,1461 @@ +#include "rasterizer_canvas_gles3.h" +#include "os/os.h" + +static _FORCE_INLINE_ void store_matrix32(const Matrix32& p_mtx, float* p_array) { + + p_array[ 0]=p_mtx.elements[0][0]; + p_array[ 1]=p_mtx.elements[0][1]; + p_array[ 2]=0; + p_array[ 3]=0; + p_array[ 4]=p_mtx.elements[1][0]; + p_array[ 5]=p_mtx.elements[1][1]; + p_array[ 6]=0; + p_array[ 7]=0; + p_array[ 8]=0; + p_array[ 9]=0; + p_array[10]=1; + p_array[11]=0; + p_array[12]=p_mtx.elements[2][0]; + p_array[13]=p_mtx.elements[2][1]; + p_array[14]=0; + p_array[15]=1; +} + + +static _FORCE_INLINE_ void store_transform(const Transform& p_mtx, float* p_array) { + p_array[ 0]=p_mtx.basis.elements[0][0]; + p_array[ 1]=p_mtx.basis.elements[1][0]; + p_array[ 2]=p_mtx.basis.elements[2][0]; + p_array[ 3]=0; + p_array[ 4]=p_mtx.basis.elements[0][1]; + p_array[ 5]=p_mtx.basis.elements[1][1]; + p_array[ 6]=p_mtx.basis.elements[2][1]; + p_array[ 7]=0; + p_array[ 8]=p_mtx.basis.elements[0][2]; + p_array[ 9]=p_mtx.basis.elements[1][2]; + p_array[10]=p_mtx.basis.elements[2][2]; + p_array[11]=0; + p_array[12]=p_mtx.origin.x; + p_array[13]=p_mtx.origin.y; + p_array[14]=p_mtx.origin.z; + p_array[15]=1; +} + +static _FORCE_INLINE_ void store_camera(const CameraMatrix& p_mtx, float* p_array) { + + for (int i=0;i<4;i++) { + for (int j=0;j<4;j++) { + + p_array[i*4+j]=p_mtx.matrix[i][j]; + } + } +} + + +RID RasterizerCanvasGLES3::light_internal_create() { + + LightInternal * li = memnew( LightInternal ); + + glGenBuffers(1, &li->ubo); + glBindBuffer(GL_UNIFORM_BUFFER, li->ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(LightInternal::UBOData), &state.canvas_item_ubo_data, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + return light_internal_owner.make_rid(li); +} + +void RasterizerCanvasGLES3::light_internal_update(RID p_rid, Light* p_light) { + + LightInternal * li = light_internal_owner.getornull(p_rid); + ERR_FAIL_COND(!li); + + store_matrix32(p_light->light_shader_xform,li->ubo_data.light_matrix); + store_matrix32(p_light->xform_cache.affine_inverse(),li->ubo_data.local_matrix); + store_camera(p_light->shadow_matrix_cache,li->ubo_data.shadow_matrix); + + for(int i=0;i<4;i++) { + + li->ubo_data.color[i]=p_light->color[i]*p_light->energy; + li->ubo_data.shadow_color[i]=p_light->shadow_color[i]; + } + + li->ubo_data.light_pos[0]=p_light->light_shader_pos.x; + li->ubo_data.light_pos[1]=p_light->light_shader_pos.y; + li->ubo_data.shadowpixel_size=1.0/p_light->shadow_buffer_size; + li->ubo_data.light_outside_alpha=p_light->mode==VS::CANVAS_LIGHT_MODE_MASK?1.0:0.0; + li->ubo_data.light_height=p_light->height; + if (p_light->radius_cache==0) + li->ubo_data.shadow_gradient=0; + else + li->ubo_data.shadow_gradient=p_light->shadow_gradient_length/(p_light->radius_cache*1.1);; + + li->ubo_data.shadow_distance_mult=(p_light->radius_cache*1.1); + + + glBindBuffer(GL_UNIFORM_BUFFER, li->ubo); + glBufferSubData(GL_UNIFORM_BUFFER, 0,sizeof(LightInternal::UBOData), &li->ubo_data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + +} + +void RasterizerCanvasGLES3::light_internal_free(RID p_rid) { + + LightInternal * li = light_internal_owner.getornull(p_rid); + ERR_FAIL_COND(!li); + + glDeleteBuffers(1,&li->ubo); + light_internal_owner.free(p_rid); + memdelete(li); + +} + +void RasterizerCanvasGLES3::canvas_begin(){ + + /*canvas_shader.unbind(); + canvas_shader.set_custom_shader(0); + canvas_shader.set_conditional(CanvasShaderGLES2::USE_MODULATE,false); + canvas_shader.bind(); + canvas_shader.set_uniform(CanvasShaderGLES2::TEXTURE, 0); + canvas_use_modulate=false;*/ + + reset_canvas(); + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_TEXTURE_RECT,true); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_LIGHTING,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_SHADOWS,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_NEAREST,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF5,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF13,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_DISTANCE_FIELD,false); + + + state.canvas_shader.bind(); + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,Color(1,1,1,1)); + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,Matrix32()); + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Matrix32()); + + + + +// state.canvas_shader.set_uniform(CanvasShaderGLES3::PROJECTION_MATRIX,state.vp); + //state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,Transform()); + //state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Transform()); + + glBindBufferBase(GL_UNIFORM_BUFFER,0,state.canvas_item_ubo); + glBindVertexArray(data.canvas_quad_array); + state.using_texture_rect=true; + + +} + + +void RasterizerCanvasGLES3::canvas_end(){ + + + glBindVertexArray(0); + glBindBufferBase(GL_UNIFORM_BUFFER,0,0); + + state.using_texture_rect=false; + +} + + + +RasterizerStorageGLES3::Texture* RasterizerCanvasGLES3::_bind_canvas_texture(const RID& p_texture) { + + if (p_texture==state.current_tex) { + return state.current_tex_ptr; + } + + if (p_texture.is_valid()) { + + + RasterizerStorageGLES3::Texture*texture=storage->texture_owner.getornull(p_texture); + + if (!texture) { + state.current_tex=RID(); + state.current_tex_ptr=NULL; + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + return NULL; + } + + if (texture->render_target) + texture->render_target->used_in_frame=true; + + glBindTexture(GL_TEXTURE_2D,texture->tex_id); + state.current_tex=p_texture; + state.current_tex_ptr=texture; + + return texture; + + + } else { + + + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + state.current_tex=RID(); + state.current_tex_ptr=NULL; + } + + + return NULL; +} + +void RasterizerCanvasGLES3::_set_texture_rect_mode(bool p_enable) { + + if (state.using_texture_rect==p_enable) + return; + + if (p_enable) { + glBindVertexArray(data.canvas_quad_array); + + + } else { + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); + + + } + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_TEXTURE_RECT,p_enable); + state.canvas_shader.bind(); + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,state.extra_matrix); + + + state.using_texture_rect=p_enable; +} + + +void RasterizerCanvasGLES3::_draw_polygon(int p_vertex_count, const int* p_indices, const Vector2* p_vertices, const Vector2* p_uvs, const Color* p_colors,const RID& p_texture,bool p_singlecolor) { + + bool do_colors=false; + Color m; + if (p_singlecolor) { + m = *p_colors; + glVertexAttrib4f(VS::ARRAY_COLOR,m.r,m.g,m.b,m.a); + } else if (!p_colors) { + + glVertexAttrib4f(VS::ARRAY_COLOR,1,1,1,1); + } else + do_colors=true; + + RasterizerStorageGLES3::Texture *texture = _bind_canvas_texture(p_texture); + +#ifndef GLES_NO_CLIENT_ARRAYS + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, sizeof(Vector2), p_vertices ); + if (do_colors) { + + glEnableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttribPointer( VS::ARRAY_COLOR, 4 ,GL_FLOAT, false, sizeof(Color), p_colors ); + } else { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + } + + if (texture && p_uvs) { + + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glVertexAttribPointer( VS::ARRAY_TEX_UV, 2 ,GL_FLOAT, false, sizeof(Vector2), p_uvs ); + } else { + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + if (p_indices) { +#ifdef GLEW_ENABLED + glDrawElements(GL_TRIANGLES, p_vertex_count, GL_UNSIGNED_INT, p_indices ); +#else + static const int _max_draw_poly_indices = 16*1024; // change this size if needed!!! + ERR_FAIL_COND(p_vertex_count > _max_draw_poly_indices); + static uint16_t _draw_poly_indices[_max_draw_poly_indices]; + for (int i=0; i MAX_POLYGON_VERTICES){ + print_line("Too many vertices to render"); + return; + } + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, sizeof(float)*2, ((float*)0)+ofs ); + for(int i=0;i _max_draw_poly_indices); + static uint16_t _draw_poly_indices[_max_draw_poly_indices]; + for (int i=0; iprint("ind: %d ", p_indices[i]); + }; + + //copy the data to GPU. + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, p_vertex_count * sizeof(uint16_t), &_draw_poly_indices[0]); + + //draw the triangles. + glDrawElements(GL_TRIANGLES, p_vertex_count, GL_UNSIGNED_SHORT, 0); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +#endif + + storage->frame.canvas_draw_commands++; + +} + +void RasterizerCanvasGLES3::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color* p_colors, const Vector2 *p_uvs) { + + + + static const GLenum prim[5]={GL_POINTS,GL_POINTS,GL_LINES,GL_TRIANGLES,GL_TRIANGLE_FAN}; + + + //#define GLES_USE_PRIMITIVE_BUFFER + +#ifndef GLES_NO_CLIENT_ARRAYS + + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, sizeof(Vector2), p_vertices ); + + if (p_colors) { + + glEnableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttribPointer( VS::ARRAY_COLOR, 4 ,GL_FLOAT, false, sizeof(Color), p_colors ); + } else { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + } + + if (p_uvs) { + + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glVertexAttribPointer( VS::ARRAY_TEX_UV, 2 ,GL_FLOAT, false, sizeof(Vector2), p_uvs ); + } else { + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + glDrawArrays(prim[p_points],0,p_points); + +#else + + glBindBuffer(GL_ARRAY_BUFFER,gui_quad_buffer); + float b[32]; + int ofs=0; + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, sizeof(float)*2, ((float*)0)+ofs ); + for(int i=0;iframe.canvas_draw_commands++; +} + +void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *current_clip,bool &reclip) { + + int cc=p_item->commands.size(); + Item::Command **commands = p_item->commands.ptr(); + + + for(int i=0;itype) { + case Item::Command::TYPE_LINE: { + + + Item::CommandLine* line = static_cast(c); + _set_texture_rect_mode(false); + + + _bind_canvas_texture(RID()); + + glVertexAttrib4f(VS::ARRAY_COLOR,line->color.r,line->color.g,line->color.b,line->color.a); + + Vector2 verts[2]={ + Vector2(line->from.x,line->from.y), + Vector2(line->to.x,line->to.y) + }; + +#ifdef GLEW_ENABLED + if (line->antialiased) + glEnable(GL_LINE_SMOOTH); +#endif + glLineWidth(line->width); + _draw_gui_primitive(2,verts,NULL,NULL); + +#ifdef GLEW_ENABLED + if (line->antialiased) + glDisable(GL_LINE_SMOOTH); +#endif + + + } break; + case Item::Command::TYPE_RECT: { + + Item::CommandRect* rect = static_cast(c); + + _set_texture_rect_mode(true); + + //set color + glVertexAttrib4f(VS::ARRAY_COLOR,rect->modulate.r,rect->modulate.g,rect->modulate.b,rect->modulate.a); + + RasterizerStorageGLES3::Texture* texture = _bind_canvas_texture(rect->texture); + + if ( texture ) { + + bool untile=false; + + if (rect->flags&CANVAS_RECT_TILE && !(texture->flags&VS::TEXTURE_FLAG_REPEAT)) { + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT ); + untile=true; + } + + Size2 texpixel_size( 1.0/texture->width, 1.0/texture->height ); + Rect2 src_rect = (rect->flags&CANVAS_RECT_REGION) ? Rect2( rect->source.pos * texpixel_size, rect->source.size * texpixel_size ) : Rect2(0,0,1,1); + + if (rect->flags&CANVAS_RECT_FLIP_H) { + src_rect.size.x*=-1; + } + + if (rect->flags&CANVAS_RECT_FLIP_V) { + src_rect.size.x*=-1; + } + + if (rect->flags&CANVAS_RECT_TRANSPOSE) { + //err.. + } + + + glVertexAttrib4f(1,rect->rect.pos.x,rect->rect.pos.y,rect->rect.size.x,rect->rect.size.y); + glVertexAttrib4f(2,src_rect.pos.x,src_rect.pos.y,src_rect.size.x,src_rect.size.y); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + + if (untile) { + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); + } + + } else { + + + glVertexAttrib4f(1,rect->rect.pos.x,rect->rect.pos.y,rect->rect.size.x,rect->rect.size.y); + glVertexAttrib4f(2,0,0,1,1); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + + } + + storage->frame.canvas_draw_commands++; + + } break; + + case Item::Command::TYPE_NINEPATCH: { + + Item::CommandNinePatch* np = static_cast(c); + + _set_texture_rect_mode(true); + + glVertexAttrib4f(VS::ARRAY_COLOR,np->color.r,np->color.g,np->color.b,np->color.a); + + + RasterizerStorageGLES3::Texture* texture = _bind_canvas_texture(np->texture); + + if ( !texture ) { + + glVertexAttrib4f(1,np->rect.pos.x,np->rect.pos.y,np->rect.size.x,np->rect.size.y); + glVertexAttrib4f(2,0,0,1,1); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + continue; + } + + + Size2 texpixel_size( 1.0/texture->width, 1.0/texture->height ); + +#define DSTRECT(m_x,m_y,m_w,m_h) glVertexAttrib4f(1,m_x,m_y,m_w,m_h) +#define SRCRECT(m_x,m_y,m_w,m_h) glVertexAttrib4f(2,(m_x)*texpixel_size.x,(m_y)*texpixel_size.y,(m_w)*texpixel_size.x,(m_h)*texpixel_size.y) + + //top left + DSTRECT(np->rect.pos.x,np->rect.pos.y,np->margin[MARGIN_LEFT],np->margin[MARGIN_TOP]); + SRCRECT(0,0,np->margin[MARGIN_LEFT],np->margin[MARGIN_TOP]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + //top right + DSTRECT(np->rect.pos.x+np->rect.size.x-np->margin[MARGIN_RIGHT],np->rect.pos.y,np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP]); + SRCRECT(texture->width-np->margin[MARGIN_RIGHT],0,np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + //bottom right + DSTRECT(np->rect.pos.x+np->rect.size.x-np->margin[MARGIN_RIGHT],np->rect.pos.y+np->rect.size.y-np->margin[MARGIN_BOTTOM],np->margin[MARGIN_RIGHT],np->margin[MARGIN_BOTTOM]); + SRCRECT(texture->width-np->margin[MARGIN_RIGHT],texture->height-np->margin[MARGIN_BOTTOM],np->margin[MARGIN_RIGHT],np->margin[MARGIN_BOTTOM]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + //bottom left + DSTRECT(np->rect.pos.x,np->rect.pos.y+np->rect.size.y-np->margin[MARGIN_BOTTOM],np->margin[MARGIN_LEFT],np->margin[MARGIN_BOTTOM]); + SRCRECT(0,texture->height-np->margin[MARGIN_BOTTOM],np->margin[MARGIN_LEFT],np->margin[MARGIN_BOTTOM]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + + //top + DSTRECT(np->rect.pos.x+np->margin[MARGIN_LEFT],np->rect.pos.y,np->rect.size.width-np->margin[MARGIN_LEFT]-np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP]); + SRCRECT(np->margin[MARGIN_LEFT],0,texture->width-np->margin[MARGIN_LEFT]-np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + //bottom + DSTRECT(np->rect.pos.x+np->margin[MARGIN_LEFT],np->rect.pos.y+np->rect.size.y-np->margin[MARGIN_BOTTOM],np->rect.size.width-np->margin[MARGIN_LEFT]-np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP]); + SRCRECT(np->margin[MARGIN_LEFT],texture->height-np->margin[MARGIN_BOTTOM],texture->width-np->margin[MARGIN_LEFT]-np->margin[MARGIN_LEFT],np->margin[MARGIN_TOP]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + + //left + DSTRECT(np->rect.pos.x,np->rect.pos.y+np->margin[MARGIN_TOP],np->margin[MARGIN_LEFT],np->rect.size.height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + SRCRECT(0,np->margin[MARGIN_TOP],np->margin[MARGIN_LEFT],texture->height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + //right + DSTRECT(np->rect.pos.x+np->rect.size.width-np->margin[MARGIN_RIGHT],np->rect.pos.y+np->margin[MARGIN_TOP],np->margin[MARGIN_RIGHT],np->rect.size.height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + SRCRECT(texture->width-np->margin[MARGIN_RIGHT],np->margin[MARGIN_TOP],np->margin[MARGIN_RIGHT],texture->height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + if (np->draw_center) { + + //center + DSTRECT(np->rect.pos.x+np->margin[MARGIN_LEFT],np->rect.pos.y+np->margin[MARGIN_TOP],np->rect.size.x-np->margin[MARGIN_LEFT]-np->margin[MARGIN_RIGHT],np->rect.size.height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + SRCRECT(np->margin[MARGIN_LEFT],np->margin[MARGIN_TOP],texture->width-np->margin[MARGIN_LEFT]-np->margin[MARGIN_RIGHT],texture->height-np->margin[MARGIN_TOP]-np->margin[MARGIN_BOTTOM]); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + } + +#undef SRCRECT +#undef DSTRECT + + storage->frame.canvas_draw_commands++; + } break; + + case Item::Command::TYPE_PRIMITIVE: { + + Item::CommandPrimitive* primitive = static_cast(c); + _set_texture_rect_mode(false); + + ERR_CONTINUE( primitive->points.size()<1); + + _bind_canvas_texture(primitive->texture); + + if (primitive->colors.size()==1 && primitive->points.size()>1) { + + Color c = primitive->colors[0]; + glVertexAttrib4f(VS::ARRAY_COLOR,c.r,c.g,c.b,c.a); + + } else if (primitive->colors.empty()) { + glVertexAttrib4f(VS::ARRAY_COLOR,1,1,1,1); + } + + _draw_gui_primitive(primitive->points.size(),primitive->points.ptr(),primitive->colors.ptr(),primitive->uvs.ptr()); + + } break; + case Item::Command::TYPE_POLYGON: { + + Item::CommandPolygon* polygon = static_cast(c); + _set_texture_rect_mode(false); + _draw_polygon(polygon->count,polygon->indices.ptr(),polygon->points.ptr(),polygon->uvs.ptr(),polygon->colors.ptr(),polygon->texture,polygon->colors.size()==1); + + } break; + case Item::Command::TYPE_CIRCLE: { + + _set_texture_rect_mode(false); + + Item::CommandCircle* circle = static_cast(c); + static const int numpoints=32; + Vector2 points[numpoints+1]; + points[numpoints]=circle->pos; + int indices[numpoints*3]; + + for(int i=0;ipos+Vector2( Math::sin(i*Math_PI*2.0/numpoints),Math::cos(i*Math_PI*2.0/numpoints) )*circle->radius; + indices[i*3+0]=i; + indices[i*3+1]=(i+1)%numpoints; + indices[i*3+2]=numpoints; + } + _draw_polygon(numpoints*3,indices,points,NULL,&circle->color,RID(),true); + //canvas_draw_circle(circle->indices.size(),circle->indices.ptr(),circle->points.ptr(),circle->uvs.ptr(),circle->colors.ptr(),circle->texture,circle->colors.size()==1); + } break; + case Item::Command::TYPE_TRANSFORM: { + + Item::CommandTransform* transform = static_cast(c); + state.extra_matrix=transform->xform; + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,state.extra_matrix); + + } break; + case Item::Command::TYPE_CLIP_IGNORE: { + + Item::CommandClipIgnore* ci = static_cast(c); + if (current_clip) { + + if (ci->ignore!=reclip) { + if (ci->ignore) { + + glDisable(GL_SCISSOR_TEST); + reclip=true; + } else { + + glEnable(GL_SCISSOR_TEST); + //glScissor(viewport.x+current_clip->final_clip_rect.pos.x,viewport.y+ (viewport.height-(current_clip->final_clip_rect.pos.y+current_clip->final_clip_rect.size.height)), + //current_clip->final_clip_rect.size.width,current_clip->final_clip_rect.size.height); + + int x = current_clip->final_clip_rect.pos.x; + int y = storage->frame.current_rt->height - ( current_clip->final_clip_rect.pos.y + current_clip->final_clip_rect.size.y ); + int w = current_clip->final_clip_rect.size.x; + int h = current_clip->final_clip_rect.size.y; + + glScissor(x,y,w,h); + + reclip=false; + } + } + } + + + + } break; + } + } +} + +#if 0 +void RasterizerGLES2::_canvas_item_setup_shader_params(CanvasItemMaterial *material,Shader* shader) { + + if (canvas_shader.bind()) + rebind_texpixel_size=true; + + if (material->shader_version!=shader->version) { + //todo optimize uniforms + material->shader_version=shader->version; + } + + if (shader->has_texscreen && framebuffer.active) { + + int x = viewport.x; + int y = window_size.height-(viewport.height+viewport.y); + + canvas_shader.set_uniform(CanvasShaderGLES2::TEXSCREEN_SCREEN_MULT,Vector2(float(viewport.width)/framebuffer.width,float(viewport.height)/framebuffer.height)); + canvas_shader.set_uniform(CanvasShaderGLES2::TEXSCREEN_SCREEN_CLAMP,Color(float(x)/framebuffer.width,float(y)/framebuffer.height,float(x+viewport.width)/framebuffer.width,float(y+viewport.height)/framebuffer.height)); + canvas_shader.set_uniform(CanvasShaderGLES2::TEXSCREEN_TEX,max_texture_units-1); + glActiveTexture(GL_TEXTURE0+max_texture_units-1); + glBindTexture(GL_TEXTURE_2D,framebuffer.sample_color); + if (framebuffer.scale==1 && !canvas_texscreen_used) { +#ifdef GLEW_ENABLED + if (current_rt) { + glReadBuffer(GL_COLOR_ATTACHMENT0); + } else { + glReadBuffer(GL_BACK); + } +#endif + if (current_rt) { + glCopyTexSubImage2D(GL_TEXTURE_2D,0,viewport.x,viewport.y,viewport.x,viewport.y,viewport.width,viewport.height); + canvas_shader.set_uniform(CanvasShaderGLES2::TEXSCREEN_SCREEN_CLAMP,Color(float(x)/framebuffer.width,float(viewport.y)/framebuffer.height,float(x+viewport.width)/framebuffer.width,float(y+viewport.height)/framebuffer.height)); + //window_size.height-(viewport.height+viewport.y) + } else { + glCopyTexSubImage2D(GL_TEXTURE_2D,0,x,y,x,y,viewport.width,viewport.height); + } +// if (current_clip) { +// // print_line(" a clip "); +// } + + canvas_texscreen_used=true; + } + + glActiveTexture(GL_TEXTURE0); + + } + + if (shader->has_screen_uv) { + canvas_shader.set_uniform(CanvasShaderGLES2::SCREEN_UV_MULT,Vector2(1.0/viewport.width,1.0/viewport.height)); + } + + + uses_texpixel_size=shader->uses_texpixel_size; + +} + +#endif + +void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const Color& p_modulate,Light *p_light) { + + + if (storage->frame.clear_request) { + // a clear request may be pending, so do it + glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); + glClear(GL_COLOR_BUFFER_BIT); + storage->frame.clear_request=false; + + + } + + Item *current_clip=NULL; + RasterizerStorageGLES3::Shader *shader_cache=NULL; + + bool rebind_shader=true; + + Size2 rt_size = Size2(storage->frame.current_rt->width,storage->frame.current_rt->height); + + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_DISTANCE_FIELD,false); + + glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + state.current_tex=RID(); + state.current_tex_ptr=NULL; + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + + + RID canvas_last_material; + + bool prev_distance_field=false; + + while(p_item_list) { + + Item *ci=p_item_list; + + + if (prev_distance_field!=ci->distance_field) { + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_DISTANCE_FIELD,ci->distance_field); + prev_distance_field=ci->distance_field; + rebind_shader=true; + } + + + if (current_clip!=ci->final_clip_owner) { + + current_clip=ci->final_clip_owner; + + //setup clip + if (current_clip) { + + glEnable(GL_SCISSOR_TEST); + glScissor(current_clip->final_clip_rect.pos.x,(rt_size.height-(current_clip->final_clip_rect.pos.y+current_clip->final_clip_rect.size.height)),current_clip->final_clip_rect.size.width,current_clip->final_clip_rect.size.height); + + + } else { + + glDisable(GL_SCISSOR_TEST); + } + } +#if 0 + if (ci->copy_back_buffer && framebuffer.active && framebuffer.scale==1) { + + Rect2 rect; + int x,y; + + if (ci->copy_back_buffer->full) { + + x = viewport.x; + y = window_size.height-(viewport.height+viewport.y); + } else { + x = viewport.x+ci->copy_back_buffer->screen_rect.pos.x; + y = window_size.height-(viewport.y+ci->copy_back_buffer->screen_rect.pos.y+ci->copy_back_buffer->screen_rect.size.y); + } + glActiveTexture(GL_TEXTURE0+max_texture_units-1); + glBindTexture(GL_TEXTURE_2D,framebuffer.sample_color); + +#ifdef GLEW_ENABLED + if (current_rt) { + glReadBuffer(GL_COLOR_ATTACHMENT0); + } else { + glReadBuffer(GL_BACK); + } +#endif + if (current_rt) { + glCopyTexSubImage2D(GL_TEXTURE_2D,0,viewport.x,viewport.y,viewport.x,viewport.y,viewport.width,viewport.height); + //window_size.height-(viewport.height+viewport.y) + } else { + glCopyTexSubImage2D(GL_TEXTURE_2D,0,x,y,x,y,viewport.width,viewport.height); + } + + canvas_texscreen_used=true; + glActiveTexture(GL_TEXTURE0); + + } + +#endif + + + //begin rect + Item *material_owner = ci->material_owner?ci->material_owner:ci; + + RID material = material_owner->material; + + if (material!=canvas_last_material || rebind_shader) { +#if 0 + Shader *shader = NULL; + if (material && material->shader.is_valid()) { + shader = shader_owner.get(material->shader); + if (shader && !shader->valid) { + shader=NULL; + } + } + + shader_cache=shader; + + if (shader) { + canvas_shader.set_custom_shader(shader->custom_code_id); + _canvas_item_setup_shader_params(material,shader); + } else { + shader_cache=NULL; + canvas_shader.set_custom_shader(0); + canvas_shader.bind(); + uses_texpixel_size=false; + + } + + + canvas_shader.set_uniform(CanvasShaderGLES3::PROJECTION_MATRIX,canvas_transform); + if (canvas_use_modulate) + reset_modulate=true; + canvas_last_material=material; + rebind_shader=false; +#endif + } + + if (material.is_valid() && shader_cache) { +#if 0 + _canvas_item_setup_shader_uniforms(material,shader_cache); +#endif + } + + bool unshaded = false; //(material && material->shading_mode==VS::CANVAS_ITEM_SHADING_UNSHADED) || ci->blend_mode!=VS::MATERIAL_BLEND_MODE_MIX; + bool reclip=false; +#if 0 + if (ci==p_item_list || ci->blend_mode!=canvas_blend_mode) { + + switch(ci->blend_mode) { + + case VS::MATERIAL_BLEND_MODE_MIX: { + glBlendEquation(GL_FUNC_ADD); + if (current_rt && current_rt_transparent) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + } break; + case VS::MATERIAL_BLEND_MODE_ADD: { + + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + + } break; + case VS::MATERIAL_BLEND_MODE_SUB: { + + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + } break; + case VS::MATERIAL_BLEND_MODE_MUL: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_DST_COLOR,GL_ZERO); + } break; + case VS::MATERIAL_BLEND_MODE_PREMULT_ALPHA: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_ONE,GL_ONE_MINUS_SRC_ALPHA); + } break; + + } + + canvas_blend_mode=ci->blend_mode; + } +#endif + +// canvas_shader.set_uniform(CanvasShaderGLES3::CANVAS_MODULATE,unshaded ? Color(1,1,1,1) : p_modulate); + + state.canvas_item_modulate = unshaded ? ci->final_modulate : Color( + ci->final_modulate.r * p_modulate.r, + ci->final_modulate.g * p_modulate.g, + ci->final_modulate.b * p_modulate.b, + ci->final_modulate.a * p_modulate.a ); + + state.final_transform = ci->final_transform; + state.extra_matrix=Matrix32(); + + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,state.extra_matrix); + + + if (unshaded || (state.canvas_item_modulate.a>0.001 && (!material.is_valid() /*|| material->shading_mode!=VS::CANVAS_ITEM_SHADING_ONLY_LIGHT*/) && !ci->light_masked )) + _canvas_item_render_commands(ci,current_clip,reclip); + + if (/*canvas_blend_mode==VS::MATERIAL_BLEND_MODE_MIX &&*/ p_light && !unshaded) { + + Light *light = p_light; + bool light_used=false; + VS::CanvasLightMode mode=VS::CANVAS_LIGHT_MODE_ADD; + state.canvas_item_modulate=ci->final_modulate; // remove the canvas modulate + + + while(light) { + + + if (ci->light_mask&light->item_mask && p_z>=light->z_min && p_z<=light->z_max && ci->global_rect_cache.intersects_transformed(light->xform_cache,light->rect_cache)) { + + //intersects this light + + if (!light_used || mode!=light->mode) { + + mode=light->mode; + + switch(mode) { + + case VS::CANVAS_LIGHT_MODE_ADD: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + + } break; + case VS::CANVAS_LIGHT_MODE_SUB: { + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + } break; + case VS::CANVAS_LIGHT_MODE_MIX: + case VS::CANVAS_LIGHT_MODE_MASK: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + } break; + } + + } + + if (!light_used) { + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_LIGHTING,true); + light_used=true; + + } + + + bool has_shadow = light->shadow_buffer.is_valid() && ci->light_mask&light->item_shadow_mask; + + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_SHADOWS,has_shadow); + if (has_shadow) { + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_USE_GRADIENT,light->shadow_gradient_length>0); + switch(light->shadow_filter) { + + case VS::CANVAS_LIGHT_FILTER_NONE: state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_NEAREST,true); break; + case VS::CANVAS_LIGHT_FILTER_PCF3: state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF3,true); break; + case VS::CANVAS_LIGHT_FILTER_PCF5: state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF5,true); break; + case VS::CANVAS_LIGHT_FILTER_PCF9: state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF9,true); break; + case VS::CANVAS_LIGHT_FILTER_PCF13: state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF13,true); break; + } + + + } + + bool light_rebind = state.canvas_shader.bind(); + + if (light_rebind) { +#if 0 + if (material && shader_cache) { + _canvas_item_setup_shader_params(material,shader_cache); + _canvas_item_setup_shader_uniforms(material,shader_cache); + } +#endif + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Matrix32()); + + } + + glBindBufferBase(GL_UNIFORM_BUFFER,1,static_cast(light->light_internal.get_data())->ubo); + + if (has_shadow) { + + RasterizerStorageGLES3::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(light->shadow_buffer); + glActiveTexture(GL_TEXTURE0+storage->config.max_texture_image_units-2); + glBindTexture(GL_TEXTURE_2D,cls->distance); + + /*canvas_shader.set_uniform(CanvasShaderGLES3::SHADOW_MATRIX,light->shadow_matrix_cache); + canvas_shader.set_uniform(CanvasShaderGLES3::SHADOW_ESM_MULTIPLIER,light->shadow_esm_mult); + canvas_shader.set_uniform(CanvasShaderGLES3::LIGHT_SHADOW_COLOR,light->shadow_color);*/ + + } + + glActiveTexture(GL_TEXTURE0+storage->config.max_texture_image_units-1); + RasterizerStorageGLES3::Texture *t = storage->texture_owner.getornull(light->texture); + if (!t) { + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + } else { + + glBindTexture(t->target,t->tex_id); + } + + glActiveTexture(GL_TEXTURE0); + _canvas_item_render_commands(ci,current_clip,reclip); //redraw using light + + } + + light=light->next_ptr; + } + + if (light_used) { + + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_LIGHTING,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_SHADOWS,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_NEAREST,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF3,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF5,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF9,false); + state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF13,false); + + + state.canvas_shader.bind(); +#if 0 + if (material && shader_cache) { + _canvas_item_setup_shader_params(material,shader_cache); + _canvas_item_setup_shader_uniforms(material,shader_cache); + } +#endif + + state.canvas_item_modulate = unshaded ? ci->final_modulate : Color( + ci->final_modulate.r * p_modulate.r, + ci->final_modulate.g * p_modulate.g, + ci->final_modulate.b * p_modulate.b, + ci->final_modulate.a * p_modulate.a ); + + + state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Matrix32()); + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); + + + glBlendEquation(GL_FUNC_ADD); + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + //@TODO RESET canvas_blend_mode + } + + + } + + if (reclip) { + + glEnable(GL_SCISSOR_TEST); + glScissor(current_clip->final_clip_rect.pos.x,(rt_size.height-(current_clip->final_clip_rect.pos.y+current_clip->final_clip_rect.size.height)),current_clip->final_clip_rect.size.width,current_clip->final_clip_rect.size.height); + + + } + + + + p_item_list=p_item_list->next; + } + + if (current_clip) { + glDisable(GL_SCISSOR_TEST); + } + +} + +void RasterizerCanvasGLES3::canvas_debug_viewport_shadows(Light* p_lights_with_shadow){ + + Light* light=p_lights_with_shadow; + + canvas_begin(); //reset + glVertexAttrib4f(VS::ARRAY_COLOR,1,1,1,1); + int h = 10; + int w = storage->frame.current_rt->width; + int ofs = h; + glDisable(GL_BLEND); + + //print_line(" debug lights "); + while(light) { + + + // print_line("debug light"); + if (light->shadow_buffer.is_valid()) { + + // print_line("sb is valid"); + RasterizerStorageGLES3::CanvasLightShadow * sb = storage->canvas_light_shadow_owner.get(light->shadow_buffer); + if (sb) { + glBindTexture(GL_TEXTURE_2D,sb->distance); + //glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + draw_generic_textured_rect(Rect2(h,ofs,w-h*2,h),Rect2(0,0,1,1)); + ofs+=h*2; + + } + } + + light=light->shadows_next_ptr; + } +} + + +void RasterizerCanvasGLES3::canvas_light_shadow_buffer_update(RID p_buffer, const Matrix32& p_light_xform, int p_light_mask,float p_near, float p_far, LightOccluderInstance* p_occluders, CameraMatrix *p_xform_cache) { + + RasterizerStorageGLES3::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(p_buffer); + ERR_FAIL_COND(!cls); + + + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_DITHER); + glDisable(GL_CULL_FACE); + glDepthFunc(GL_LEQUAL); + glEnable(GL_DEPTH_TEST); + glDepthMask(true); + + glBindFramebuffer(GL_FRAMEBUFFER, cls->fbo); + + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + state.canvas_shadow_shader.bind(); + + glViewport(0, 0, cls->size,cls->height); + glClearDepth(1.0f); + glClearColor(1,1,1,1); + glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); + + VS::CanvasOccluderPolygonCullMode cull=VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + + + for(int i=0;i<4;i++) { + + //make sure it remains orthogonal, makes easy to read angle later + + Transform light; + light.origin[0]=p_light_xform[2][0]; + light.origin[1]=p_light_xform[2][1]; + light.basis[0][0]=p_light_xform[0][0]; + light.basis[0][1]=p_light_xform[1][0]; + light.basis[1][0]=p_light_xform[0][1]; + light.basis[1][1]=p_light_xform[1][1]; + + //light.basis.scale(Vector3(to_light.elements[0].length(),to_light.elements[1].length(),1)); + + /// p_near=1; + CameraMatrix projection; + { + real_t fov = 90; + real_t near = p_near; + real_t far = p_far; + real_t aspect = 1.0; + + real_t ymax = near * Math::tan( Math::deg2rad( fov * 0.5 ) ); + real_t ymin = - ymax; + real_t xmin = ymin * aspect; + real_t xmax = ymax * aspect; + + projection.set_frustum( xmin, xmax, ymin, ymax, near, far ); + } + + Vector3 cam_target=Matrix3(Vector3(0,0,Math_PI*2*(i/4.0))).xform(Vector3(0,1,0)); + projection = projection * CameraMatrix(Transform().looking_at(cam_target,Vector3(0,0,-1)).affine_inverse()); + + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES3::PROJECTION_MATRIX,projection); + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES3::LIGHT_MATRIX,light); + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES3::DISTANCE_NORM,1.0/p_far); + + + if (i==0) + *p_xform_cache=projection; + + glViewport(0, (cls->height/4)*i, cls->size,cls->height/4); + + LightOccluderInstance *instance=p_occluders; + + while(instance) { + + RasterizerStorageGLES3::CanvasOccluder *cc = storage->canvas_occluder_owner.get(instance->polygon_buffer); + if (!cc || cc->len==0 || !(p_light_mask&instance->light_mask)) { + + instance=instance->next; + continue; + } + + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES3::WORLD_MATRIX,instance->xform_cache); + if (cull!=instance->cull_cache) { + + cull=instance->cull_cache; + switch(cull) { + case VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED: { + + glDisable(GL_CULL_FACE); + + } break; + case VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE: { + + glEnable(GL_CULL_FACE); + glCullFace(GL_FRONT); + } break; + case VS::CANVAS_OCCLUDER_POLYGON_CULL_COUNTER_CLOCKWISE: { + + glEnable(GL_CULL_FACE); + glCullFace(GL_BACK); + + } break; + } + } +/* + if (i==0) { + for(int i=0;ilines.size();i++) { + Vector2 p = instance->xform_cache.xform(cc->lines.get(i)); + Plane pp(Vector3(p.x,p.y,0),1); + pp.normal = light.xform(pp.normal); + pp = projection.xform4(pp); + print_line(itos(i)+": "+pp.normal/pp.d); + //pp=light_mat.xform4(pp); + //print_line(itos(i)+": "+pp.normal/pp.d); + } + } +*/ + glBindBuffer(GL_ARRAY_BUFFER,cc->vertex_id); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,cc->index_id); + glVertexAttribPointer(VS::ARRAY_VERTEX, 3, GL_FLOAT, false, 0, 0); + glDrawElements(GL_TRIANGLES,cc->len*3,GL_UNSIGNED_SHORT,0); + + + instance=instance->next; + } + + + } + + glDisableVertexAttribArray(VS::ARRAY_VERTEX); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); +} +void RasterizerCanvasGLES3::reset_canvas() { + + + if (storage->frame.current_rt) { + glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->front.fbo); + glColorMask(1,1,1,1); //don't touch alpha + } + + + glBindVertexArray(0); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); +#ifdef GLEW_ENABLED + glDisable(GL_POINT_SPRITE); + glDisable(GL_VERTEX_PROGRAM_POINT_SIZE); +#endif + glEnable(GL_BLEND); + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + //glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); + glLineWidth(1.0); + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); + for(int i=0;iresources.white_tex ); + + + glVertexAttrib4f(VS::ARRAY_COLOR,1,1,1,1); + + Transform canvas_transform; + + if (storage->frame.current_rt) { + + float csy = 1.0; + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { + csy = -1.0; + } + canvas_transform.translate(-(storage->frame.current_rt->width / 2.0f), -(storage->frame.current_rt->height / 2.0f), 0.0f); + canvas_transform.scale( Vector3( 2.0f / storage->frame.current_rt->width, csy * -2.0f / storage->frame.current_rt->height, 1.0f ) ); + } else { + Vector2 ssize = OS::get_singleton()->get_window_size(); + canvas_transform.translate(-(ssize.width / 2.0f), -(ssize.height / 2.0f), 0.0f); + canvas_transform.scale( Vector3( 2.0f / ssize.width, -2.0f / ssize.height, 1.0f ) ); + + } + + state.vp=canvas_transform; + + store_transform(canvas_transform,state.canvas_item_ubo_data.projection_matrix); + + glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + + state.canvas_texscreen_used=false; + + +} + + +void RasterizerCanvasGLES3::draw_generic_textured_rect(const Rect2& p_rect, const Rect2& p_src) { + + + glVertexAttrib4f(1,p_rect.pos.x,p_rect.pos.y,p_rect.size.x,p_rect.size.y); + glVertexAttrib4f(2,p_src.pos.x,p_src.pos.y,p_src.size.x,p_src.size.y); + glDrawArrays(GL_TRIANGLE_FAN,0,4); +} + +void RasterizerCanvasGLES3::initialize() { + + + { + //quad buffers + + glGenBuffers(1,&data.canvas_quad_vertices); + glBindBuffer(GL_ARRAY_BUFFER,data.canvas_quad_vertices); + { + const float qv[8]={ + 0,0, + 0,1, + 1,1, + 1,0 + }; + + glBufferData(GL_ARRAY_BUFFER,sizeof(float)*8,qv,GL_STATIC_DRAW); + } + + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + + glGenVertexArrays(1,&data.canvas_quad_array); + glBindVertexArray(data.canvas_quad_array); + glBindBuffer(GL_ARRAY_BUFFER,data.canvas_quad_vertices); + glVertexAttribPointer(0,2,GL_FLOAT,GL_FALSE,sizeof(float)*2,0); + glEnableVertexAttribArray(0); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + } + + + store_transform(Transform(),state.canvas_item_ubo_data.projection_matrix); + + + + glGenBuffers(1, &state.canvas_item_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + state.canvas_shader.init(); + state.canvas_shadow_shader.init(); + + state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_RGBA_SHADOWS,storage->config.use_rgba_2d_shadows); + state.canvas_shadow_shader.set_conditional(CanvasShadowShaderGLES3::USE_RGBA_SHADOWS,storage->config.use_rgba_2d_shadows); + + +} + + +void RasterizerCanvasGLES3::finalize() { + + glDeleteBuffers(1,&data.canvas_quad_vertices); + glDeleteVertexArrays(1,&data.canvas_quad_array); +} + +RasterizerCanvasGLES3::RasterizerCanvasGLES3() +{ + +} diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h new file mode 100644 index 00000000000..1f72a8dbcfb --- /dev/null +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -0,0 +1,102 @@ +#ifndef RASTERIZERCANVASGLES3_H +#define RASTERIZERCANVASGLES3_H + +#include "servers/visual/rasterizer.h" +#include "rasterizer_storage_gles3.h" +#include "shaders/canvas_shadow.glsl.h" + +class RasterizerCanvasGLES3 : public RasterizerCanvas { +public: + + struct CanvasItemUBO { + + float projection_matrix[16]; + + }; + + struct Data { + + GLuint canvas_quad_vertices; + GLuint canvas_quad_array; + + } data; + + struct State { + CanvasItemUBO canvas_item_ubo_data; + GLuint canvas_item_ubo; + bool canvas_texscreen_used; + CanvasShaderGLES3 canvas_shader; + CanvasShadowShaderGLES3 canvas_shadow_shader; + + bool using_texture_rect; + + + RID current_tex; + RasterizerStorageGLES3::Texture *current_tex_ptr; + + Transform vp; + + Color canvas_item_modulate; + Matrix32 extra_matrix; + Matrix32 final_transform; + + } state; + + RasterizerStorageGLES3 *storage; + + struct LightInternal : public RID_Data { + + struct UBOData { + + float light_matrix[16]; + float local_matrix[16]; + float shadow_matrix[16]; + float color[4]; + float shadow_color[4]; + float light_pos[2]; + float shadowpixel_size; + float shadow_gradient; + float light_height; + float light_outside_alpha; + float shadow_distance_mult; + } ubo_data; + + GLuint ubo; + }; + + RID_Owner light_internal_owner; + + virtual RID light_internal_create(); + virtual void light_internal_update(RID p_rid, Light* p_light); + virtual void light_internal_free(RID p_rid); + + + virtual void canvas_begin(); + virtual void canvas_end(); + + _FORCE_INLINE_ void _set_texture_rect_mode(bool p_enable); + _FORCE_INLINE_ RasterizerStorageGLES3::Texture* _bind_canvas_texture(const RID& p_texture); + + _FORCE_INLINE_ void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color* p_colors, const Vector2 *p_uvs); + _FORCE_INLINE_ void _draw_polygon(int p_vertex_count, const int* p_indices, const Vector2* p_vertices, const Vector2* p_uvs, const Color* p_colors,const RID& p_texture,bool p_singlecolor); + _FORCE_INLINE_ void _canvas_item_render_commands(Item *p_item,Item *current_clip,bool &reclip); + + + virtual void canvas_render_items(Item *p_item_list,int p_z,const Color& p_modulate,Light *p_light); + virtual void canvas_debug_viewport_shadows(Light* p_lights_with_shadow); + + virtual void canvas_light_shadow_buffer_update(RID p_buffer, const Matrix32& p_light_xform, int p_light_mask,float p_near, float p_far, LightOccluderInstance* p_occluders, CameraMatrix *p_xform_cache); + + + virtual void reset_canvas(); + + void draw_generic_textured_rect(const Rect2& p_rect, const Rect2& p_src); + + + void initialize(); + void finalize(); + + RasterizerCanvasGLES3(); +}; + +#endif // RASTERIZERCANVASGLES3_H diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp new file mode 100644 index 00000000000..ba83a572e52 --- /dev/null +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -0,0 +1,266 @@ +#include "rasterizer_gles3.h" +#include "os/os.h" +#include "globals.h" +#include "gl_context/context_gl.h" +#include +RasterizerStorage *RasterizerGLES3::get_storage() { + + return storage; +} + +RasterizerCanvas *RasterizerGLES3::get_canvas() { + + return canvas; +} + +RasterizerScene *RasterizerGLES3::get_scene() { + + return NULL; +} + + +static void _gl_debug_print(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const GLvoid *userParam) +{ + + if (type==GL_DEBUG_TYPE_OTHER_ARB) + return; + + print_line("mesege"); + char debSource[256], debType[256], debSev[256]; + if(source == GL_DEBUG_SOURCE_API_ARB) + strcpy(debSource, "OpenGL"); + else if(source == GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB) + strcpy(debSource, "Windows"); + else if(source == GL_DEBUG_SOURCE_SHADER_COMPILER_ARB) + strcpy(debSource, "Shader Compiler"); + else if(source == GL_DEBUG_SOURCE_THIRD_PARTY_ARB) + strcpy(debSource, "Third Party"); + else if(source == GL_DEBUG_SOURCE_APPLICATION_ARB) + strcpy(debSource, "Application"); + else if(source == GL_DEBUG_SOURCE_OTHER_ARB) + strcpy(debSource, "Other"); + + if(type == GL_DEBUG_TYPE_ERROR_ARB) + strcpy(debType, "Error"); + else if(type == GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB) + strcpy(debType, "Deprecated behavior"); + else if(type == GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB) + strcpy(debType, "Undefined behavior"); + else if(type == GL_DEBUG_TYPE_PORTABILITY_ARB) + strcpy(debType, "Portability"); + else if(type == GL_DEBUG_TYPE_PERFORMANCE_ARB) + strcpy(debType, "Performance"); + else if(type == GL_DEBUG_TYPE_OTHER_ARB) + strcpy(debType, "Other"); + + if(severity == GL_DEBUG_SEVERITY_HIGH_ARB) + strcpy(debSev, "High"); + else if(severity == GL_DEBUG_SEVERITY_MEDIUM_ARB) + strcpy(debSev, "Medium"); + else if(severity == GL_DEBUG_SEVERITY_LOW_ARB) + strcpy(debSev, "Low"); + + String output = String()+ "GL ERROR: Source: " + debSource + "\tType: " + debType + "\tID: " + itos(id) + "\tSeverity: " + debSev + "\tMessage: " + message; + + ERR_PRINTS(output); + +} + + +void RasterizerGLES3::initialize() { + + if (OS::get_singleton()->is_stdout_verbose()) { + print_line("Using GLES3 video driver"); + } + +#ifdef GLEW_ENABLED + GLuint res = glewInit(); + ERR_FAIL_COND(res!=GLEW_OK); + if (OS::get_singleton()->is_stdout_verbose()) { + print_line(String("GLES2: Using GLEW ") + (const char*) glewGetString(GLEW_VERSION)); + } + + // Check for GL 2.1 compatibility, if not bail out + if (!glewIsSupported("GL_VERSION_3_0")) { + ERR_PRINT("Your system's graphic drivers seem not to support OpenGL 3.0+ / GLES 3.0, sorry :(\n" + "Try a drivers update, buy a new GPU or try software rendering on Linux; Godot will now crash with a segmentation fault."); + OS::get_singleton()->alert("Your system's graphic drivers seem not to support OpenGL 3.0+ / GLES 3.0, sorry :(\n" + "Godot Engine will self-destruct as soon as you acknowledge this error message.", + "Fatal error: Insufficient OpenGL / GLES drivers"); + // TODO: If it's even possible, we should stop the execution without segfault and memory leaks :) + } +#endif + + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); + glDebugMessageCallbackARB(_gl_debug_print, NULL); + glEnable(GL_DEBUG_OUTPUT); + + +/* glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_ERROR_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_PORTABILITY_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_PERFORMANCE_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageControlARB(GL_DEBUG_SOURCE_API_ARB,GL_DEBUG_TYPE_OTHER_ARB,GL_DEBUG_SEVERITY_HIGH_ARB,0,NULL,GL_TRUE); + glDebugMessageInsertARB( + + GL_DEBUG_SOURCE_API_ARB, + GL_DEBUG_TYPE_OTHER_ARB, 1, + GL_DEBUG_SEVERITY_HIGH_ARB,5, "hello"); + +*/ + storage->initialize(); + canvas->initialize(); +} + +void RasterizerGLES3::begin_frame(){ + + +} +void RasterizerGLES3::set_current_render_target(RID p_render_target){ + + if (!p_render_target.is_valid() && storage->frame.current_rt && storage->frame.clear_request) { + //handle pending clear request, if the framebuffer was not cleared + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + glClearColor( + storage->frame.clear_request_color.r, + storage->frame.clear_request_color.g, + storage->frame.clear_request_color.b, + storage->frame.clear_request_color.a ); + + glClear(GL_COLOR_BUFFER_BIT); + + } + + if (p_render_target.is_valid()) { + RasterizerStorageGLES3::RenderTarget * rt = storage->render_target_owner.getornull(p_render_target); + if (!rt) { + storage->frame.current_rt=NULL; + } + ERR_FAIL_COND(!rt); + storage->frame.current_rt=rt; + storage->frame.clear_request=false; + + glViewport(0,0,rt->width,rt->height); + + } else { + storage->frame.current_rt=NULL; + storage->frame.clear_request=false; + glViewport(0,0,OS::get_singleton()->get_window_size().width,OS::get_singleton()->get_window_size().height); + glBindFramebuffer(GL_FRAMEBUFFER,storage->config.system_fbo); + } +} + +void RasterizerGLES3::restore_render_target() { + + ERR_FAIL_COND(storage->frame.current_rt==NULL); + RasterizerStorageGLES3::RenderTarget * rt = storage->frame.current_rt; + glViewport(0,0,rt->width,rt->height); + +} + +void RasterizerGLES3::clear_render_target(const Color& p_color) { + + ERR_FAIL_COND(!storage->frame.current_rt); + + storage->frame.clear_request=true; + storage->frame.clear_request_color=p_color; + +} + +void RasterizerGLES3::blit_render_target_to_screen(RID p_render_target,const Rect2& p_screen_rect,int p_screen){ + + ERR_FAIL_COND( storage->frame.current_rt ); + + RasterizerStorageGLES3::RenderTarget *rt = storage->render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + + canvas->canvas_begin(); + glBindFramebuffer(GL_FRAMEBUFFER,storage->config.system_fbo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,rt->front.color); + canvas->draw_generic_textured_rect(p_screen_rect,Rect2(0,0,1,-1)); + glBindTexture(GL_TEXTURE_2D,0); + canvas->canvas_end(); +} + +void RasterizerGLES3::end_frame(){ + +#if 0 + canvas->canvas_begin(); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + glDisable(GL_BLEND); + glDisable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + + + float vtx[8]={0,0, + 0,1, + 1,1, + 1,0 + }; + + glBindBuffer(GL_ARRAY_BUFFER,0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); + + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, 0, vtx ); + + +// glBindBuffer(GL_ARRAY_BUFFER,canvas->data.canvas_quad_vertices); +// glEnableVertexAttribArray(VS::ARRAY_VERTEX); +// glVertexAttribPointer( VS::ARRAY_VERTEX, 2 ,GL_FLOAT, false, 0, 0 ); + + glBindVertexArray(canvas->data.canvas_quad_array); + + canvas->draw_generic_textured_rect(Rect2(0,0,15,15),Rect2(0,0,1,1)); +#endif + if (ContextGL::get_singleton()) + ContextGL::get_singleton()->swap_buffers(); +} + +void RasterizerGLES3::finalize(){ + + storage->finalize(); + canvas->finalize(); +} + + +Rasterizer *RasterizerGLES3::_create_current() { + + return memnew( RasterizerGLES3 ); +} + +void RasterizerGLES3::make_current() { + _create_func=_create_current; +} + + +void RasterizerGLES3::register_config() { + + GLOBAL_DEF("rendering/gles3/framebuffer_format",RasterizerStorageGLES3::FBO_FORMAT_FLOAT); + Globals::get_singleton()->set_custom_property_info("rendering/gles3/framebuffer_format",PropertyInfo(Variant::INT,"",PROPERTY_HINT_ENUM,"16 Bits,32 Bits,Half Float")); + GLOBAL_DEF("rendering/gles3/lighting_technique",1); + Globals::get_singleton()->set_custom_property_info("rendering/gles3/lighting_technique",PropertyInfo(Variant::INT,"",PROPERTY_HINT_ENUM,"Forward,Deferred")); + GLOBAL_DEF("rendering/gles3/use_nearest_mipmap_filter",false); + GLOBAL_DEF("rendering/gles3/anisotropic_filter_level",4.0); + + +} + +RasterizerGLES3::RasterizerGLES3() +{ + + storage = memnew( RasterizerStorageGLES3 ); + canvas = memnew( RasterizerCanvasGLES3 ); + canvas->storage=storage; + + +} + +RasterizerGLES3::~RasterizerGLES3() { + + memdelete(storage); + memdelete(canvas); +} diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h new file mode 100644 index 00000000000..d461664ea22 --- /dev/null +++ b/drivers/gles3/rasterizer_gles3.h @@ -0,0 +1,38 @@ +#ifndef RASTERIZERGLES3_H +#define RASTERIZERGLES3_H + +#include "servers/visual/rasterizer.h" +#include "rasterizer_storage_gles3.h" +#include "rasterizer_canvas_gles3.h" + + +class RasterizerGLES3 : public Rasterizer { + + static Rasterizer *_create_current(); + + RasterizerStorageGLES3 *storage; + RasterizerCanvasGLES3 *canvas; +public: + + virtual RasterizerStorage *get_storage(); + virtual RasterizerCanvas *get_canvas(); + virtual RasterizerScene *get_scene(); + + virtual void initialize(); + virtual void begin_frame(); + virtual void set_current_render_target(RID p_render_target); + virtual void restore_render_target(); + virtual void clear_render_target(const Color& p_color); + virtual void blit_render_target_to_screen(RID p_render_target,const Rect2& p_screen_rect,int p_screen=0); + virtual void end_frame(); + virtual void finalize(); + + static void make_current(); + + + static void register_config(); + RasterizerGLES3(); + ~RasterizerGLES3(); +}; + +#endif // RASTERIZERGLES3_H diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp new file mode 100644 index 00000000000..b29de876a47 --- /dev/null +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -0,0 +1,2045 @@ +#include "rasterizer_storage_gles3.h" +#include "globals.h" + +/* TEXTURE API */ + +#define _EXT_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define _EXT_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define _EXT_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define _EXT_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 + +#define _EXT_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT 0x8A54 +#define _EXT_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT 0x8A55 +#define _EXT_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT 0x8A56 +#define _EXT_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT 0x8A57 + + +#define _EXT_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define _EXT_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define _EXT_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 + +#define _EXT_COMPRESSED_LUMINANCE_LATC1_EXT 0x8C70 +#define _EXT_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT 0x8C71 +#define _EXT_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT 0x8C72 +#define _EXT_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT 0x8C73 + + +#define _EXT_COMPRESSED_RED_RGTC1_EXT 0x8DBB +#define _EXT_COMPRESSED_RED_RGTC1 0x8DBB +#define _EXT_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define _EXT_COMPRESSED_RG_RGTC2 0x8DBD +#define _EXT_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE +#define _EXT_COMPRESSED_SIGNED_RED_RGTC1_EXT 0x8DBC +#define _EXT_COMPRESSED_RED_GREEN_RGTC2_EXT 0x8DBD +#define _EXT_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT 0x8DBE +#define _EXT_ETC1_RGB8_OES 0x8D64 + + + +#define _EXT_SLUMINANCE_NV 0x8C46 +#define _EXT_SLUMINANCE_ALPHA_NV 0x8C44 +#define _EXT_SRGB8_NV 0x8C41 +#define _EXT_SLUMINANCE8_NV 0x8C47 +#define _EXT_SLUMINANCE8_ALPHA8_NV 0x8C45 + + +#define _EXT_COMPRESSED_SRGB_S3TC_DXT1_NV 0x8C4C +#define _EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_NV 0x8C4D +#define _EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_NV 0x8C4E +#define _EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_NV 0x8C4F + + + +#define _EXT_ATC_RGB_AMD 0x8C92 +#define _EXT_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define _EXT_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE + + +#define _TEXTURE_SRGB_DECODE_EXT 0x8A48 +#define _DECODE_EXT 0x8A49 +#define _SKIP_DECODE_EXT 0x8A4A + + +#define _GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define _GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF + + + +Image RasterizerStorageGLES3::_get_gl_image_and_format(const Image& p_image, Image::Format p_format, uint32_t p_flags,GLenum& r_gl_format,GLenum& r_gl_internal_format,GLenum &r_gl_type,bool &r_compressed,bool &srgb) { + + + r_compressed=false; + r_gl_format=0; + Image image=p_image; + srgb=false; + + bool need_decompress=false; + + switch(p_format) { + + case Image::FORMAT_L8: { + r_gl_internal_format=GL_LUMINANCE; + r_gl_format=GL_LUMINANCE; + r_gl_type=GL_UNSIGNED_BYTE; + + } break; + case Image::FORMAT_LA8: { + + r_gl_internal_format=GL_LUMINANCE_ALPHA; + r_gl_format=GL_LUMINANCE_ALPHA; + r_gl_type=GL_UNSIGNED_BYTE; + + } break; + case Image::FORMAT_R8: { + + r_gl_internal_format=GL_R8; + r_gl_format=GL_RED; + r_gl_type=GL_UNSIGNED_BYTE; + + } break; + case Image::FORMAT_RG8: { + + r_gl_internal_format=GL_RG8; + r_gl_format=GL_RG; + r_gl_type=GL_UNSIGNED_BYTE; + + } break; + case Image::FORMAT_RGB8: { + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_SRGB8:GL_RGB8; + r_gl_format=GL_RGB; + r_gl_type=GL_UNSIGNED_BYTE; + srgb=true; + + } break; + case Image::FORMAT_RGBA8: { + + r_gl_format=GL_RGBA; + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_SRGB8_ALPHA8:GL_RGBA8; + r_gl_type=GL_UNSIGNED_BYTE; + srgb=true; + + } break; + case Image::FORMAT_RGB565: { + + r_gl_internal_format=GL_RGB565; + r_gl_format=GL_RGB; + r_gl_type=GL_UNSIGNED_SHORT_5_6_5; + + } break; + case Image::FORMAT_RGBA4444: { + + r_gl_internal_format=GL_RGBA4; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_SHORT_4_4_4_4; + + } break; + case Image::FORMAT_RGBA5551: { + + r_gl_internal_format=GL_RGB5_A1; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_SHORT_5_5_5_1; + + + } break; + case Image::FORMAT_RF: { + + + r_gl_internal_format=GL_R32F; + r_gl_format=GL_RED; + r_gl_type=GL_FLOAT; + + } break; + case Image::FORMAT_RGF: { + + r_gl_internal_format=GL_RG32F; + r_gl_format=GL_RG; + r_gl_type=GL_FLOAT; + + } break; + case Image::FORMAT_RGBF: { + + r_gl_internal_format=GL_RGB32F; + r_gl_format=GL_RGB; + r_gl_type=GL_FLOAT; + + } break; + case Image::FORMAT_RGBAF: { + + r_gl_internal_format=GL_RGBA32F; + r_gl_format=GL_RGBA; + r_gl_type=GL_FLOAT; + + } break; + case Image::FORMAT_RH: { + r_gl_internal_format=GL_R32F; + r_gl_format=GL_RED; + r_gl_type=GL_HALF_FLOAT; + } break; + case Image::FORMAT_RGH: { + r_gl_internal_format=GL_RG32F; + r_gl_format=GL_RG; + r_gl_type=GL_HALF_FLOAT; + + } break; + case Image::FORMAT_RGBH: { + r_gl_internal_format=GL_RGB32F; + r_gl_format=GL_RGB; + r_gl_type=GL_HALF_FLOAT; + + } break; + case Image::FORMAT_RGBAH: { + r_gl_internal_format=GL_RGBA32F; + r_gl_format=GL_RGBA; + r_gl_type=GL_HALF_FLOAT; + + } break; + case Image::FORMAT_DXT1: { + + if (config.s3tc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_NV:_EXT_COMPRESSED_RGBA_S3TC_DXT1_EXT; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + } break; + case Image::FORMAT_DXT3: { + + + if (config.s3tc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_NV:_EXT_COMPRESSED_RGBA_S3TC_DXT3_EXT; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + } break; + case Image::FORMAT_DXT5: { + + if (config.s3tc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_NV:_EXT_COMPRESSED_RGBA_S3TC_DXT5_EXT; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + } break; + case Image::FORMAT_ATI1: { + + if (config.latc_supported) { + + + r_gl_internal_format=_EXT_COMPRESSED_LUMINANCE_LATC1_EXT; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + + } break; + case Image::FORMAT_ATI2: { + + if (config.latc_supported) { + + + r_gl_internal_format=_EXT_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + } else { + + need_decompress=true; + } + + } break; + case Image::FORMAT_BPTC_RGBA: { + + if (config.bptc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:GL_COMPRESSED_RGBA_BPTC_UNORM; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_BPTC_RGBF: { + + if (config.bptc_supported) { + + + r_gl_internal_format=GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + r_gl_format=GL_RGB; + r_gl_type=GL_FLOAT; + r_compressed=true; + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_BPTC_RGBFU: { + if (config.bptc_supported) { + + + r_gl_internal_format=GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + r_gl_format=GL_RGB; + r_gl_type=GL_FLOAT; + r_compressed=true; + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_PVRTC2: { + + if (config.pvrtc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_PVRTC_2BPPV1_EXT:_EXT_COMPRESSED_RGB_PVRTC_2BPPV1_IMG; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_PVRTC2A: { + + if (config.pvrtc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT:_EXT_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + } break; + case Image::FORMAT_PVRTC4: { + + if (config.pvrtc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_PVRTC_4BPPV1_EXT:_EXT_COMPRESSED_RGB_PVRTC_4BPPV1_IMG; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + } break; + case Image::FORMAT_PVRTC4A: { + + if (config.pvrtc_supported) { + + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?_EXT_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT:_EXT_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + } else { + + need_decompress=true; + } + + + } break; + case Image::FORMAT_ETC: { + + if (config.etc_supported) { + + r_gl_internal_format=_EXT_ETC1_RGB8_OES; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + + } else { + + need_decompress=true; + } + + } break; + case Image::FORMAT_ETC2_R11: { + + if (config.etc2_supported) { + + r_gl_internal_format=GL_COMPRESSED_R11_EAC; + r_gl_format=GL_RED; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_R11S: { + + if (config.etc2_supported) { + + r_gl_internal_format=GL_COMPRESSED_SIGNED_R11_EAC; + r_gl_format=GL_RED; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_RG11: { + + if (config.etc2_supported) { + + r_gl_internal_format=GL_COMPRESSED_RG11_EAC; + r_gl_format=GL_RG; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_RG11S: { + if (config.etc2_supported) { + + r_gl_internal_format=GL_COMPRESSED_SIGNED_RG11_EAC; + r_gl_format=GL_RG; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + + } else { + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_RGB8: { + + if (config.etc2_supported) { + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_COMPRESSED_SRGB8_ETC2:GL_COMPRESSED_RGB8_ETC2; + r_gl_format=GL_RGB; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_RGBA8: { + + if (config.etc2_supported) { + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:GL_COMPRESSED_RGBA8_ETC2_EAC; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + + } else { + + need_decompress=true; + } + } break; + case Image::FORMAT_ETC2_RGB8A1: { + + if (config.etc2_supported) { + + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2; + r_gl_format=GL_RGBA; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=true; + srgb=true; + + + } else { + + need_decompress=true; + } + } break; + default: { + + ERR_FAIL_V(Image()); + } + } + + if (need_decompress) { + + if (!image.empty()) { + image.decompress(); + ERR_FAIL_COND_V(image.is_compressed(),image); + image.convert(Image::FORMAT_RGBA8); + } + + + r_gl_format=GL_RGBA; + r_gl_internal_format=(config.srgb_decode_supported || p_flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)?GL_SRGB8_ALPHA8:GL_RGBA8; + r_gl_type=GL_UNSIGNED_BYTE; + r_compressed=false; + srgb=true; + + return image; + + } + + + return image; +} + +static const GLenum _cube_side_enum[6]={ + + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + +}; + +RID RasterizerStorageGLES3::texture_create() { + + Texture *texture = memnew(Texture); + ERR_FAIL_COND_V(!texture,RID()); + glGenTextures(1, &texture->tex_id); + texture->active=false; + texture->total_data_size=0; + + return texture_owner.make_rid( texture ); + +} + +void RasterizerStorageGLES3::texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags) { + + int components; + GLenum format; + GLenum internal_format; + GLenum type; + + bool compressed; + bool srgb; + + if (p_flags&VS::TEXTURE_FLAG_USED_FOR_STREAMING) { + p_flags&=~VS::TEXTURE_FLAG_MIPMAPS; // no mipies for video + } + + + Texture *texture = texture_owner.get( p_texture ); + ERR_FAIL_COND(!texture); + texture->width=p_width; + texture->height=p_height; + texture->format=p_format; + texture->flags=p_flags; + texture->target = (p_flags & VS::TEXTURE_FLAG_CUBEMAP) ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; + + _get_gl_image_and_format(Image(),texture->format,texture->flags,format,internal_format,type,compressed,srgb); + + texture->alloc_width = texture->width; + texture->alloc_height = texture->height; + + + texture->gl_format_cache=format; + texture->gl_type_cache=type; + texture->gl_internal_format_cache=internal_format; + texture->compressed=compressed; + texture->srgb=srgb; + texture->data_size=0; + texture->mipmaps=1; + + + glActiveTexture(GL_TEXTURE0); + glBindTexture(texture->target, texture->tex_id); + + + if (p_flags&VS::TEXTURE_FLAG_USED_FOR_STREAMING) { + //prealloc if video + glTexImage2D(texture->target, 0, internal_format, p_width, p_height, 0, format, type,NULL); + } + + texture->active=true; +} + +void RasterizerStorageGLES3::texture_set_data(RID p_texture,const Image& p_image,VS::CubeMapSide p_cube_side) { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND(!texture); + ERR_FAIL_COND(!texture->active); + ERR_FAIL_COND(texture->render_target); + ERR_FAIL_COND(texture->format != p_image.get_format() ); + ERR_FAIL_COND( p_image.empty() ); + + GLenum type; + GLenum format; + GLenum internal_format; + bool compressed; + bool srgb; + + + Image img = _get_gl_image_and_format(p_image, p_image.get_format(),texture->flags,format,internal_format,type,compressed,srgb); + + if (config.shrink_textures_x2 && (p_image.has_mipmaps() || !p_image.is_compressed()) && !(texture->flags&VS::TEXTURE_FLAG_USED_FOR_STREAMING)) { + + texture->alloc_height = MAX(1,texture->alloc_height/2); + texture->alloc_width = MAX(1,texture->alloc_width/2); + + if (texture->alloc_width == img.get_width()/2 && texture->alloc_height == img.get_height()/2) { + + img.shrink_x2(); + } else if (img.get_format() <= Image::FORMAT_RGB565) { + + img.resize(texture->alloc_width, texture->alloc_height, Image::INTERPOLATE_BILINEAR); + + } + }; + + + GLenum blit_target = (texture->target == GL_TEXTURE_CUBE_MAP)?_cube_side_enum[p_cube_side]:GL_TEXTURE_2D; + + texture->data_size=img.get_data().size(); + DVector::Read read = img.get_data().read(); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(texture->target, texture->tex_id); + + texture->ignore_mipmaps = compressed && !img.has_mipmaps(); + + if (texture->flags&VS::TEXTURE_FLAG_MIPMAPS && !texture->ignore_mipmaps) + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,config.use_fast_texture_filter?GL_LINEAR_MIPMAP_NEAREST:GL_LINEAR_MIPMAP_LINEAR); + else { + if (texture->flags&VS::TEXTURE_FLAG_FILTER) { + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,GL_LINEAR); + } else { + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,GL_NEAREST); + + } + } + + + if (config.srgb_decode_supported && srgb) { + + if (texture->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { + + glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + } else { + glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + } + } + + if (texture->flags&VS::TEXTURE_FLAG_FILTER) { + + glTexParameteri(texture->target,GL_TEXTURE_MAG_FILTER,GL_LINEAR); // Linear Filtering + + } else { + + glTexParameteri(texture->target,GL_TEXTURE_MAG_FILTER,GL_NEAREST); // raw Filtering + } + + if ((texture->flags&VS::TEXTURE_FLAG_REPEAT || texture->flags&VS::TEXTURE_FLAG_MIRRORED_REPEAT) && texture->target != GL_TEXTURE_CUBE_MAP) { + + if (texture->flags&VS::TEXTURE_FLAG_MIRRORED_REPEAT){ + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_MIRRORED_REPEAT ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_MIRRORED_REPEAT ); + } + else{ + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT ); + } + } else { + + //glTexParameterf( texture->target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE ); + glTexParameterf( texture->target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); + glTexParameterf( texture->target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); + } + + if (config.use_anisotropic_filter) { + + if (texture->flags&VS::TEXTURE_FLAG_ANISOTROPIC_FILTER) { + + glTexParameterf(texture->target, _GL_TEXTURE_MAX_ANISOTROPY_EXT, config.anisotropic_level); + } else { + glTexParameterf(texture->target, _GL_TEXTURE_MAX_ANISOTROPY_EXT, 1); + } + } + + int mipmaps= (texture->flags&VS::TEXTURE_FLAG_MIPMAPS && img.has_mipmaps()) ? img.get_mipmap_count() +1: 1; + + + int w=img.get_width(); + int h=img.get_height(); + + int tsize=0; + for(int i=0;icompressed) { + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + glCompressedTexImage2D( blit_target, i, format,w,h,0,size,&read[ofs] ); + + } else { + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + if (texture->flags&VS::TEXTURE_FLAG_USED_FOR_STREAMING) { + glTexSubImage2D( blit_target, i, 0,0,w, h,format,type,&read[ofs] ); + } else { + glTexImage2D(blit_target, i, internal_format, w, h, 0, format, type,&read[ofs]); + } + + } + tsize+=size; + + w = MAX(1,w>>1); + h = MAX(1,h>>1); + + } + + info.texture_mem-=texture->total_data_size; + texture->total_data_size=tsize; + info.texture_mem+=texture->total_data_size; + + //printf("texture: %i x %i - size: %i - total: %i\n",texture->width,texture->height,tsize,_rinfo.texture_mem); + + + if (texture->flags&VS::TEXTURE_FLAG_MIPMAPS && mipmaps==1 && !texture->ignore_mipmaps) { + //generate mipmaps if they were requested and the image does not contain them + glGenerateMipmap(texture->target); + } + + texture->mipmaps=mipmaps; + + //texture_set_flags(p_texture,texture->flags); + + +} + +Image RasterizerStorageGLES3::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_side) const { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND_V(!texture,Image()); + ERR_FAIL_COND_V(!texture->active,Image()); + ERR_FAIL_COND_V(texture->data_size==0,Image()); + ERR_FAIL_COND_V(texture->render_target,Image()); + +#ifdef GLEW_ENABLED + + DVector data; + + int data_size = Image::get_image_data_size(texture->width,texture->height,texture->format,texture->mipmaps>1?-1:0); + + data.resize(data_size); + DVector::Write wb = data.write(); + + glActiveTexture(GL_TEXTURE0); + + glBindTexture(texture->target,texture->tex_id); + + for(int i=0;imipmaps;i++) { + + int ofs=0; + if (i>0) { + ofs=Image::get_image_data_size(texture->alloc_width,texture->alloc_height,texture->format,i-1); + } + + if (texture->compressed) { + + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glGetCompressedTexImage(texture->target,i,&wb[ofs]); + + } else { + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glGetTexImage(texture->target,i,texture->gl_format_cache,texture->gl_type_cache,&wb[ofs]); + } + } + + + wb=DVector::Write(); + + Image img(texture->alloc_width,texture->alloc_height,texture->mipmaps>1?true:false,texture->format,data); + + return img; +#else + + ERR_EXPLAIN("Sorry, It's not posible to obtain images back in OpenGL ES"); +#endif +} + +void RasterizerStorageGLES3::texture_set_flags(RID p_texture,uint32_t p_flags) { + + Texture *texture = texture_owner.get( p_texture ); + ERR_FAIL_COND(!texture); + if (texture->render_target) { + + p_flags&=VS::TEXTURE_FLAG_FILTER;//can change only filter + } + + bool had_mipmaps = texture->flags&VS::TEXTURE_FLAG_MIPMAPS; + + glActiveTexture(GL_TEXTURE0); + glBindTexture(texture->target, texture->tex_id); + uint32_t cube = texture->flags & VS::TEXTURE_FLAG_CUBEMAP; + texture->flags=p_flags|cube; // can't remove a cube from being a cube + + + if ((texture->flags&VS::TEXTURE_FLAG_REPEAT || texture->flags&VS::TEXTURE_FLAG_MIRRORED_REPEAT) && texture->target != GL_TEXTURE_CUBE_MAP) { + + if (texture->flags&VS::TEXTURE_FLAG_MIRRORED_REPEAT){ + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_MIRRORED_REPEAT ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_MIRRORED_REPEAT ); + } + else { + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT ); + glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT ); + } + } else { + //glTexParameterf( texture->target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE ); + glTexParameterf( texture->target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); + glTexParameterf( texture->target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); + + } + + + if (config.use_anisotropic_filter) { + + if (texture->flags&VS::TEXTURE_FLAG_ANISOTROPIC_FILTER) { + + glTexParameterf(texture->target, _GL_TEXTURE_MAX_ANISOTROPY_EXT, config.anisotropic_level); + } else { + glTexParameterf(texture->target, _GL_TEXTURE_MAX_ANISOTROPY_EXT, 1); + } + } + + if (texture->flags&VS::TEXTURE_FLAG_MIPMAPS && !texture->ignore_mipmaps) { + if (!had_mipmaps && texture->mipmaps==1) { + glGenerateMipmap(texture->target); + } + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,config.use_fast_texture_filter?GL_LINEAR_MIPMAP_NEAREST:GL_LINEAR_MIPMAP_LINEAR); + + } else{ + if (texture->flags&VS::TEXTURE_FLAG_FILTER) { + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,GL_LINEAR); + } else { + glTexParameteri(texture->target,GL_TEXTURE_MIN_FILTER,GL_NEAREST); + + } + } + + + if (config.srgb_decode_supported && texture->srgb) { + + if (texture->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { + + glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + } else { + glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + } + } + + if (texture->flags&VS::TEXTURE_FLAG_FILTER) { + + glTexParameteri(texture->target,GL_TEXTURE_MAG_FILTER,GL_LINEAR); // Linear Filtering + + } else { + + glTexParameteri(texture->target,GL_TEXTURE_MAG_FILTER,GL_NEAREST); // raw Filtering + } + +} +uint32_t RasterizerStorageGLES3::texture_get_flags(RID p_texture) const { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND_V(!texture,0); + + return texture->flags; + +} +Image::Format RasterizerStorageGLES3::texture_get_format(RID p_texture) const { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND_V(!texture,Image::FORMAT_L8); + + return texture->format; +} +uint32_t RasterizerStorageGLES3::texture_get_width(RID p_texture) const { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND_V(!texture,0); + + return texture->width; +} +uint32_t RasterizerStorageGLES3::texture_get_height(RID p_texture) const { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND_V(!texture,0); + + return texture->height; +} + + +void RasterizerStorageGLES3::texture_set_size_override(RID p_texture,int p_width, int p_height) { + + Texture * texture = texture_owner.get(p_texture); + + ERR_FAIL_COND(!texture); + ERR_FAIL_COND(texture->render_target); + + ERR_FAIL_COND(p_width<=0 || p_width>16384); + ERR_FAIL_COND(p_height<=0 || p_height>16384); + //real texture size is in alloc width and height + texture->width=p_width; + texture->height=p_height; + +} + +void RasterizerStorageGLES3::texture_set_path(RID p_texture,const String& p_path) { + Texture * texture = texture_owner.get(p_texture); + ERR_FAIL_COND(!texture); + + texture->path=p_path; + +} + +String RasterizerStorageGLES3::texture_get_path(RID p_texture) const{ + + Texture * texture = texture_owner.get(p_texture); + ERR_FAIL_COND_V(!texture,String()); + return texture->path; +} +void RasterizerStorageGLES3::texture_debug_usage(List *r_info){ + + List textures; + texture_owner.get_owned_list(&textures); + + for (List::Element *E=textures.front();E;E=E->next()) { + + Texture *t = texture_owner.get(E->get()); + if (!t) + continue; + VS::TextureInfo tinfo; + tinfo.path=t->path; + tinfo.format=t->format; + tinfo.size.x=t->alloc_width; + tinfo.size.y=t->alloc_height; + tinfo.bytes=t->total_data_size; + r_info->push_back(tinfo); + } + +} + +void RasterizerStorageGLES3::texture_set_shrink_all_x2_on_set_data(bool p_enable) { + + config.shrink_textures_x2=p_enable; +} + + + +/* SHADER API */ + + +RID RasterizerStorageGLES3::shader_create(VS::ShaderMode p_mode){ + + return RID(); +} + +void RasterizerStorageGLES3::shader_set_mode(RID p_shader,VS::ShaderMode p_mode){ + + +} +VS::ShaderMode RasterizerStorageGLES3::shader_get_mode(RID p_shader) const { + + return VS::SHADER_SPATIAL; +} +void RasterizerStorageGLES3::shader_set_code(RID p_shader, const String& p_code){ + + +} +String RasterizerStorageGLES3::shader_get_code(RID p_shader) const{ + + return String(); +} +void RasterizerStorageGLES3::shader_get_param_list(RID p_shader, List *p_param_list) const{ + + +} + +void RasterizerStorageGLES3::shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture){ + + +} +RID RasterizerStorageGLES3::shader_get_default_texture_param(RID p_shader, const StringName& p_name) const{ + + return RID(); +} + + +/* COMMON MATERIAL API */ + +RID RasterizerStorageGLES3::material_create(){ + + return RID(); +} + +void RasterizerStorageGLES3::material_set_shader(RID p_shader_material, RID p_shader){ + + +} +RID RasterizerStorageGLES3::material_get_shader(RID p_shader_material) const{ + + return RID(); +} + +void RasterizerStorageGLES3::material_set_param(RID p_material, const StringName& p_param, const Variant& p_value){ + + +} +Variant RasterizerStorageGLES3::material_get_param(RID p_material, const StringName& p_param) const{ + + return Variant(); +} + +/* MESH API */ + +RID RasterizerStorageGLES3::mesh_create(){ + + return RID(); +} + +void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes){ + + +} + +void RasterizerStorageGLES3::mesh_set_morph_target_count(RID p_mesh,int p_amount){ + + +} +int RasterizerStorageGLES3::mesh_get_morph_target_count(RID p_mesh) const{ + + return 0; +} + + +void RasterizerStorageGLES3::mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode){ + + +} +VS::MorphTargetMode RasterizerStorageGLES3::mesh_get_morph_target_mode(RID p_mesh) const{ + + return VS::MORPH_MODE_NORMALIZED; +} + +void RasterizerStorageGLES3::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material){ + + +} +RID RasterizerStorageGLES3::mesh_surface_get_material(RID p_mesh, int p_surface) const{ + + return RID(); +} + +int RasterizerStorageGLES3::mesh_surface_get_array_len(RID p_mesh, int p_surface) const{ + + return 0; +} +int RasterizerStorageGLES3::mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const{ + + + return 0; +} + +DVector RasterizerStorageGLES3::mesh_surface_get_array(RID p_mesh, int p_surface) const{ + + return DVector(); +} +DVector RasterizerStorageGLES3::mesh_surface_get_index_array(RID p_mesh, int p_surface) const{ + + + return DVector(); +} + + +uint32_t RasterizerStorageGLES3::mesh_surface_get_format(RID p_mesh, int p_surface) const{ + + return 0; +} +VS::PrimitiveType RasterizerStorageGLES3::mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const{ + + return VS::PRIMITIVE_MAX; +} + +void RasterizerStorageGLES3::mesh_remove_surface(RID p_mesh,int p_index){ + + +} +int RasterizerStorageGLES3::mesh_get_surface_count(RID p_mesh) const{ + + return 0; +} + +void RasterizerStorageGLES3::mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb){ + + +} +AABB RasterizerStorageGLES3::mesh_get_custom_aabb(RID p_mesh) const{ + + return AABB(); +} + +AABB RasterizerStorageGLES3::mesh_get_aabb(RID p_mesh) const{ + + return AABB(); +} +void RasterizerStorageGLES3::mesh_clear(RID p_mesh){ + + +} + +/* MULTIMESH API */ + + +RID RasterizerStorageGLES3::multimesh_create(){ + + return RID(); +} + +void RasterizerStorageGLES3::multimesh_allocate(RID p_multimesh,int p_instances,VS::MultimeshTransformFormat p_transform_format,VS::MultimeshColorFormat p_color_format,bool p_gen_aabb){ + + +} +int RasterizerStorageGLES3::multimesh_get_instance_count(RID p_multimesh) const{ + + return 0; +} + +void RasterizerStorageGLES3::multimesh_set_mesh(RID p_multimesh,RID p_mesh){ + + +} +void RasterizerStorageGLES3::multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb){ + + +} +void RasterizerStorageGLES3::multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform){ + + +} +void RasterizerStorageGLES3::multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform){ + + +} +void RasterizerStorageGLES3::multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color){ + + +} + +RID RasterizerStorageGLES3::multimesh_get_mesh(RID p_multimesh) const{ + + + return RID(); +} +AABB RasterizerStorageGLES3::multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const{ + + return AABB(); +} + +Transform RasterizerStorageGLES3::multimesh_instance_get_transform(RID p_multimesh,int p_index) const{ + + return Transform(); +} +Matrix32 RasterizerStorageGLES3::multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const{ + + + return Matrix32(); +} +Color RasterizerStorageGLES3::multimesh_instance_get_color(RID p_multimesh,int p_index) const{ + + return Color(); +} + +void RasterizerStorageGLES3::multimesh_set_visible_instances(RID p_multimesh,int p_visible){ + + +} +int RasterizerStorageGLES3::multimesh_get_visible_instances(RID p_multimesh) const{ + + return 0; +} + +AABB RasterizerStorageGLES3::multimesh_get_aabb(RID p_mesh) const{ + + return AABB(); +} + +/* IMMEDIATE API */ + +RID RasterizerStorageGLES3::immediate_create(){ + + return RID(); +} +void RasterizerStorageGLES3::immediate_begin(RID p_immediate,VS::PrimitiveType p_rimitive,RID p_texture){ + + +} +void RasterizerStorageGLES3::immediate_vertex(RID p_immediate,const Vector3& p_vertex){ + + +} +void RasterizerStorageGLES3::immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex){ + + +} +void RasterizerStorageGLES3::immediate_normal(RID p_immediate,const Vector3& p_normal){ + + +} +void RasterizerStorageGLES3::immediate_tangent(RID p_immediate,const Plane& p_tangent){ + + +} +void RasterizerStorageGLES3::immediate_color(RID p_immediate,const Color& p_color){ + + +} +void RasterizerStorageGLES3::immediate_uv(RID p_immediate,const Vector2& tex_uv){ + + +} +void RasterizerStorageGLES3::immediate_uv2(RID p_immediate,const Vector2& tex_uv){ + + +} +void RasterizerStorageGLES3::immediate_end(RID p_immediate){ + + +} +void RasterizerStorageGLES3::immediate_clear(RID p_immediate){ + + +} +void RasterizerStorageGLES3::immediate_set_material(RID p_immediate,RID p_material){ + + +} +RID RasterizerStorageGLES3::immediate_get_material(RID p_immediate) const{ + + return RID(); +} + +/* SKELETON API */ + +RID RasterizerStorageGLES3::skeleton_create(){ + + return RID(); +} +void RasterizerStorageGLES3::skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton){ + + +} +int RasterizerStorageGLES3::skeleton_get_bone_count(RID p_skeleton) const{ + + return 0; +} +void RasterizerStorageGLES3::skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform){ + + +} +Transform RasterizerStorageGLES3::skeleton_bone_get_transform(RID p_skeleton,int p_bone){ + + return Transform(); +} +void RasterizerStorageGLES3::skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform){ + + +} +Matrix32 RasterizerStorageGLES3::skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone){ + + return Matrix32(); +} + +/* Light API */ + +RID RasterizerStorageGLES3::light_create(VS::LightType p_type){ + + return RID(); +} + +void RasterizerStorageGLES3::light_set_color(RID p_light,const Color& p_color){ + + +} +void RasterizerStorageGLES3::light_set_param(RID p_light,VS::LightParam p_param,float p_value){ + + +} +void RasterizerStorageGLES3::light_set_shadow(RID p_light,bool p_enabled){ + + +} +void RasterizerStorageGLES3::light_set_projector(RID p_light,RID p_texture){ + + +} +void RasterizerStorageGLES3::light_set_attenuation_texure(RID p_light,RID p_texture){ + + +} +void RasterizerStorageGLES3::light_set_negative(RID p_light,bool p_enable){ + + +} +void RasterizerStorageGLES3::light_set_cull_mask(RID p_light,uint32_t p_mask){ + + +} +void RasterizerStorageGLES3::light_set_shader(RID p_light,RID p_shader){ + + +} + + +void RasterizerStorageGLES3::light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode){ + + +} + +/* PROBE API */ + +RID RasterizerStorageGLES3::reflection_probe_create(){ + + return RID(); +} + +void RasterizerStorageGLES3::reflection_probe_set_intensity(RID p_probe, float p_intensity){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_clip(RID p_probe, float p_near, float p_far){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_min_blend_distance(RID p_probe, float p_distance){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_extents(RID p_probe, const Vector3& p_extents){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_resolution(RID p_probe, int p_resolution){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_hide_skybox(RID p_probe, bool p_hide){ + + +} +void RasterizerStorageGLES3::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers){ + + +} + + +/* ROOM API */ + +RID RasterizerStorageGLES3::room_create(){ + + return RID(); +} +void RasterizerStorageGLES3::room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform){ + + +} +void RasterizerStorageGLES3::room_clear_bounds(){ + + +} + +/* PORTAL API */ + +// portals are only (x/y) points, forming a convex shape, which its clockwise +// order points outside. (z is 0); + +RID RasterizerStorageGLES3::portal_create(){ + + return RID(); +} +void RasterizerStorageGLES3::portal_set_shape(RID p_portal, const Vector& p_shape){ + + +} +void RasterizerStorageGLES3::portal_set_enabled(RID p_portal, bool p_enabled){ + + +} +void RasterizerStorageGLES3::portal_set_disable_distance(RID p_portal, float p_distance){ + + +} +void RasterizerStorageGLES3::portal_set_disabled_color(RID p_portal, const Color& p_color){ + + +} + + +/* RENDER TARGET */ + + +void RasterizerStorageGLES3::_render_target_clear(RenderTarget *rt) { + + if (rt->front.fbo) { + glDeleteFramebuffers(1,&rt->front.fbo); + glDeleteTextures(1,&rt->front.color); + rt->front.fbo=0; + } + + if (rt->back.fbo) { + glDeleteFramebuffers(1,&rt->back.fbo); + glDeleteTextures(1,&rt->back.color); + rt->back.fbo=0; + } + + if (rt->deferred.fbo) { + glDeleteFramebuffers(1,&rt->deferred.fbo); + glDeleteFramebuffers(1,&rt->deferred.fbo_color); + glDeleteTextures(1,&rt->deferred.albedo_ao); + glDeleteTextures(1,&rt->deferred.normal_special); + glDeleteTextures(1,&rt->deferred.metal_rough_motion); + rt->deferred.fbo=0; + rt->deferred.fbo_color=0; + } + + if (rt->depth) { + glDeleteRenderbuffers(1,&rt->depth); + rt->depth=0; + } + +} + +void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ + + if (rt->width<=0 || rt->height<=0) + return; + + glGenFramebuffers(1, &rt->front.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->front.fbo); + + + glGenRenderbuffers(1, &rt->depth); + glBindRenderbuffer(GL_RENDERBUFFER, rt->depth ); + if (config.fbo_format==FBO_FORMAT_16_BITS) { + glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH_COMPONENT16, rt->width, rt->height); + } else { + glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH24_STENCIL8, rt->width, rt->height); + } + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); + glBindRenderbuffer(GL_RENDERBUFFER, 0 ); + + + glGenTextures(1, &rt->front.color); + glBindTexture(GL_TEXTURE_2D, rt->front.color); + + + GLuint color_internal_format; + GLuint color_format; + GLuint color_type; + + + if (config.fbo_format==FBO_FORMAT_16_BITS) { + + if (rt->flags[RENDER_TARGET_TRANSPARENT]) { + color_internal_format=GL_RGB5_A1; + color_format=GL_RGBA; + color_type=GL_UNSIGNED_SHORT_5_5_5_1; + } else { + color_internal_format=GL_RGB565; + color_format=GL_RGB; + color_type=GL_UNSIGNED_SHORT_5_6_5; + } + + } else if (config.fbo_format==FBO_FORMAT_32_BITS) { + + if (rt->flags[RENDER_TARGET_TRANSPARENT]) { + color_internal_format=GL_RGBA8; + color_format=GL_RGBA; + color_type=GL_UNSIGNED_BYTE; + } else { + color_internal_format=GL_RGB10_A2; + color_format=GL_RGBA; + color_type=GL_UNSIGNED_INT_2_10_10_10_REV; + } + } else if (config.fbo_format==FBO_FORMAT_FLOAT) { + + color_internal_format=GL_RGBA16F; + color_format=GL_RGBA; + color_type=GL_HALF_FLOAT; + } + + glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->front.color, 0); + + { + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + + ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); + } + + + if (!rt->flags[RENDER_TARGET_NO_SAMPLING]) { + + glGenFramebuffers(1, &rt->back.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->back.fbo); + + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); + + glGenTextures(1, &rt->back.color); + glBindTexture(GL_TEXTURE_2D, rt->back.color); + + glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->back.color, 0); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + + if (status != GL_FRAMEBUFFER_COMPLETE) { + _render_target_clear(rt); + ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); + } + } + + + + if (config.fbo_deferred && !rt->flags[RENDER_TARGET_NO_3D]) { + + + //regular fbo + glGenFramebuffers(1, &rt->deferred.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->deferred.fbo); + + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); + + glGenTextures(1, &rt->deferred.albedo_ao); + glBindTexture(GL_TEXTURE_2D, rt->deferred.albedo_ao); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->deferred.albedo_ao, 0); + + glGenTextures(1, &rt->deferred.metal_rough_motion); + glBindTexture(GL_TEXTURE_2D, rt->deferred.metal_rough_motion); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, rt->deferred.metal_rough_motion, 0); + + glGenTextures(1, &rt->deferred.normal_special); + glBindTexture(GL_TEXTURE_2D, rt->deferred.normal_special); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, rt->deferred.normal_special, 0); + + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + + if (status != GL_FRAMEBUFFER_COMPLETE) { + _render_target_clear(rt); + ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); + } + + //regular fbo with color attachment (needed for emission or objects rendered as forward) + + glGenFramebuffers(1, &rt->deferred.fbo_color); + glBindFramebuffer(GL_FRAMEBUFFER, rt->deferred.fbo_color); + + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->deferred.albedo_ao, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, rt->deferred.metal_rough_motion, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, rt->deferred.normal_special, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, rt->front.color, 0); + + + status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + + if (status != GL_FRAMEBUFFER_COMPLETE) { + _render_target_clear(rt); + ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); + } + } + + +} + + +RID RasterizerStorageGLES3::render_target_create(){ + + RenderTarget *rt = memnew( RenderTarget ); + return render_target_owner.make_rid(rt); +} + +void RasterizerStorageGLES3::render_target_set_size(RID p_render_target,int p_width, int p_height){ + + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + + if (rt->width==p_width && rt->height==p_height) + return; + + _render_target_clear(rt); + rt->width=p_width; + rt->height=p_height; + _render_target_allocate(rt); + +} + + +RID RasterizerStorageGLES3::render_target_get_texture(RID p_render_target) const{ + + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND_V(!rt,RID()); + + + return RID(); +} +Image RasterizerStorageGLES3::render_target_get_image(RID p_render_target) const{ + + return Image(); +} +void RasterizerStorageGLES3::render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value) { + + RenderTarget *rt = render_target_owner.getornull(p_render_target); + ERR_FAIL_COND(!rt); + + rt->flags[p_flag]=p_value; + + switch(p_flag) { + case RENDER_TARGET_NO_3D: + case RENDER_TARGET_TRANSPARENT: { + //must reset for these formats + _render_target_clear(rt); + _render_target_allocate(rt); + + } break; + default: {} + } +} + +bool RasterizerStorageGLES3::render_target_renedered_in_frame(RID p_render_target){ + + return false; +} + +/* CANVAS SHADOW */ + + +RID RasterizerStorageGLES3::canvas_light_shadow_buffer_create(int p_width) { + + CanvasLightShadow *cls = memnew( CanvasLightShadow ); + if (p_width>config.max_texture_size) + p_width=config.max_texture_size; + + cls->size=p_width; + cls->height=16; + + glActiveTexture(GL_TEXTURE0); + + glGenFramebuffers(1, &cls->fbo); + glBindFramebuffer(GL_FRAMEBUFFER, cls->fbo); + + glGenRenderbuffers(1, &cls->depth); + glBindRenderbuffer(GL_RENDERBUFFER, cls->depth ); + glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH_COMPONENT24, cls->size, cls->height); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, cls->depth); + glBindRenderbuffer(GL_RENDERBUFFER, 0 ); + + glGenTextures(1,&cls->distance); + glBindTexture(GL_TEXTURE_2D, cls->distance); + if (config.use_rgba_2d_shadows) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, cls->size, cls->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + } else { + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, cls->size, cls->height, 0, GL_RED, GL_FLOAT, NULL); + } + + + + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cls->distance, 0); + + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + //printf("errnum: %x\n",status); + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + + ERR_FAIL_COND_V( status != GL_FRAMEBUFFER_COMPLETE, RID() ); + + return canvas_light_shadow_owner.make_rid(cls); +} + +/* LIGHT SHADOW MAPPING */ + + +RID RasterizerStorageGLES3::canvas_light_occluder_create() { + + CanvasOccluder *co = memnew( CanvasOccluder ); + co->index_id=0; + co->vertex_id=0; + co->len=0; + + return canvas_occluder_owner.make_rid(co); +} + +void RasterizerStorageGLES3::canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines) { + + CanvasOccluder *co = canvas_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!co); + + co->lines=p_lines; + + if (p_lines.size()!=co->len) { + + if (co->index_id) + glDeleteBuffers(1,&co->index_id); + if (co->vertex_id) + glDeleteBuffers(1,&co->vertex_id); + + co->index_id=0; + co->vertex_id=0; + co->len=0; + + } + + if (p_lines.size()) { + + + + DVector geometry; + DVector indices; + int lc = p_lines.size(); + + geometry.resize(lc*6); + indices.resize(lc*3); + + DVector::Write vw=geometry.write(); + DVector::Write iw=indices.write(); + + + DVector::Read lr=p_lines.read(); + + const int POLY_HEIGHT = 16384; + + for(int i=0;ivertex_id) { + glGenBuffers(1,&co->vertex_id); + glBindBuffer(GL_ARRAY_BUFFER,co->vertex_id); + glBufferData(GL_ARRAY_BUFFER,lc*6*sizeof(real_t),vw.ptr(),GL_STATIC_DRAW); + } else { + + glBindBuffer(GL_ARRAY_BUFFER,co->vertex_id); + glBufferSubData(GL_ARRAY_BUFFER,0,lc*6*sizeof(real_t),vw.ptr()); + + } + + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + if (!co->index_id) { + + glGenBuffers(1,&co->index_id); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,co->index_id); + glBufferData(GL_ELEMENT_ARRAY_BUFFER,lc*3*sizeof(uint16_t),iw.ptr(),GL_STATIC_DRAW); + } else { + + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,co->index_id); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,0,lc*3*sizeof(uint16_t),iw.ptr()); + } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); //unbind + + co->len=lc; + + } + +} + +bool RasterizerStorageGLES3::free(RID p_rid){ + + if (render_target_owner.owns(p_rid)) { + + RenderTarget *rt = render_target_owner.getornull(p_rid); + _render_target_clear(rt); + render_target_owner.free(p_rid); + memdelete(rt); + + } else if (texture_owner.owns(p_rid)) { + // delete the texture + Texture *texture = texture_owner.get(p_rid); + info.texture_mem-=texture->total_data_size; + texture_owner.free(p_rid); + memdelete(texture); + } else if (canvas_occluder_owner.owns(p_rid)) { + + + CanvasOccluder *co = canvas_occluder_owner.get(p_rid); + if (co->index_id) + glDeleteBuffers(1,&co->index_id); + if (co->vertex_id) + glDeleteBuffers(1,&co->vertex_id); + + canvas_occluder_owner.free(p_rid); + memdelete(co); + + } else if (canvas_light_shadow_owner.owns(p_rid)) { + + CanvasLightShadow *cls = canvas_light_shadow_owner.get(p_rid); + glDeleteFramebuffers(1,&cls->fbo); + glDeleteRenderbuffers(1,&cls->depth); + glDeleteTextures(1,&cls->distance); + canvas_light_shadow_owner.free(p_rid); + memdelete(cls); + } else { + return false; + } + + return true; +} + +//////////////////////////////////////////// + + +void RasterizerStorageGLES3::initialize() { + + config.fbo_format=FBOFormat(int(Globals::get_singleton()->get("rendering/gles3/framebuffer_format"))); + config.fbo_deferred=int(Globals::get_singleton()->get("rendering/gles3/lighting_technique")); + + config.system_fbo=0; + + + //// extensions config + /// + + { + Vector ext= String((const char*)glGetString( GL_EXTENSIONS )).split(" ",false); + for(int i=0;iget("rendering/gles3/use_nearest_mipmap_filter")); + config.use_anisotropic_filter = config.extensions.has("GL_EXT_texture_filter_anisotropic"); + + config.s3tc_supported=config.extensions.has("GL_EXT_texture_compression_dxt1") || config.extensions.has("GL_EXT_texture_compression_s3tc") || config.extensions.has("WEBGL_compressed_texture_s3tc"); + config.etc_supported=config.extensions.has("GL_OES_compressed_ETC1_RGB8_texture"); + config.latc_supported=config.extensions.has("GL_EXT_texture_compression_latc"); + config.bptc_supported=config.extensions.has("GL_ARB_texture_compression_bptc"); +#ifdef GLEW_ENABLED + config.etc2_supported=false; +#else + config.etc2_supported=true; +#endif + config.pvrtc_supported=config.extensions.has("GL_IMG_texture_compression_pvrtc"); + config.srgb_decode_supported=config.extensions.has("GL_EXT_texture_sRGB_decode"); + + + + config.anisotropic_level=1.0; + config.use_anisotropic_filter=config.extensions.has("GL_EXT_texture_filter_anisotropic"); + if (config.use_anisotropic_filter) { + glGetFloatv(_GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT,&config.anisotropic_level); + config.anisotropic_level=MIN(int(Globals::get_singleton()->get("rendering/gles3/anisotropic_filter_level")),config.anisotropic_level); + } + + + frame.clear_request=false; + + shaders.copy.init(); + + { + //default textures + + + glGenTextures(1, &resources.white_tex); + unsigned char whitetexdata[8*8*3]; + for(int i=0;i<8*8*3;i++) { + whitetexdata[i]=255; + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,resources.white_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE,whitetexdata); + glGenerateMipmap(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,0); + + glGenTextures(1, &resources.black_tex); + unsigned char blacktexdata[8*8*3]; + for(int i=0;i<8*8;i++) { + blacktexdata[i]=0; + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,resources.black_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE,blacktexdata); + glGenerateMipmap(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,0); + + glGenTextures(1, &resources.normal_tex); + unsigned char normaltexdata[8*8*3]; + for(int i=0;i<8*8*3;i+=3) { + normaltexdata[i+0]=128; + normaltexdata[i+1]=128; + normaltexdata[i+2]=255; + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,resources.normal_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE,normaltexdata); + glGenerateMipmap(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D,0); + + } + + glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS,&config.max_texture_image_units); + glGetIntegerv(GL_MAX_TEXTURE_SIZE,&config.max_texture_size); + +#ifdef GLEW_ENABLED + config.use_rgba_2d_shadows=false; +#else + config.use_rgba_2d_shadows=true; +#endif +} + +void RasterizerStorageGLES3::finalize() { + + glDeleteTextures(1, &resources.white_tex); + glDeleteTextures(1, &resources.black_tex); + glDeleteTextures(1, &resources.normal_tex); + +} + + +RasterizerStorageGLES3::RasterizerStorageGLES3() +{ + +} diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h new file mode 100644 index 00000000000..3b2d7d752c9 --- /dev/null +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -0,0 +1,445 @@ +#ifndef RASTERIZERSTORAGEGLES3_H +#define RASTERIZERSTORAGEGLES3_H + +#include "servers/visual/rasterizer.h" +#include "shader_gles3.h" +#include "shaders/copy.glsl.h" +#include "shaders/canvas.glsl.h" + + + + +class RasterizerStorageGLES3 : public RasterizerStorage { +public: + + enum FBOFormat { + FBO_FORMAT_16_BITS, + FBO_FORMAT_32_BITS, + FBO_FORMAT_FLOAT, + }; + + struct Config { + + FBOFormat fbo_format; + bool fbo_deferred; + GLuint system_fbo; //on some devices, such as apple, screen is rendered to yet another fbo. + + bool shrink_textures_x2; + bool use_fast_texture_filter; + bool use_anisotropic_filter; + + bool s3tc_supported; + bool latc_supported; + bool bptc_supported; + bool etc_supported; + bool etc2_supported; + bool pvrtc_supported; + + bool srgb_decode_supported; + + bool use_rgba_2d_shadows; + + float anisotropic_level; + + int max_texture_image_units; + int max_texture_size; + + Set extensions; + } config; + + struct Shaders { + + CopyShaderGLES3 copy; + } shaders; + + struct Resources { + + GLuint white_tex; + GLuint black_tex; + GLuint normal_tex; + + } resources; + + struct Info { + + uint64_t texture_mem; + + } info; + + +///////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////DATA/////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + +///////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////API//////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// + + + /* TEXTURE API */ + + struct RenderTarget; + + struct Texture : public RID_Data { + + String path; + uint32_t flags; + int width,height; + int alloc_width, alloc_height; + Image::Format format; + + GLenum target; + GLenum gl_format_cache; + GLenum gl_internal_format_cache; + GLenum gl_type_cache; + int data_size; //original data size, useful for retrieving back + bool compressed; + bool srgb; + int total_data_size; + bool ignore_mipmaps; + + int mipmaps; + + bool active; + GLuint tex_id; + + + RenderTarget *render_target; + + Texture() { + + ignore_mipmaps=false; + render_target=NULL; + flags=width=height=0; + tex_id=0; + data_size=0; + format=Image::FORMAT_L8; + active=false; + compressed=false; + total_data_size=0; + target=GL_TEXTURE_2D; + mipmaps=0; + + } + + ~Texture() { + + if (tex_id!=0) { + + glDeleteTextures(1,&tex_id); + } + } + }; + + mutable RID_Owner texture_owner; + + Image _get_gl_image_and_format(const Image& p_image, Image::Format p_format, uint32_t p_flags, GLenum& r_gl_format, GLenum& r_gl_internal_format, GLenum &r_type, bool &r_compressed, bool &srgb); + + virtual RID texture_create(); + virtual void texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags=VS::TEXTURE_FLAGS_DEFAULT); + virtual void texture_set_data(RID p_texture,const Image& p_image,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT); + virtual Image texture_get_data(RID p_texture,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT) const; + virtual void texture_set_flags(RID p_texture,uint32_t p_flags); + virtual uint32_t texture_get_flags(RID p_texture) const; + virtual Image::Format texture_get_format(RID p_texture) const; + virtual uint32_t texture_get_width(RID p_texture) const; + virtual uint32_t texture_get_height(RID p_texture) const; + virtual void texture_set_size_override(RID p_texture,int p_width, int p_height); + + virtual void texture_set_path(RID p_texture,const String& p_path); + virtual String texture_get_path(RID p_texture) const; + + virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable); + + virtual void texture_debug_usage(List *r_info); + + + /* SHADER API */ + + struct Shader : public RID_Data { + + + }; + + + virtual RID shader_create(VS::ShaderMode p_mode=VS::SHADER_SPATIAL); + + virtual void shader_set_mode(RID p_shader,VS::ShaderMode p_mode); + virtual VS::ShaderMode shader_get_mode(RID p_shader) const; + + virtual void shader_set_code(RID p_shader, const String& p_code); + virtual String shader_get_code(RID p_shader) const; + virtual void shader_get_param_list(RID p_shader, List *p_param_list) const; + + virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture); + virtual RID shader_get_default_texture_param(RID p_shader, const StringName& p_name) const; + + + /* COMMON MATERIAL API */ + + virtual RID material_create(); + + virtual void material_set_shader(RID p_shader_material, RID p_shader); + virtual RID material_get_shader(RID p_shader_material) const; + + virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); + virtual Variant material_get_param(RID p_material, const StringName& p_param) const; + + /* MESH API */ + + virtual RID mesh_create(); + + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >()); + + virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount); + virtual int mesh_get_morph_target_count(RID p_mesh) const; + + + virtual void mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode); + virtual VS::MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const; + + virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material); + virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const; + + virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const; + virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const; + + virtual DVector mesh_surface_get_array(RID p_mesh, int p_surface) const; + virtual DVector mesh_surface_get_index_array(RID p_mesh, int p_surface) const; + + + virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const; + virtual VS::PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const; + + virtual void mesh_remove_surface(RID p_mesh,int p_index); + virtual int mesh_get_surface_count(RID p_mesh) const; + + virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb); + virtual AABB mesh_get_custom_aabb(RID p_mesh) const; + + virtual AABB mesh_get_aabb(RID p_mesh) const; + virtual void mesh_clear(RID p_mesh); + + /* MULTIMESH API */ + + + virtual RID multimesh_create(); + + virtual void multimesh_allocate(RID p_multimesh,int p_instances,VS::MultimeshTransformFormat p_transform_format,VS::MultimeshColorFormat p_color_format,bool p_gen_aabb=true); + virtual int multimesh_get_instance_count(RID p_multimesh) const; + + virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh); + virtual void multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb); + virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform); + virtual void multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform); + virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color); + + virtual RID multimesh_get_mesh(RID p_multimesh) const; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const; + + virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const; + virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const; + virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const; + + virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible); + virtual int multimesh_get_visible_instances(RID p_multimesh) const; + + virtual AABB multimesh_get_aabb(RID p_mesh) const; + + /* IMMEDIATE API */ + + virtual RID immediate_create(); + virtual void immediate_begin(RID p_immediate,VS::PrimitiveType p_rimitive,RID p_texture=RID()); + virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex); + virtual void immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex); + virtual void immediate_normal(RID p_immediate,const Vector3& p_normal); + virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent); + virtual void immediate_color(RID p_immediate,const Color& p_color); + virtual void immediate_uv(RID p_immediate,const Vector2& tex_uv); + virtual void immediate_uv2(RID p_immediate,const Vector2& tex_uv); + virtual void immediate_end(RID p_immediate); + virtual void immediate_clear(RID p_immediate); + virtual void immediate_set_material(RID p_immediate,RID p_material); + virtual RID immediate_get_material(RID p_immediate) const; + + /* SKELETON API */ + + virtual RID skeleton_create(); + virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false); + virtual int skeleton_get_bone_count(RID p_skeleton) const; + virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); + virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform); + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone); + + /* Light API */ + + virtual RID light_create(VS::LightType p_type); + + virtual void light_set_color(RID p_light,const Color& p_color); + virtual void light_set_param(RID p_light,VS::LightParam p_param,float p_value); + virtual void light_set_shadow(RID p_light,bool p_enabled); + virtual void light_set_projector(RID p_light,RID p_texture); + virtual void light_set_attenuation_texure(RID p_light,RID p_texture); + virtual void light_set_negative(RID p_light,bool p_enable); + virtual void light_set_cull_mask(RID p_light,uint32_t p_mask); + virtual void light_set_shader(RID p_light,RID p_shader); + + + virtual void light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode); + + /* PROBE API */ + + virtual RID reflection_probe_create(); + + virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity); + virtual void reflection_probe_set_clip(RID p_probe, float p_near, float p_far); + virtual void reflection_probe_set_min_blend_distance(RID p_probe, float p_distance); + virtual void reflection_probe_set_extents(RID p_probe, const Vector3& p_extents); + virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset); + virtual void reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable); + virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution); + virtual void reflection_probe_set_hide_skybox(RID p_probe, bool p_hide); + virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers); + + + /* ROOM API */ + + virtual RID room_create(); + virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform); + virtual void room_clear_bounds(); + + /* PORTAL API */ + + // portals are only (x/y) points, forming a convex shape, which its clockwise + // order points outside. (z is 0); + + virtual RID portal_create(); + virtual void portal_set_shape(RID p_portal, const Vector& p_shape); + virtual void portal_set_enabled(RID p_portal, bool p_enabled); + virtual void portal_set_disable_distance(RID p_portal, float p_distance); + virtual void portal_set_disabled_color(RID p_portal, const Color& p_color); + + + /* RENDER TARGET */ + + struct RenderTarget : public RID_Data { + + struct Color { + GLuint fbo; + GLuint color; + } front,back; + + GLuint depth; + + struct Deferred { + GLuint fbo; + GLuint fbo_color; + + GLuint albedo_ao; + GLuint metal_rough_motion; + GLuint normal_special; + } deferred; + + int width,height; + + bool flags[RENDER_TARGET_FLAG_MAX]; + + bool used_in_frame; + + RenderTarget() { + + width=0; + height=0; + depth=0; + front.fbo=0; + back.fbo=0; + deferred.fbo=0; + deferred.fbo_color=0; + used_in_frame=false; + + flags[RENDER_TARGET_VFLIP]=false; + flags[RENDER_TARGET_TRANSPARENT]=false; + flags[RENDER_TARGET_NO_3D]=false; + flags[RENDER_TARGET_NO_SAMPLING]=false; + } + }; + + mutable RID_Owner render_target_owner; + + void _render_target_clear(RenderTarget *rt); + void _render_target_allocate(RenderTarget *rt); + + virtual RID render_target_create(); + virtual void render_target_set_size(RID p_render_target,int p_width, int p_height); + virtual RID render_target_get_texture(RID p_render_target) const; + virtual Image render_target_get_image(RID p_render_target) const; + virtual void render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value); + virtual bool render_target_renedered_in_frame(RID p_render_target); + + /* CANVAS SHADOW */ + + struct CanvasLightShadow : public RID_Data { + + int size; + int height; + GLuint fbo; + GLuint depth; + GLuint distance; //for older devices + }; + + RID_Owner canvas_light_shadow_owner; + + virtual RID canvas_light_shadow_buffer_create(int p_width); + + /* LIGHT SHADOW MAPPING */ + + struct CanvasOccluder : public RID_Data { + + GLuint vertex_id; // 0 means, unconfigured + GLuint index_id; // 0 means, unconfigured + DVector lines; + int len; + }; + + RID_Owner canvas_occluder_owner; + + virtual RID canvas_light_occluder_create(); + virtual void canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines); + + virtual bool free(RID p_rid); + + + struct Frame { + + RenderTarget *current_rt; + + bool clear_request; + Color clear_request_color; + int canvas_draw_commands; + } frame; + + void initialize(); + void finalize(); + + + + RasterizerStorageGLES3(); +}; + + +#endif // RASTERIZERSTORAGEGLES3_H diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp new file mode 100644 index 00000000000..f8c0234943b --- /dev/null +++ b/drivers/gles3/shader_gles3.cpp @@ -0,0 +1,754 @@ +/*************************************************************************/ +/* shader_gles2.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "shader_gles3.h" + + +#include "print_string.h" + +//#define DEBUG_OPENGL + +#ifdef DEBUG_OPENGL + +#define DEBUG_TEST_ERROR(m_section)\ +{\ + uint32_t err = glGetError();\ + if (err) {\ + print_line("OpenGL Error #"+itos(err)+" at: "+m_section);\ + }\ +} +#else + +#define DEBUG_TEST_ERROR(m_section) + +#endif + +ShaderGLES3 *ShaderGLES3::active=NULL; + + + +//#define DEBUG_SHADER + +#ifdef DEBUG_SHADER + +#define DEBUG_PRINT(m_text) print_line(m_text); + +#else + +#define DEBUG_PRINT(m_text) + +#endif + + +void ShaderGLES3::bind_uniforms() { + + if (!uniforms_dirty) { + return; + }; + + // upload default uniforms + const Map::Element *E =uniform_defaults.front(); + + while(E) { + int idx=E->key(); + int location=version->uniform_location[idx]; + + if (location<0) { + E=E->next(); + continue; + + } + + const Variant &v=E->value(); + _set_uniform_variant(location, v); + //print_line("uniform "+itos(location)+" value "+v+ " type "+Variant::get_type_name(v.get_type())); + E=E->next(); + }; + + const Map::Element* C = uniform_cameras.front(); + while (C) { + + int location = version->uniform_location[C->key()]; + if (location<0) { + C=C->next(); + continue; + } + + glUniformMatrix4fv(location,1,false,&(C->get().matrix[0][0])); + C = C->next(); + }; + + uniforms_dirty = false; +}; + +GLint ShaderGLES3::get_uniform_location(int p_idx) const { + + ERR_FAIL_COND_V(!version, -1); + + return version->uniform_location[p_idx]; +}; + +bool ShaderGLES3::bind() { + + if (active!=this || !version || new_conditional_version.key!=conditional_version.key) { + conditional_version=new_conditional_version; + version = get_current_version(); + } else { + + return false; + } + + ERR_FAIL_COND_V(!version,false); + + glUseProgram( version->id ); + + DEBUG_TEST_ERROR("Use Program"); + + active=this; + uniforms_dirty = true; +/* + * why on earth is this code here? + for (int i=0;i& p_code) { + + + int line=1; + String total_code; + + for(int i=0;i lines = String(total_code).split("\n"); + + for(int j=0;jversion==_v->code_version) + return _v; + } else { + return _v; + } + + } + + + + if (!_v) + version_map[conditional_version]=Version(); + + + Version &v = version_map[conditional_version]; + + if (!_v) { + + v.uniform_location = memnew_arr( GLint, uniform_count ); + + } else { + if (v.ok) { + //bye bye shaders + glDeleteShader( v.vert_id ); + glDeleteShader( v.frag_id ); + glDeleteProgram( v.id ); + v.id=0; + } + + } + + v.ok=false; + /* SETUP CONDITIONALS */ + + Vector strings; +#ifdef GLEW_ENABLED + //strings.push_back("#version 330\n"); + strings.push_back("#version 300 es\n"); +#else + strings.push_back("#version 300 es\n"); //ATI requieres this before anything +#endif + + + int define_line_ofs=1; + + for(int j=0;j0 ) { + //do custom code related stuff + + ERR_FAIL_COND_V( !custom_code_map.has( conditional_version.code_version ), NULL ); + cc=&custom_code_map[conditional_version.code_version]; + v.code_version=cc->version; + define_line_ofs+=2; + } + + + /* CREATE PROGRAM */ + + v.id = glCreateProgram(); + + ERR_FAIL_COND_V(v.id==0, NULL); + + /* VERTEX SHADER */ + + + if (cc) { + for(int i=0;icustom_defines.size();i++) { + + strings.push_back(cc->custom_defines[i]); + DEBUG_PRINT("CD #"+itos(i)+": "+String(cc->custom_defines[i])); + } + } + + int strings_base_size=strings.size(); + + //vertex precision is high + strings.push_back("precision highp float;\n"); + strings.push_back("precision highp int;\n"); + +#if 0 + if (cc) { + + String _code_string = "#define VERTEX_SHADER_CODE "+cc->vertex+"\n"; + String _code_globals = "#define VERTEX_SHADER_GLOBALS "+cc->vertex_globals+"\n"; + + code_string=_code_string.ascii(); + code_globals=_code_globals.ascii(); + DEBUG_PRINT( code_globals.get_data() ); + DEBUG_PRINT( code_string.get_data() ); + strings.push_back(code_globals); + strings.push_back(code_string); + } +#endif + + + strings.push_back(vertex_code0.get_data()); + if (cc) { + code_globals=cc->vertex_globals.ascii(); + strings.push_back(code_globals.get_data()); + } + strings.push_back(vertex_code1.get_data()); + + if (cc) { + code_string=cc->vertex.ascii(); + strings.push_back(code_string.get_data()); + } + + strings.push_back(vertex_code2.get_data()); +#ifdef DEBUG_SHADER + + DEBUG_PRINT("\nVertex Code:\n\n"+String(code_string.get_data())); + for(int i=0;ifragment+"\n"; + String _code_globals = "#define FRAGMENT_SHADER_GLOBALS "+cc->fragment_globals+"\n"; + + code_string=_code_string.ascii(); + code_globals=_code_globals.ascii(); + DEBUG_PRINT( code_globals.get_data() ); + DEBUG_PRINT( code_string.get_data() ); + strings.push_back(code_globals); + strings.push_back(code_string); + } +#endif + + + strings.push_back(fragment_code0.get_data()); + if (cc) { + code_globals=cc->fragment_globals.ascii(); + strings.push_back(code_globals.get_data()); + } + strings.push_back(fragment_code1.get_data()); + + if (cc) { + code_string=cc->fragment.ascii(); + strings.push_back(code_string.get_data()); + } + + strings.push_back(fragment_code2.get_data()); + + if (cc) { + code_string2=cc->light.ascii(); + strings.push_back(code_string2.get_data()); + } + + strings.push_back(fragment_code3.get_data()); + +#ifdef DEBUG_SHADER + DEBUG_PRINT("\nFragment Code:\n\n"+String(code_string.get_data())); + for(int i=0;i=0) { + if (texunit_pairs[i].index<0) { + glUniform1i(loc,max_image_units+texunit_pairs[i].index); //negative, goes down + } else { + + glUniform1i(loc,texunit_pairs[i].index); + } + } + } + + // assign uniform block bind points + for (int i=0;i=0) + glUniformBlockBinding(v.id,loc,ubo_pairs[i].index); + } + + if ( cc ) { + + v.custom_uniform_locations.resize(cc->custom_uniforms.size()); + for(int i=0;icustom_uniforms.size();i++) { + + v.custom_uniform_locations[i]=glGetUniformLocation(v.id,String(cc->custom_uniforms[i]).ascii().get_data()); + } + } + + glUseProgram(0); + + + v.ok=true; + + return &v; +} + +GLint ShaderGLES3::get_uniform_location(const String& p_name) const { + + ERR_FAIL_COND_V(!version,-1); + return glGetUniformLocation(version->id,p_name.ascii().get_data()); +} + + +void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_count,const char** p_uniform_names,int p_uniform_count, const AttributePair* p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count,const char*p_vertex_code, const char *p_fragment_code,int p_vertex_code_start,int p_fragment_code_start) { + + ERR_FAIL_COND(version); + conditional_version.key=0; + new_conditional_version.key=0; + uniform_count=p_uniform_count; + conditional_count=p_conditional_count; + conditional_defines=p_conditional_defines; + uniform_names=p_uniform_names; + vertex_code=p_vertex_code; + fragment_code=p_fragment_code; + texunit_pairs=p_texunit_pairs; + texunit_pair_count=p_texunit_pair_count; + vertex_code_start=p_vertex_code_start; + fragment_code_start=p_fragment_code_start; + attribute_pairs=p_attribute_pairs; + attribute_pair_count=p_attribute_count; + ubo_pairs=p_ubo_pairs; + ubo_count=p_ubo_pair_count; + + //split vertex and shader code (thank you, retarded shader compiler programmers from you know what company). + { + String globals_tag="\nVERTEX_SHADER_GLOBALS"; + String code_tag="\nVERTEX_SHADER_CODE"; + String code = vertex_code; + int cpos = code.find(globals_tag); + if (cpos==-1) { + vertex_code0=code.ascii(); + } else { + vertex_code0=code.substr(0,cpos).ascii(); + code = code.substr(cpos+globals_tag.length(),code.length()); + + cpos = code.find(code_tag); + + if (cpos==-1) { + vertex_code1=code.ascii(); + } else { + + vertex_code1=code.substr(0,cpos).ascii(); + vertex_code2=code.substr(cpos+code_tag.length(),code.length()).ascii(); + } + } + } + + { + String globals_tag="\nFRAGMENT_SHADER_GLOBALS"; + String code_tag="\nFRAGMENT_SHADER_CODE"; + String light_code_tag="\nLIGHT_SHADER_CODE"; + String code = fragment_code; + int cpos = code.find(globals_tag); + if (cpos==-1) { + fragment_code0=code.ascii(); + } else { + fragment_code0=code.substr(0,cpos).ascii(); + code = code.substr(cpos+globals_tag.length(),code.length()); + + cpos = code.find(code_tag); + + if (cpos==-1) { + fragment_code1=code.ascii(); + } else { + + fragment_code1=code.substr(0,cpos).ascii(); + String code2 = code.substr(cpos+code_tag.length(),code.length()); + + cpos = code2.find(light_code_tag); + if (cpos==-1) { + fragment_code2=code2.ascii(); + } else { + + fragment_code2=code2.substr(0,cpos).ascii(); + fragment_code3 = code2.substr(cpos+light_code_tag.length(),code2.length()).ascii(); + } + } + } + } + +} + +void ShaderGLES3::finish() { + + const VersionKey *V=NULL; + while((V=version_map.next(V))) { + + Version &v=version_map[*V]; + glDeleteShader( v.vert_id ); + glDeleteShader( v.frag_id ); + glDeleteProgram( v.id ); + memdelete_arr( v.uniform_location ); + + } + +} + + +void ShaderGLES3::clear_caches() { + + const VersionKey *V=NULL; + while((V=version_map.next(V))) { + + Version &v=version_map[*V]; + glDeleteShader( v.vert_id ); + glDeleteShader( v.frag_id ); + glDeleteProgram( v.id ); + memdelete_arr( v.uniform_location ); + } + + version_map.clear(); + + custom_code_map.clear(); + version=NULL; + last_custom_code=1; + uniforms_dirty = true; + +} + +uint32_t ShaderGLES3::create_custom_shader() { + + custom_code_map[last_custom_code]=CustomCode(); + custom_code_map[last_custom_code].version=1; + return last_custom_code++; +} + +void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String& p_vertex, const String& p_vertex_globals,const String& p_fragment,const String& p_light, const String& p_fragment_globals,const Vector& p_uniforms,const Vector &p_custom_defines) { + + ERR_FAIL_COND(!custom_code_map.has(p_code_id)); + CustomCode *cc=&custom_code_map[p_code_id]; + + cc->vertex=p_vertex; + cc->vertex_globals=p_vertex_globals; + cc->fragment=p_fragment; + cc->fragment_globals=p_fragment_globals; + cc->light=p_light; + cc->custom_uniforms=p_uniforms; + cc->custom_defines=p_custom_defines; + cc->version++; +} + +void ShaderGLES3::set_custom_shader(uint32_t p_code_id) { + + new_conditional_version.code_version=p_code_id; +} + +void ShaderGLES3::free_custom_shader(uint32_t p_code_id) { + + /* if (! custom_code_map.has( p_code_id )) { + print_line("no code id "+itos(p_code_id)); + } else { + print_line("freed code id "+itos(p_code_id)); + + }*/ + + ERR_FAIL_COND(! custom_code_map.has( p_code_id )); + if (conditional_version.code_version==p_code_id) + conditional_version.code_version=0; //bye + + custom_code_map.erase(p_code_id); + +} + + + +ShaderGLES3::ShaderGLES3() { + version=NULL; + last_custom_code=1; + uniforms_dirty = true; + + glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS,&max_image_units); +} + + +ShaderGLES3::~ShaderGLES3() { + + finish(); +} + + + diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h new file mode 100644 index 00000000000..7aaf65d4509 --- /dev/null +++ b/drivers/gles3/shader_gles3.h @@ -0,0 +1,377 @@ +/*************************************************************************/ +/* shader_gles2.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#ifndef SHADER_GLES3_H +#define SHADER_GLES3_H + + +#include "platform_config.h" +#ifndef GLES3_INCLUDE_H +#include +#else +#include GLES3_INCLUDE_H +#endif + +#include "hash_map.h" +#include "map.h" +#include "variant.h" +#include "camera_matrix.h" + +/** + @author Juan Linietsky +*/ + + +class ShaderGLES3 { +protected: + + struct Enum { + + uint64_t mask; + uint64_t shift; + const char *defines[16]; + }; + + struct EnumValue { + + uint64_t set_mask; + uint64_t clear_mask; + }; + + struct AttributePair { + + const char *name; + int index; + }; + + struct UniformPair { + const char* name; + Variant::Type type_hint; + }; + + struct TexUnitPair { + + const char *name; + int index; + }; + + struct UBOPair { + + const char *name; + int index; + }; + + bool uniforms_dirty; +private: + + //@TODO Optimize to a fixed set of shader pools and use a LRU + int uniform_count; + int texunit_pair_count; + int conditional_count; + int ubo_count; + int vertex_code_start; + int fragment_code_start; + int attribute_pair_count; + + struct CustomCode { + + String vertex; + String vertex_globals; + String fragment; + String fragment_globals; + String light; + uint32_t version; + Vector custom_uniforms; + Vector custom_defines; + + }; + + + struct Version { + + GLuint id; + GLuint vert_id; + GLuint frag_id; + GLint *uniform_location; + Vector custom_uniform_locations; + uint32_t code_version; + bool ok; + Version() { code_version=0; ok=false; uniform_location=NULL; } + }; + + Version *version; + + union VersionKey { + + struct { + uint32_t version; + uint32_t code_version; + }; + uint64_t key; + bool operator==(const VersionKey& p_key) const { return key==p_key.key; } + bool operator<(const VersionKey& p_key) const { return key version_map; + + HashMap custom_code_map; + uint32_t last_custom_code; + + + VersionKey conditional_version; + VersionKey new_conditional_version; + + virtual String get_shader_name() const=0; + + const char** conditional_defines; + const char** uniform_names; + const AttributePair *attribute_pairs; + const TexUnitPair *texunit_pairs; + const UBOPair *ubo_pairs; + const char* vertex_code; + const char* fragment_code; + CharString fragment_code0; + CharString fragment_code1; + CharString fragment_code2; + CharString fragment_code3; + + CharString vertex_code0; + CharString vertex_code1; + CharString vertex_code2; + + Version * get_current_version(); + + static ShaderGLES3 *active; + + int max_image_units; + + _FORCE_INLINE_ void _set_uniform_variant(GLint p_uniform,const Variant& p_value) { + + if (p_uniform<0) + return; // do none + switch(p_value.get_type()) { + + case Variant::BOOL: + case Variant::INT: { + + int val=p_value; + glUniform1i( p_uniform, val ); + } break; + case Variant::REAL: { + + real_t val=p_value; + glUniform1f( p_uniform, val ); + } break; + case Variant::COLOR: { + + Color val=p_value; + glUniform4f( p_uniform, val.r, val.g,val.b,val.a ); + } break; + case Variant::VECTOR2: { + + Vector2 val=p_value; + glUniform2f( p_uniform, val.x,val.y ); + } break; + case Variant::VECTOR3: { + + Vector3 val=p_value; + glUniform3f( p_uniform, val.x,val.y,val.z ); + } break; + case Variant::PLANE: { + + Plane val=p_value; + glUniform4f( p_uniform, val.normal.x,val.normal.y,val.normal.z,val.d ); + } break; + case Variant::QUAT: { + + Quat val=p_value; + glUniform4f( p_uniform, val.x,val.y,val.z,val.w ); + } break; + + case Variant::MATRIX32: { + + Matrix32 tr=p_value; + GLfloat matrix[16]={ /* build a 16x16 matrix */ + tr.elements[0][0], + tr.elements[0][1], + 0, + 0, + tr.elements[1][0], + tr.elements[1][1], + 0, + 0, + 0, + 0, + 1, + 0, + tr.elements[2][0], + tr.elements[2][1], + 0, + 1 + }; + + glUniformMatrix4fv(p_uniform,1,false,matrix); + + } break; + case Variant::MATRIX3: + case Variant::TRANSFORM: { + + Transform tr=p_value; + GLfloat matrix[16]={ /* build a 16x16 matrix */ + tr.basis.elements[0][0], + tr.basis.elements[1][0], + tr.basis.elements[2][0], + 0, + tr.basis.elements[0][1], + tr.basis.elements[1][1], + tr.basis.elements[2][1], + 0, + tr.basis.elements[0][2], + tr.basis.elements[1][2], + tr.basis.elements[2][2], + 0, + tr.origin.x, + tr.origin.y, + tr.origin.z, + 1 + }; + + + glUniformMatrix4fv(p_uniform,1,false,matrix); + } break; + default: { ERR_FAIL(); } // do nothing + + } + } + + Map uniform_defaults; + Map uniform_cameras; + + +protected: + + _FORCE_INLINE_ int _get_uniform(int p_which) const; + _FORCE_INLINE_ void _set_conditional(int p_which, bool p_value); + + void setup(const char** p_conditional_defines, int p_conditional_count,const char** p_uniform_names,int p_uniform_count, const AttributePair* p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count,const char*p_vertex_code, const char *p_fragment_code,int p_vertex_code_start,int p_fragment_code_start); + + ShaderGLES3(); +public: + + enum { + CUSTOM_SHADER_DISABLED=0 + }; + + GLint get_uniform_location(const String& p_name) const; + GLint get_uniform_location(int p_uniform) const; + + static _FORCE_INLINE_ ShaderGLES3 *get_active() { return active; }; + bool bind(); + void unbind(); + void bind_uniforms(); + + + inline GLuint get_program() const { return version?version->id:0; } + + void clear_caches(); + + uint32_t create_custom_shader(); + void set_custom_shader_code(uint32_t p_id,const String& p_vertex, const String& p_vertex_globals,const String& p_fragment,const String& p_p_light,const String& p_fragment_globals,const Vector& p_uniforms,const Vector &p_custom_defines); + void set_custom_shader(uint32_t p_id); + void free_custom_shader(uint32_t p_id); + + void set_uniform_default(int p_idx, const Variant& p_value) { + + if (p_value.get_type()==Variant::NIL) { + + uniform_defaults.erase(p_idx); + } else { + + uniform_defaults[p_idx]=p_value; + } + uniforms_dirty = true; + } + + uint32_t get_version() const { return new_conditional_version.version; } + + void set_uniform_camera(int p_idx, const CameraMatrix& p_mat) { + + uniform_cameras[p_idx] = p_mat; + uniforms_dirty = true; + }; + + _FORCE_INLINE_ void set_custom_uniform(int p_idx, const Variant& p_value) { + + ERR_FAIL_COND(!version); + ERR_FAIL_INDEX(p_idx,version->custom_uniform_locations.size()); + _set_uniform_variant( version->custom_uniform_locations[p_idx], p_value ); + } + + _FORCE_INLINE_ GLint get_custom_uniform_location(int p_idx) { + + ERR_FAIL_COND_V(!version,-1); + ERR_FAIL_INDEX_V(p_idx,version->custom_uniform_locations.size(),-1); + return version->custom_uniform_locations[p_idx]; + } + + virtual void init()=0; + void finish(); + + virtual ~ShaderGLES3(); + +}; + + +// called a lot, made inline + + +int ShaderGLES3::_get_uniform(int p_which) const { + + ERR_FAIL_INDEX_V( p_which, uniform_count,-1 ); + ERR_FAIL_COND_V( !version, -1 ); + return version->uniform_location[p_which]; +} + +void ShaderGLES3::_set_conditional(int p_which, bool p_value) { + + ERR_FAIL_INDEX(p_which,conditional_count); + if (p_value) + new_conditional_version.version|=(1<135.0*PI/180.0) { + point = -light_vec; + sh = 0.5+(1.0/8.0); + } else if (angle_to_light>0.0) { + + point = vec2(light_vec.y,-light_vec.x); + sh = 0.25+(1.0/8.0); + } else { + + point = vec2(-light_vec.y,light_vec.x); + sh = 0.75+(1.0/8.0); + + } + + + highp vec4 s = shadow_matrix * vec4(point,0.0,1.0); + s.xyz/=s.w; + su=s.x*0.5+0.5; + sz=s.z*0.5+0.5; + //sz=lightlength(light_vec); + + highp float shadow_attenuation=0.0; + +#ifdef USE_RGBA_SHADOWS + +#define SHADOW_DEPTH(m_tex,m_uv) dot(texture2D((m_tex),(m_uv)),vec4(1.0 / (256.0 * 256.0 * 256.0),1.0 / (256.0 * 256.0),1.0 / 256.0,1) ) + +#else + +#define SHADOW_DEPTH(m_tex,m_uv) (texture2D((m_tex),(m_uv)).r) + +#endif + + + +#ifdef SHADOW_USE_GRADIENT + +#define SHADOW_TEST(m_ofs) { highp float sd = SHADOW_DEPTH(shadow_texture,vec2(m_ofs,sh)); shadow_attenuation+=1.0-smoothstep(sd,sd+shadow_gradient,sz); } + +#else + +#define SHADOW_TEST(m_ofs) { highp float sd = SHADOW_DEPTH(shadow_texture,vec2(m_ofs,sh)); shadow_attenuation+=step(sz,sd); } + +#endif + + +#ifdef SHADOW_FILTER_NEAREST + + SHADOW_TEST(su+shadowpixel_size); + +#endif + + +#ifdef SHADOW_FILTER_PCF3 + + SHADOW_TEST(su+shadowpixel_size); + SHADOW_TEST(su); + SHADOW_TEST(su-shadowpixel_size); + shadow_attenuation/=3.0; + +#endif + + +#ifdef SHADOW_FILTER_PCF5 + + SHADOW_TEST(su+shadowpixel_size*3.0); + SHADOW_TEST(su+shadowpixel_size*2.0); + SHADOW_TEST(su+shadowpixel_size); + SHADOW_TEST(su); + SHADOW_TEST(su-shadowpixel_size); + SHADOW_TEST(su-shadowpixel_size*2.0); + SHADOW_TEST(su-shadowpixel_size*3.0); + shadow_attenuation/=5.0; + +#endif + + +#ifdef SHADOW_FILTER_PCF9 + + SHADOW_TEST(su+shadowpixel_size*4.0); + SHADOW_TEST(su+shadowpixel_size*3.0); + SHADOW_TEST(su+shadowpixel_size*2.0); + SHADOW_TEST(su+shadowpixel_size); + SHADOW_TEST(su); + SHADOW_TEST(su-shadowpixel_size); + SHADOW_TEST(su-shadowpixel_size*2.0); + SHADOW_TEST(su-shadowpixel_size*3.0); + SHADOW_TEST(su-shadowpixel_size*4.0); + shadow_attenuation/=9.0; + +#endif + +#ifdef SHADOW_FILTER_PCF13 + + SHADOW_TEST(su+shadowpixel_size*6.0); + SHADOW_TEST(su+shadowpixel_size*5.0); + SHADOW_TEST(su+shadowpixel_size*4.0); + SHADOW_TEST(su+shadowpixel_size*3.0); + SHADOW_TEST(su+shadowpixel_size*2.0); + SHADOW_TEST(su+shadowpixel_size); + SHADOW_TEST(su); + SHADOW_TEST(su-shadowpixel_size); + SHADOW_TEST(su-shadowpixel_size*2.0); + SHADOW_TEST(su-shadowpixel_size*3.0); + SHADOW_TEST(su-shadowpixel_size*4.0); + SHADOW_TEST(su-shadowpixel_size*5.0); + SHADOW_TEST(su-shadowpixel_size*6.0); + shadow_attenuation/=13.0; + +#endif + + +#if defined(USE_OUTPUT_SHADOW_COLOR) + color=mix(shadow_color,color,shadow_attenuation); +#else + //color*=shadow_attenuation; + color=mix(light_shadow_color,color,shadow_attenuation); +#endif +//use shadows +#endif + } + +//use lighting +#endif +// color.rgb*=color.a; + frag_color = color; + +} + diff --git a/drivers/gles3/shaders/canvas_shadow.glsl b/drivers/gles3/shaders/canvas_shadow.glsl new file mode 100644 index 00000000000..c757990de09 --- /dev/null +++ b/drivers/gles3/shaders/canvas_shadow.glsl @@ -0,0 +1,49 @@ +[vertex] + + + +uniform highp mat4 projection_matrix; +uniform highp mat4 light_matrix; +uniform highp mat4 world_matrix; +uniform highp float distance_norm; + +layout(location=0) in highp vec3 vertex; + +out highp vec4 position_interp; + +void main() { + + gl_Position = projection_matrix * (light_matrix * (world_matrix * vec4(vertex,1.0))); + position_interp=gl_Position; +} + +[fragment] + +in highp vec4 position_interp; + +#ifdef USE_RGBA_SHADOWS + +layout(location=0) out lowp vec4 distance_buf; + +#else + +layout(location=0) out highp float distance_buf; + +#endif + +void main() { + + highp float depth = ((position_interp.z / position_interp.w) + 1.0) * 0.5 + 0.0;//bias; + +#ifdef USE_RGBA_SHADOWS + + highp vec4 comp = fract(depth * vec4(256.0 * 256.0 * 256.0, 256.0 * 256.0, 256.0, 1.0)); + comp -= comp.xxyz * vec4(0, 1.0 / 256.0, 1.0 / 256.0, 1.0 / 256.0); + distance_buf=comp; +#else + + distance_buf=depth; + +#endif +} + diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/copy.glsl new file mode 100644 index 00000000000..aba280186ac --- /dev/null +++ b/drivers/gles3/shaders/copy.glsl @@ -0,0 +1,52 @@ +[vertex] + + +layout(location=0) in highp vec4 vertex_attrib; +#ifdef USE_CUBEMAP +layout(location=4) in vec3 cube_in; +#else +layout(location=4) in vec2 uv_in; // attrib:4 +#endif +layout(location=5) in vec2 uv2_in; // attrib:5 + +#ifdef USE_CUBEMAP +out vec3 cube_interp; +#else +out vec2 uv_interp; +#endif + +out vec2 uv2_interp; + +void main() { + +#ifdef USE_CUBEMAP + cube_interp = cube_in; +#else + uv_interp = uv_in; +#endif + uv2_interp = uv2_in; + gl_Position = vertex_attrib; +} + +[fragment] + + +#ifdef USE_CUBEMAP +in vec3 cube_interp; +uniform samplerCube source_cube; +#else +in vec2 uv_interp; +uniform sampler2D source; +#endif + +in vec2 uv2_interp; + +layout(location = 0) vec4 frag_color; //color:0 + +void main() { + + //vec4 color = color_interp; + + frag_color = color; +} + diff --git a/drivers/jpegd/image_loader_jpegd.cpp b/drivers/jpegd/image_loader_jpegd.cpp index 496334605d4..2ea97480343 100644 --- a/drivers/jpegd/image_loader_jpegd.cpp +++ b/drivers/jpegd/image_loader_jpegd.cpp @@ -83,9 +83,9 @@ Error jpeg_load_image_from_buffer(Image *p_image,const uint8_t* p_buffer, int p_ Image::Format fmt; if (comps==1) - fmt=Image::FORMAT_GRAYSCALE; + fmt=Image::FORMAT_L8; else - fmt=Image::FORMAT_RGBA; + fmt=Image::FORMAT_RGBA8; dw = DVector::Write(); p_image->create(image_width,image_height,0,fmt,data); diff --git a/drivers/png/image_loader_png.cpp b/drivers/png/image_loader_png.cpp index 4967b0f9dfb..cd4f2d6529a 100644 --- a/drivers/png/image_loader_png.cpp +++ b/drivers/png/image_loader_png.cpp @@ -113,25 +113,36 @@ Error ImageLoaderPNG::_load_image(void *rf_up,png_rw_ptr p_func,Image *p_image) printf("Color type:%i\n", color); */ + bool update_info=false; + if (depth<8) { //only bit dept 8 per channel is handled png_set_packing(png); + update_info=true; + }; + if (png_get_color_type(png,info)==PNG_COLOR_TYPE_PALETTE) { + png_set_palette_to_rgb(png); + update_info=true; + } + if (depth > 8) { png_set_strip_16(png); - png_read_update_info(png, info); + update_info=true; } if (png_get_valid(png,info,PNG_INFO_tRNS)) { // png_set_expand_gray_1_2_4_to_8(png); png_set_tRNS_to_alpha(png); + update_info=true; + } + + if (update_info) { png_read_update_info(png, info); png_get_IHDR(png, info, &width, &height, &depth, &color, NULL, NULL, NULL); } - int palette_colors = 0; - int palette_components = 0; int components = 0; Image::Format fmt; @@ -140,38 +151,24 @@ Error ImageLoaderPNG::_load_image(void *rf_up,png_rw_ptr p_func,Image *p_image) case PNG_COLOR_TYPE_GRAY: { - fmt=Image::FORMAT_GRAYSCALE; + fmt=Image::FORMAT_L8; components=1; } break; case PNG_COLOR_TYPE_GRAY_ALPHA: { - fmt=Image::FORMAT_GRAYSCALE_ALPHA; + fmt=Image::FORMAT_LA8; components=2; } break; case PNG_COLOR_TYPE_RGB: { - fmt=Image::FORMAT_RGB; + fmt=Image::FORMAT_RGB8; components=3; } break; case PNG_COLOR_TYPE_RGB_ALPHA: { - fmt=Image::FORMAT_RGBA; + fmt=Image::FORMAT_RGBA8; components=4; - } break; - case PNG_COLOR_TYPE_PALETTE: { - - int ntrans = 0; - png_get_tRNS(png, info, NULL, &ntrans, NULL); - //printf("transparent colors %i\n", ntrans); - - fmt = ntrans > 0 ? Image::FORMAT_INDEXED_ALPHA : Image::FORMAT_INDEXED; - palette_components = ntrans > 0 ? 4 : 3; - components = 1; - - png_colorp colors; - png_get_PLTE(png, info, &colors, &palette_colors); - - } break; + } break; default: { ERR_PRINT("INVALID PNG TYPE"); @@ -185,7 +182,7 @@ Error ImageLoaderPNG::_load_image(void *rf_up,png_rw_ptr p_func,Image *p_image) DVector dstbuff; - dstbuff.resize( rowsize * height + palette_components * 256 ); // alloc the entire palette? - yes always + dstbuff.resize( rowsize * height ); DVector::Write dstbuff_write = dstbuff.write(); @@ -199,38 +196,6 @@ Error ImageLoaderPNG::_load_image(void *rf_up,png_rw_ptr p_func,Image *p_image) png_read_image(png, (png_bytep*)row_p); - if (palette_colors) { - - uint8_t *r_pal = &data[components*width*height]; // end of the array - png_colorp colors; - int num; - png_get_PLTE(png, info, &colors, &num); - - int ofs = 0; - for (int i=0; i < palette_colors; i++) { - - r_pal[ofs + 0] = colors[i].red; - r_pal[ofs + 1] = colors[i].green; - r_pal[ofs + 2] = colors[i].blue; - if (palette_components == 4) { - r_pal[ofs + 3] = 255; - }; - ofs += palette_components; - }; - - if (fmt == Image::FORMAT_INDEXED_ALPHA) { - png_color_16p alphas; - png_bytep alpha_idx; - int count; - png_get_tRNS(png, info, &alpha_idx, &count, &alphas); - for (int i=0; i> 8; - r_pal[i*4+3] = alpha_idx[i]; - }; - }; - }; memdelete_arr( row_p ); @@ -324,11 +289,11 @@ static DVector _lossless_pack_png(const Image& p_image) { Image img = p_image; - if (img.get_format() > Image::FORMAT_INDEXED_ALPHA) + if (img.is_compressed()) img.decompress(); - ERR_FAIL_COND_V(img.get_format() > Image::FORMAT_INDEXED_ALPHA, DVector()); + ERR_FAIL_COND_V(img.is_compressed(), DVector()); png_structp png_ptr; png_infop info_ptr; @@ -365,22 +330,22 @@ static DVector _lossless_pack_png(const Image& p_image) { switch(img.get_format()) { - case Image::FORMAT_GRAYSCALE: { + case Image::FORMAT_L8: { pngf=PNG_COLOR_TYPE_GRAY; cs=1; } break; - case Image::FORMAT_GRAYSCALE_ALPHA: { + case Image::FORMAT_LA8: { pngf=PNG_COLOR_TYPE_GRAY_ALPHA; cs=2; } break; - case Image::FORMAT_RGB: { + case Image::FORMAT_RGB8: { pngf=PNG_COLOR_TYPE_RGB; cs=3; } break; - case Image::FORMAT_RGBA: { + case Image::FORMAT_RGBA8: { pngf=PNG_COLOR_TYPE_RGB_ALPHA; cs=4; @@ -389,12 +354,12 @@ static DVector _lossless_pack_png(const Image& p_image) { if (img.detect_alpha()) { - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); pngf=PNG_COLOR_TYPE_RGB_ALPHA; cs=4; } else { - img.convert(Image::FORMAT_RGB); + img.convert(Image::FORMAT_RGB8); pngf=PNG_COLOR_TYPE_RGB; cs=3; } diff --git a/drivers/png/resource_saver_png.cpp b/drivers/png/resource_saver_png.cpp index 2578fe9eb68..c641f6ee85a 100644 --- a/drivers/png/resource_saver_png.cpp +++ b/drivers/png/resource_saver_png.cpp @@ -96,10 +96,10 @@ Error ResourceSaverPNG::save(const String &p_path,const RES& p_resource,uint32_t Error ResourceSaverPNG::save_image(const String &p_path, Image &p_img) { - if (p_img.get_format() > Image::FORMAT_INDEXED_ALPHA) + if (p_img.is_compressed()) p_img.decompress(); - ERR_FAIL_COND_V(p_img.get_format() > Image::FORMAT_INDEXED_ALPHA, ERR_INVALID_PARAMETER); + ERR_FAIL_COND_V(p_img.is_compressed(), ERR_INVALID_PARAMETER); png_structp png_ptr; png_infop info_ptr; @@ -138,22 +138,22 @@ Error ResourceSaverPNG::save_image(const String &p_path, Image &p_img) { switch(p_img.get_format()) { - case Image::FORMAT_GRAYSCALE: { + case Image::FORMAT_L8: { pngf=PNG_COLOR_TYPE_GRAY; cs=1; } break; - case Image::FORMAT_GRAYSCALE_ALPHA: { + case Image::FORMAT_LA8: { pngf=PNG_COLOR_TYPE_GRAY_ALPHA; cs=2; } break; - case Image::FORMAT_RGB: { + case Image::FORMAT_RGB8: { pngf=PNG_COLOR_TYPE_RGB; cs=3; } break; - case Image::FORMAT_RGBA: { + case Image::FORMAT_RGBA8: { pngf=PNG_COLOR_TYPE_RGB_ALPHA; cs=4; @@ -162,12 +162,12 @@ Error ResourceSaverPNG::save_image(const String &p_path, Image &p_img) { if (p_img.detect_alpha()) { - p_img.convert(Image::FORMAT_RGBA); + p_img.convert(Image::FORMAT_RGBA8); pngf=PNG_COLOR_TYPE_RGB_ALPHA; cs=4; } else { - p_img.convert(Image::FORMAT_RGB); + p_img.convert(Image::FORMAT_RGB8); pngf=PNG_COLOR_TYPE_RGB; cs=3; } diff --git a/drivers/pvr/texture_loader_pvr.cpp b/drivers/pvr/texture_loader_pvr.cpp index 3ab32405120..b2063292e91 100644 --- a/drivers/pvr/texture_loader_pvr.cpp +++ b/drivers/pvr/texture_loader_pvr.cpp @@ -116,33 +116,33 @@ RES ResourceFormatPVR::load(const String &p_path,const String& p_original_path,E switch(flags&0xFF) { case 0x18: - case 0xC: format=(flags&PVR_HAS_ALPHA)?Image::FORMAT_PVRTC2_ALPHA:Image::FORMAT_PVRTC2; break; + case 0xC: format=(flags&PVR_HAS_ALPHA)?Image::FORMAT_PVRTC2A:Image::FORMAT_PVRTC2; break; case 0x19: - case 0xD: format=(flags&PVR_HAS_ALPHA)?Image::FORMAT_PVRTC4_ALPHA:Image::FORMAT_PVRTC4; break; + case 0xD: format=(flags&PVR_HAS_ALPHA)?Image::FORMAT_PVRTC4A:Image::FORMAT_PVRTC4; break; case 0x16: - format=Image::FORMAT_GRAYSCALE; break; + format=Image::FORMAT_L8; break; case 0x17: - format=Image::FORMAT_GRAYSCALE_ALPHA; break; + format=Image::FORMAT_LA8; break; case 0x20: case 0x80: case 0x81: - format=Image::FORMAT_BC1; break; + format=Image::FORMAT_DXT1; break; case 0x21: case 0x22: case 0x82: case 0x83: - format=Image::FORMAT_BC2; break; + format=Image::FORMAT_DXT3; break; case 0x23: case 0x24: case 0x84: case 0x85: - format=Image::FORMAT_BC3; break; + format=Image::FORMAT_DXT5; break; case 0x4: case 0x15: - format=Image::FORMAT_RGB; break; + format=Image::FORMAT_RGB8; break; case 0x5: case 0x12: - format=Image::FORMAT_RGBA; break; + format=Image::FORMAT_RGBA8; break; case 0x36: format=Image::FORMAT_ETC; break; default: @@ -198,24 +198,24 @@ static void _compress_pvrtc4(Image * p_img) { bool make_mipmaps=false; if (img.get_width()%8 || img.get_height()%8) { - make_mipmaps=img.get_mipmaps()>0; + make_mipmaps=img.has_mipmaps(); img.resize(img.get_width()+(8-(img.get_width()%8)),img.get_height()+(8-(img.get_height()%8))); } - img.convert(Image::FORMAT_RGBA); - if (img.get_mipmaps()==0 && make_mipmaps) + img.convert(Image::FORMAT_RGBA8); + if (!img.has_mipmaps() && make_mipmaps) img.generate_mipmaps(); bool use_alpha=img.detect_alpha(); Image new_img; - new_img.create(img.get_width(),img.get_height(),true,use_alpha?Image::FORMAT_PVRTC4_ALPHA:Image::FORMAT_PVRTC4); + new_img.create(img.get_width(),img.get_height(),true,use_alpha?Image::FORMAT_PVRTC4A:Image::FORMAT_PVRTC4); DVector data=new_img.get_data(); { DVector::Write wr=data.write(); DVector::Read r=img.get_data().read(); - for(int i=0;i<=new_img.get_mipmaps();i++) { + for(int i=0;i<=new_img.get_mipmap_count();i++) { int ofs,size,w,h; img.get_mipmap_offset_size_and_dimensions(i,ofs,size,w,h); @@ -234,7 +234,7 @@ static void _compress_pvrtc4(Image * p_img) { } - *p_img = Image(new_img.get_width(),new_img.get_height(),new_img.get_mipmaps(),new_img.get_format(),data); + *p_img = Image(new_img.get_width(),new_img.get_height(),new_img.has_mipmaps(),new_img.get_format(),data); } @@ -673,9 +673,9 @@ static void _pvrtc_decompress(Image* p_img) { // decompress_pvrtc((PVRTCBlock*)p_comp_img,p_2bit,p_width,p_height,1,p_dst); // } - ERR_FAIL_COND( p_img->get_format()!=Image::FORMAT_PVRTC2 && p_img->get_format()!=Image::FORMAT_PVRTC2_ALPHA && p_img->get_format()!=Image::FORMAT_PVRTC4 && p_img->get_format()!=Image::FORMAT_PVRTC4_ALPHA); + ERR_FAIL_COND( p_img->get_format()!=Image::FORMAT_PVRTC2 && p_img->get_format()!=Image::FORMAT_PVRTC2A && p_img->get_format()!=Image::FORMAT_PVRTC4 && p_img->get_format()!=Image::FORMAT_PVRTC4A); - bool _2bit = (p_img->get_format()==Image::FORMAT_PVRTC2 || p_img->get_format()==Image::FORMAT_PVRTC2_ALPHA ); + bool _2bit = (p_img->get_format()==Image::FORMAT_PVRTC2 || p_img->get_format()==Image::FORMAT_PVRTC2A ); DVector data = p_img->get_data(); DVector::Read r = data.read(); @@ -694,8 +694,8 @@ static void _pvrtc_decompress(Image* p_img) { w=DVector::Write(); r=DVector::Read(); - bool make_mipmaps=p_img->get_mipmaps()>0; - Image newimg(p_img->get_width(),p_img->get_height(),0,Image::FORMAT_RGBA,newdata); + bool make_mipmaps=p_img->has_mipmaps(); + Image newimg(p_img->get_width(),p_img->get_height(),false,Image::FORMAT_RGBA8,newdata); if (make_mipmaps) newimg.generate_mipmaps(); *p_img=newimg; diff --git a/drivers/squish/image_compress_squish.cpp b/drivers/squish/image_compress_squish.cpp index 95de83d5a5f..2cab6e6059d 100644 --- a/drivers/squish/image_compress_squish.cpp +++ b/drivers/squish/image_compress_squish.cpp @@ -35,7 +35,7 @@ void image_compress_squish(Image *p_image) { int w=p_image->get_width(); int h=p_image->get_height(); - if (p_image->get_mipmaps() == 0) { + if (!p_image->has_mipmaps() ) { ERR_FAIL_COND( !w || w % 4 != 0); ERR_FAIL_COND( !h || h % 4 != 0); } else { @@ -43,26 +43,26 @@ void image_compress_squish(Image *p_image) { ERR_FAIL_COND( !h || h !=nearest_power_of_2(h) ); }; - if (p_image->get_format()>=Image::FORMAT_BC1) + if (p_image->get_format()>=Image::FORMAT_DXT1) return; //do not compress, already compressed int shift=0; int squish_comp=squish::kColourRangeFit; Image::Format target_format; - if (p_image->get_format()==Image::FORMAT_GRAYSCALE_ALPHA) { + if (p_image->get_format()==Image::FORMAT_LA8) { //compressed normalmap - target_format = Image::FORMAT_BC3; squish_comp|=squish::kDxt5;; + target_format = Image::FORMAT_DXT5; squish_comp|=squish::kDxt5;; } else if (p_image->detect_alpha()!=Image::ALPHA_NONE) { - target_format = Image::FORMAT_BC2; squish_comp|=squish::kDxt3;; + target_format = Image::FORMAT_DXT3; squish_comp|=squish::kDxt3;; } else { - target_format = Image::FORMAT_BC1; shift=1; squish_comp|=squish::kDxt1;; + target_format = Image::FORMAT_DXT1; shift=1; squish_comp|=squish::kDxt1;; } - p_image->convert(Image::FORMAT_RGBA); //always expects rgba + p_image->convert(Image::FORMAT_RGBA8); //always expects rgba - int mm_count = p_image->get_mipmaps(); + int mm_count = p_image->get_mipmap_count(); DVector data; int target_size = Image::get_image_data_size(w,h,target_format,mm_count); @@ -85,7 +85,7 @@ void image_compress_squish(Image *p_image) { rb = DVector::Read(); wb = DVector::Write(); - p_image->create(p_image->get_width(),p_image->get_height(),p_image->get_mipmaps(),target_format,data); + p_image->create(p_image->get_width(),p_image->get_height(),p_image->has_mipmaps(),target_format,data); } diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp index 1f3832ec16c..7f7449de94e 100644 --- a/drivers/theora/video_stream_theora.cpp +++ b/drivers/theora/video_stream_theora.cpp @@ -105,7 +105,7 @@ void VideoStreamPlaybackTheora::video_write(void){ dst[p++] = 255; }; } - format = Image::FORMAT_RGBA; + format = Image::FORMAT_RGBA8; } // */ @@ -132,10 +132,10 @@ void VideoStreamPlaybackTheora::video_write(void){ yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); }; - format = Image::FORMAT_RGBA; + format = Image::FORMAT_RGBA8; } - Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation + Image img(size.x,size.y,0,Image::FORMAT_RGBA8,frame_data); //zero copy image creation texture->set_data(img); //zero copy send to visual server @@ -204,7 +204,7 @@ void VideoStreamPlaybackTheora::video_write(void){ } } - format = Image::FORMAT_RGBA; + format = Image::FORMAT_RGBA8; } else { @@ -472,7 +472,7 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) { size.x = w; size.y = h; - texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE); + texture->create(w,h,Image::FORMAT_RGBA8,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE); }else{ /* tear down the partial theora setup */ diff --git a/drivers/webp/image_loader_webp.cpp b/drivers/webp/image_loader_webp.cpp index 68bb857293f..d0b92851535 100644 --- a/drivers/webp/image_loader_webp.cpp +++ b/drivers/webp/image_loader_webp.cpp @@ -41,9 +41,9 @@ static DVector _webp_lossy_pack(const Image& p_image,float p_quality) { Image img=p_image; if (img.detect_alpha()) - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); else - img.convert(Image::FORMAT_RGB); + img.convert(Image::FORMAT_RGB8); Size2 s(img.get_width(),img.get_height()); DVector data = img.get_data(); @@ -51,7 +51,7 @@ static DVector _webp_lossy_pack(const Image& p_image,float p_quality) { uint8_t *dst_buff=NULL; size_t dst_size=0; - if (img.get_format()==Image::FORMAT_RGB) { + if (img.get_format()==Image::FORMAT_RGB8) { dst_size = WebPEncodeRGB(r.ptr(),s.width,s.height,3*s.width,CLAMP(p_quality*100.0,0,100.0),&dst_buff); } else { @@ -108,7 +108,7 @@ static Image _webp_lossy_unpack(const DVector& p_buffer) { dst_w = DVector::Write(); - return Image(features.width,features.height,0,features.has_alpha?Image::FORMAT_RGBA:Image::FORMAT_RGB,dst_image); + return Image(features.width,features.height,0,features.has_alpha?Image::FORMAT_RGBA8:Image::FORMAT_RGB8,dst_image); } @@ -160,7 +160,7 @@ Error ImageLoaderWEBP::load_image(Image *p_image,FileAccess *f) { src_r = DVector::Read(); dst_w = DVector::Write(); - *p_image = Image(features.width,features.height,0,features.has_alpha?Image::FORMAT_RGBA:Image::FORMAT_RGB,dst_image); + *p_image = Image(features.width,features.height,0,features.has_alpha?Image::FORMAT_RGBA8:Image::FORMAT_RGB8,dst_image); return OK; diff --git a/methods.py b/methods.py index c4951c69bdf..7ad42a559a7 100755 --- a/methods.py +++ b/methods.py @@ -123,7 +123,7 @@ def build_glsl_header( filename ): uline=line[:line.lower().find("//")] uline = uline[uline.find("uniform")+len("uniform"):]; uline = uline.replace(";",""); - uline = uline.replace("{",""); + uline = uline.replace("{","").strip(); lines = uline.split(",") for x in lines: @@ -767,9 +767,31 @@ def include_file_in_legacygl_header( filename, header_data, depth ): header_data.texunits+=[(x,texunit)] header_data.texunit_names+=[x] + elif (line.find("uniform")!=-1 and line.lower().find("ubo:")!=-1): + #uniform buffer object + ubostr = line[line.find(":")+1:].strip() + ubo = str(int(ubostr )) + uline=line[:line.lower().find("//")] + uline = uline[uline.find("uniform")+len("uniform"):]; + uline = uline.replace("highp",""); + uline = uline.replace(";",""); + uline = uline.replace("{","").strip(); + lines = uline.split(",") + for x in lines: + + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] + + if (not x in header_data.ubo_names): + header_data.ubos+=[(x,ubo)] + header_data.ubo_names+=[x] - elif (line.find("uniform")!=-1): + + elif (line.find("uniform")!=-1 and line.find("{")==-1 and line.find(";")!=-1): uline = line.replace("uniform",""); uline = uline.replace(";",""); lines = uline.split(",") @@ -785,7 +807,7 @@ def include_file_in_legacygl_header( filename, header_data, depth ): header_data.uniforms+=[x] - if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): + if ( line.strip().find("attribute ")==0 and line.find("attrib:")!=-1): uline = line.replace("in ",""); uline = uline.replace("attribute ",""); uline = uline.replace("highp ",""); @@ -1036,6 +1058,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") + if output_attribs: if (len(header_data.attributes)): @@ -1055,6 +1078,14 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") + if (len(header_data.ubos)): + fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") + for x in header_data.ubos: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic UBOPair *_ubo_pairs=NULL;\n") + fd.write("\t\tstatic const char _vertex_code[]={\n") for x in header_data.vertex_lines: for i in range(len(x)): @@ -1077,9 +1108,9 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\t\tstatic const int _fragment_code_start="+str(header_data.fragment_offset)+";\n") if output_attribs: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_attribute_pairs,"+str(len(header_data.attributes))+", _texunit_pairs,"+str(len(header_data.texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_attribute_pairs,"+str(len(header_data.attributes))+", _texunit_pairs,"+str(len(header_data.texunits))+",_ubo_pairs,"+str(len(header_data.ubos))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") else: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_texunit_pairs,"+str(len(header_data.texunits))+",_enums,"+str(len(header_data.enums))+",_enum_values,"+str(enum_value_count)+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_texunit_pairs,"+str(len(header_data.texunits))+",_enums,"+str(len(header_data.enums))+",_enum_values,"+str(enum_value_count)+",_ubo_pairs,"+str(len(header_data.ubos))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") @@ -1111,6 +1142,11 @@ def build_gles2_headers( target, source, env ): for x in source: build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix = "GLES2", output_attribs = True) + +def build_gles3_headers( target, source, env ): + + for x in source: + build_legacygl_header(str(x), include="drivers/gles3/shader_gles3.h", class_suffix = "GLES3", output_attribs = True) def update_version(): diff --git a/modules/gridmap/config.py b/modules/gridmap/config.py index ea7e83378a9..9826689218d 100644 --- a/modules/gridmap/config.py +++ b/modules/gridmap/config.py @@ -1,7 +1,7 @@ def can_build(platform): - return True + return False def configure(env): diff --git a/platform/iphone/rasterizer_iphone.cpp b/platform/iphone/rasterizer_iphone.cpp index 99e83343d09..5478569a426 100644 --- a/platform/iphone/rasterizer_iphone.cpp +++ b/platform/iphone/rasterizer_iphone.cpp @@ -134,21 +134,21 @@ static Image _get_gl_image_and_format(const Image& p_image, Image::Format p_form switch(p_format) { - case Image::FORMAT_GRAYSCALE: { + case Image::FORMAT_L8: { r_gl_components=1; r_gl_format=GL_LUMINANCE; } break; case Image::FORMAT_INTENSITY: { - image.convert(Image::FORMAT_RGBA); + image.convert(Image::FORMAT_RGBA8); r_gl_components=4; r_gl_format=GL_RGBA; r_has_alpha_cache=true; } break; - case Image::FORMAT_GRAYSCALE_ALPHA: { + case Image::FORMAT_LA8: { - image.convert(Image::FORMAT_RGBA); + image.convert(Image::FORMAT_RGBA8); r_gl_components=4; r_gl_format=GL_RGBA; r_has_alpha_cache=true; @@ -156,7 +156,7 @@ static Image _get_gl_image_and_format(const Image& p_image, Image::Format p_form case Image::FORMAT_INDEXED: { - image.convert(Image::FORMAT_RGB); + image.convert(Image::FORMAT_RGB8); r_gl_components=3; r_gl_format=GL_RGB; @@ -164,17 +164,17 @@ static Image _get_gl_image_and_format(const Image& p_image, Image::Format p_form case Image::FORMAT_INDEXED_ALPHA: { - image.convert(Image::FORMAT_RGBA); + image.convert(Image::FORMAT_RGBA8); r_gl_components=4; r_gl_format=GL_RGB; r_has_alpha_cache=true; } break; - case Image::FORMAT_RGB: { + case Image::FORMAT_RGB8: { r_gl_components=3; r_gl_format=GL_RGB; } break; - case Image::FORMAT_RGBA: { + case Image::FORMAT_RGBA8: { r_gl_components=4; r_gl_format=GL_RGBA; @@ -344,7 +344,7 @@ Image::Format RasterizerIPhone::texture_get_format(RID p_texture) const { Texture * texture = texture_owner.get(p_texture); - ERR_FAIL_COND_V(!texture,Image::FORMAT_GRAYSCALE); + ERR_FAIL_COND_V(!texture,Image::FORMAT_L8); return texture->format; } diff --git a/platform/iphone/rasterizer_iphone.h b/platform/iphone/rasterizer_iphone.h index add656b190a..fcbb339ab30 100644 --- a/platform/iphone/rasterizer_iphone.h +++ b/platform/iphone/rasterizer_iphone.h @@ -74,7 +74,7 @@ class RasterizerIPhone : public Rasterizer { flags=width=height=0; tex_id=0; - format=Image::FORMAT_GRAYSCALE; + format=Image::FORMAT_L8; gl_components_cache=0; format_has_alpha=false; has_alpha=false; diff --git a/platform/osx/export/export.cpp b/platform/osx/export/export.cpp index 30f4c58150a..c55bf0eadb5 100644 --- a/platform/osx/export/export.cpp +++ b/platform/osx/export/export.cpp @@ -207,7 +207,7 @@ void EditorExportPlatformOSX::_make_icon(const Image& p_icon,Vector& ic while(size>=16) { Image copy = p_icon; - copy.convert(Image::FORMAT_RGBA); + copy.convert(Image::FORMAT_RGBA8); copy.resize(size,size); it->create_from_image(copy); String path = EditorSettings::get_singleton()->get_settings_path()+"/tmp/icon.png"; diff --git a/platform/osx/os_osx.mm b/platform/osx/os_osx.mm index cc893cc7a04..5a5ddccd606 100644 --- a/platform/osx/os_osx.mm +++ b/platform/osx/os_osx.mm @@ -1313,7 +1313,7 @@ void OS_OSX::set_window_title(const String& p_title) { void OS_OSX::set_icon(const Image& p_icon) { Image img=p_icon; - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); NSBitmapImageRep *imgrep= [[[NSBitmapImageRep alloc] initWithBitmapDataPlanes: NULL pixelsWide: p_icon.get_width() pixelsHigh: p_icon.get_height() diff --git a/platform/osx/platform_config.h b/platform/osx/platform_config.h index 86505206aea..dc04e9b4f15 100644 --- a/platform/osx/platform_config.h +++ b/platform/osx/platform_config.h @@ -27,5 +27,7 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ #include + #define GLES2_INCLUDE_H "gl_context/GL/glew.h" +#define GLES3_INCLUDE_H "gl_context/GL/glew.h" #define PTHREAD_RENAME_SELF diff --git a/platform/server/os_server.cpp b/platform/server/os_server.cpp index 5404980ff37..cb00fdaac84 100644 --- a/platform/server/os_server.cpp +++ b/platform/server/os_server.cpp @@ -26,8 +26,8 @@ /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ -#include "servers/visual/visual_server_raster.h" -#include "servers/visual/rasterizer_dummy.h" +//#include "servers/visual/visual_server_raster.h" +//#include "servers/visual/rasterizer_dummy.h" #include "os_server.h" #include #include @@ -57,9 +57,9 @@ void OS_Server::initialize(const VideoMode& p_desired,int p_video_driver,int p_a current_videomode=p_desired; main_loop=NULL; - rasterizer = memnew( RasterizerDummy ); + //rasterizer = memnew( RasterizerDummy ); - visual_server = memnew( VisualServerRaster(rasterizer) ); + //visual_server = memnew( VisualServerRaster(rasterizer) ); AudioDriverManagerSW::get_driver(p_audio_driver)->set_singleton(); @@ -114,7 +114,7 @@ void OS_Server::finalize() { visual_server->finish(); memdelete(visual_server); - memdelete(rasterizer); + //memdelete(rasterizer); physics_server->finish(); memdelete(physics_server); diff --git a/platform/server/os_server.h b/platform/server/os_server.h index 2081d5f2f4b..ed5111d8310 100644 --- a/platform/server/os_server.h +++ b/platform/server/os_server.h @@ -51,7 +51,7 @@ class OS_Server : public OS_Unix { - Rasterizer *rasterizer; +// Rasterizer *rasterizer; VisualServer *visual_server; VideoMode current_videomode; List args; diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp index 35d90a83088..1af7cb2a498 100644 --- a/platform/windows/os_windows.cpp +++ b/platform/windows/os_windows.cpp @@ -2081,8 +2081,8 @@ void OS_Windows::set_icon(const Image& p_icon) { Image icon=p_icon; - if (icon.get_format()!=Image::FORMAT_RGBA) - icon.convert(Image::FORMAT_RGBA); + if (icon.get_format()!=Image::FORMAT_RGBA8) + icon.convert(Image::FORMAT_RGBA8); int w = icon.get_width(); int h = icon.get_height(); diff --git a/platform/windows/platform_config.h b/platform/windows/platform_config.h index 9e207508161..296a778ac82 100644 --- a/platform/windows/platform_config.h +++ b/platform/windows/platform_config.h @@ -31,5 +31,6 @@ //#include //#endif #define GLES2_INCLUDE_H "gl_context/GL/glew.h" +#define GLES3_INCLUDE_H "gl_context/GL/glew.h" diff --git a/platform/x11/context_gl_x11.cpp b/platform/x11/context_gl_x11.cpp index cd325dfc998..ccf7a801eff 100644 --- a/platform/x11/context_gl_x11.cpp +++ b/platform/x11/context_gl_x11.cpp @@ -140,6 +140,7 @@ Error ContextGL_X11::initialize() { static int context_attribs[] = { GLX_CONTEXT_MAJOR_VERSION_ARB, 3, GLX_CONTEXT_MINOR_VERSION_ARB, 0, + GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB, None }; diff --git a/platform/x11/detect.py b/platform/x11/detect.py index ba232f6d4e7..6e3a8af676e 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -198,9 +198,9 @@ def configure(env): import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) if (env["use_static_cpp"]=="yes"): diff --git a/platform/x11/os_x11.cpp b/platform/x11/os_x11.cpp index 5f1ab5b4aa3..d2d6e88c4bc 100644 --- a/platform/x11/os_x11.cpp +++ b/platform/x11/os_x11.cpp @@ -27,7 +27,7 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ #include "servers/visual/visual_server_raster.h" -#include "drivers/gles2/rasterizer_gles2.h" +#include "drivers/gles3/rasterizer_gles3.h" #include "os_x11.h" #include "key_mapping_x11.h" #include @@ -74,7 +74,7 @@ int OS_X11::get_video_driver_count() const { } const char * OS_X11::get_video_driver_name(int p_driver) const { - return "GLES2"; + return "GLES3"; } OS::VideoMode OS_X11::get_default_video_mode() const { @@ -203,19 +203,22 @@ void OS_X11::initialize(const VideoMode& p_desired,int p_video_driver,int p_audi //print_line("def videomode "+itos(current_videomode.width)+","+itos(current_videomode.height)); #if defined(OPENGL_ENABLED) || defined(LEGACYGL_ENABLED) + context_gl = memnew( ContextGL_X11( x11_display, x11_window,current_videomode, false ) ); context_gl->initialize(); - rasterizer = memnew( RasterizerGLES2 ); + RasterizerGLES3::register_config(); + + RasterizerGLES3::make_current(); #endif - visual_server = memnew( VisualServerRaster(rasterizer) ); - + visual_server = memnew( VisualServerRaster ); +#if 0 if (get_render_thread_mode()!=RENDER_THREAD_UNSAFE) { visual_server =memnew(VisualServerWrapMT(visual_server,get_render_thread_mode()==RENDER_SEPARATE_THREAD)); } - +#endif // borderless fullscreen window mode if (current_videomode.fullscreen) { // needed for lxde/openbox, possibly others @@ -487,7 +490,7 @@ void OS_X11::finalize() { visual_server->finish(); memdelete(visual_server); - memdelete(rasterizer); + //memdelete(rasterizer); physics_server->finish(); memdelete(physics_server); @@ -1878,7 +1881,7 @@ void OS_X11::set_icon(const Image& p_icon) { if (!p_icon.empty()) { Image img=p_icon; - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); int w = img.get_width(); int h = img.get_height(); diff --git a/platform/x11/os_x11.h b/platform/x11/os_x11.h index b27f71406ab..b351309d80a 100644 --- a/platform/x11/os_x11.h +++ b/platform/x11/os_x11.h @@ -34,7 +34,7 @@ #include "drivers/unix/os_unix.h" #include "context_gl_x11.h" #include "servers/visual_server.h" -#include "servers/visual/visual_server_wrap_mt.h" +//#include "servers/visual/visual_server_wrap_mt.h" #include "servers/visual/rasterizer.h" #include "servers/physics_server.h" #include "servers/audio/audio_server_sw.h" @@ -99,7 +99,7 @@ class OS_X11 : public OS_Unix { #if defined(OPENGL_ENABLED) || defined(LEGACYGL_ENABLED) ContextGL_X11 *context_gl; #endif - Rasterizer *rasterizer; + //Rasterizer *rasterizer; VisualServer *visual_server; VideoMode current_videomode; List args; diff --git a/platform/x11/platform_config.h b/platform/x11/platform_config.h index 3b47b2c92d7..1873e712ec7 100644 --- a/platform/x11/platform_config.h +++ b/platform/x11/platform_config.h @@ -35,5 +35,6 @@ #endif #define GLES2_INCLUDE_H "gl_context/GL/glew.h" +#define GLES3_INCLUDE_H "gl_context/GL/glew.h" diff --git a/scene/2d/canvas_item.cpp b/scene/2d/canvas_item.cpp index ed1d606ba8c..07ee1ff7530 100644 --- a/scene/2d/canvas_item.cpp +++ b/scene/2d/canvas_item.cpp @@ -42,9 +42,6 @@ bool CanvasItemMaterial::_set(const StringName& p_name, const Variant& p_value) if (p_name==SceneStringNames::get_singleton()->shader_shader) { set_shader(p_value); return true; - } else if (p_name==SceneStringNames::get_singleton()->shading_mode) { - set_shading_mode(ShadingMode(p_value.operator int())); - return true; } else { if (shader.is_valid()) { @@ -58,7 +55,7 @@ bool CanvasItemMaterial::_set(const StringName& p_name, const Variant& p_value) } } if (pr) { - VisualServer::get_singleton()->canvas_item_material_set_shader_param(material,pr,p_value); + VisualServer::get_singleton()->material_set_param(material,pr,p_value); return true; } } @@ -74,18 +71,14 @@ bool CanvasItemMaterial::_get(const StringName& p_name,Variant &r_ret) const { r_ret=get_shader(); return true; - } else if (p_name==SceneStringNames::get_singleton()->shading_mode) { - - r_ret=shading_mode; - return true; } else { if (shader.is_valid()) { StringName pr = shader->remap_param(p_name); if (pr) { - r_ret=VisualServer::get_singleton()->canvas_item_material_get_shader_param(material,pr); + r_ret=VisualServer::get_singleton()->material_get_param(material,pr); return true; } } @@ -100,7 +93,6 @@ bool CanvasItemMaterial::_get(const StringName& p_name,Variant &r_ret) const { void CanvasItemMaterial::_get_property_list( List *p_list) const { p_list->push_back( PropertyInfo( Variant::OBJECT, "shader/shader", PROPERTY_HINT_RESOURCE_TYPE,"CanvasItemShader,CanvasItemShaderGraph" ) ); - p_list->push_back( PropertyInfo( Variant::INT, "shader/shading_mode",PROPERTY_HINT_ENUM,"Normal,Unshaded,Light Only") ); if (!shader.is_null()) { @@ -119,7 +111,7 @@ void CanvasItemMaterial::set_shader(const Ref& p_shader) { if (shader.is_valid()) rid=shader->get_rid(); - VS::get_singleton()->canvas_item_material_set_shader(material,rid); + VS::get_singleton()->material_set_shader(material,rid); _change_notify(); //properties for shader exposed emit_changed(); } @@ -131,12 +123,12 @@ Ref CanvasItemMaterial::get_shader() const{ void CanvasItemMaterial::set_shader_param(const StringName& p_param,const Variant& p_value){ - VS::get_singleton()->canvas_item_material_set_shader_param(material,p_param,p_value); + VS::get_singleton()->material_set_param(material,p_param,p_value); } Variant CanvasItemMaterial::get_shader_param(const StringName& p_param) const{ - return VS::get_singleton()->canvas_item_material_get_shader_param(material,p_param); + return VS::get_singleton()->material_get_param(material,p_param); } RID CanvasItemMaterial::get_rid() const { @@ -144,16 +136,6 @@ RID CanvasItemMaterial::get_rid() const { return material; } -void CanvasItemMaterial::set_shading_mode(ShadingMode p_mode) { - - shading_mode=p_mode; - VS::get_singleton()->canvas_item_material_set_shading_mode(material,VS::CanvasItemShadingMode(p_mode)); -} - -CanvasItemMaterial::ShadingMode CanvasItemMaterial::get_shading_mode() const { - return shading_mode; -} - void CanvasItemMaterial::_bind_methods() { @@ -161,12 +143,7 @@ void CanvasItemMaterial::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_shader:Shader"),&CanvasItemMaterial::get_shader); ObjectTypeDB::bind_method(_MD("set_shader_param","param","value"),&CanvasItemMaterial::set_shader_param); ObjectTypeDB::bind_method(_MD("get_shader_param","param"),&CanvasItemMaterial::get_shader_param); - ObjectTypeDB::bind_method(_MD("set_shading_mode","mode"),&CanvasItemMaterial::set_shading_mode); - ObjectTypeDB::bind_method(_MD("get_shading_mode"),&CanvasItemMaterial::get_shading_mode); - BIND_CONSTANT( SHADING_NORMAL ); - BIND_CONSTANT( SHADING_UNSHADED ); - BIND_CONSTANT( SHADING_ONLY_LIGHT ); } @@ -189,13 +166,13 @@ void CanvasItemMaterial::get_argument_options(const StringName& p_function,int p CanvasItemMaterial::CanvasItemMaterial() { - material=VS::get_singleton()->canvas_item_material_create(); - shading_mode=SHADING_NORMAL; + + } CanvasItemMaterial::~CanvasItemMaterial(){ - VS::get_singleton()->free(material); + } @@ -374,6 +351,8 @@ Matrix32 CanvasItem::get_global_transform_with_canvas() const { return last_valid->canvas_layer->get_transform() * xform; else if (is_inside_tree()) return get_viewport()->get_canvas_transform() * xform; + + return xform; } Matrix32 CanvasItem::get_global_transform() const { @@ -394,42 +373,17 @@ Matrix32 CanvasItem::get_global_transform() const { } - -void CanvasItem::_queue_sort_children() { - - if (pending_children_sort) - return; - - pending_children_sort=true; - MessageQueue::get_singleton()->push_call(this,"_sort_children"); -} - -void CanvasItem::_sort_children() { - - pending_children_sort=false; +void CanvasItem::_toplevel_raise_self() { if (!is_inside_tree()) return; - for(int i=0;icanvas_item_set_draw_index(canvas_item,canvas_layer->get_sort_index()); + else + VisualServer::get_singleton()->canvas_item_set_draw_index(canvas_item,get_viewport()->gui_get_canvas_sort_index()); - Node *n = get_child(i); - CanvasItem *ci=n->cast_to(); - if (ci) { - if (ci->toplevel || ci->group!="") - continue; - VisualServer::get_singleton()->canvas_item_raise(n->cast_to()->canvas_item); - } - } -} - -void CanvasItem::_raise_self() { - - if (!is_inside_tree()) - return; - - VisualServer::get_singleton()->canvas_item_raise(canvas_item); } @@ -461,14 +415,19 @@ void CanvasItem::_enter_canvas() { group = "root_canvas"+itos(canvas.get_id()); add_to_group(group); - get_tree()->call_group(SceneTree::GROUP_CALL_UNIQUE,group,"_raise_self"); + if (canvas_layer) + canvas_layer->reset_sort_index(); + else + get_viewport()->gui_reset_canvas_sort_index(); + + get_tree()->call_group(SceneTree::GROUP_CALL_UNIQUE,group,"_toplevel_raise_self"); } else { CanvasItem *parent = get_parent_item(); canvas_layer=parent->canvas_layer; VisualServer::get_singleton()->canvas_item_set_parent(canvas_item,parent->get_canvas_item()); - parent->_queue_sort_children(); + VisualServer::get_singleton()->canvas_item_set_draw_index(canvas_item,get_index()); } pending_update=false; @@ -495,7 +454,6 @@ void CanvasItem::_notification(int p_what) { case NOTIFICATION_ENTER_TREE: { first_draw=true; - pending_children_sort=false; if (get_parent()) { CanvasItem *ci = get_parent()->cast_to(); if (ci) @@ -508,13 +466,15 @@ void CanvasItem::_notification(int p_what) { } break; case NOTIFICATION_MOVED_IN_PARENT: { + if (!is_inside_tree()) + break; if (group!="") { - get_tree()->call_group(SceneTree::GROUP_CALL_UNIQUE,group,"_raise_self"); + get_tree()->call_group(SceneTree::GROUP_CALL_UNIQUE,group,"_toplevel_raise_self"); } else { CanvasItem *p = get_parent_item(); ERR_FAIL_COND(!p); - p->_queue_sort_children(); + VisualServer::get_singleton()->canvas_item_set_draw_index(canvas_item,get_index()); } @@ -569,15 +529,15 @@ void CanvasItem::update() { MessageQueue::get_singleton()->push_call(this,"_update_callback"); } -void CanvasItem::set_opacity(float p_opacity) { +void CanvasItem::set_modulate(const Color& p_modulate) { - opacity=p_opacity; - VisualServer::get_singleton()->canvas_item_set_opacity(canvas_item,opacity); + modulate=p_modulate; + VisualServer::get_singleton()->canvas_item_set_modulate(canvas_item,modulate); } -float CanvasItem::get_opacity() const { +Color CanvasItem::get_modulate() const { - return opacity; + return modulate; } @@ -614,29 +574,17 @@ CanvasItem *CanvasItem::get_parent_item() const { } -void CanvasItem::set_self_opacity(float p_self_opacity) { +void CanvasItem::set_self_modulate(const Color& p_self_modulate) { - self_opacity=p_self_opacity; - VisualServer::get_singleton()->canvas_item_set_self_opacity(canvas_item,self_opacity); + self_modulate=p_self_modulate; + VisualServer::get_singleton()->canvas_item_set_self_modulate(canvas_item,self_modulate); } -float CanvasItem::get_self_opacity() const { +Color CanvasItem::get_self_modulate() const { - return self_opacity; + return self_modulate; } -void CanvasItem::set_blend_mode(BlendMode p_blend_mode) { - - ERR_FAIL_INDEX(p_blend_mode,5); - blend_mode=p_blend_mode; - VisualServer::get_singleton()->canvas_item_set_blend_mode(canvas_item,VS::MaterialBlendMode(blend_mode)); - -} - -CanvasItem::BlendMode CanvasItem::get_blend_mode() const { - - return blend_mode; -} void CanvasItem::set_light_mask(int p_light_mask) { @@ -913,7 +861,7 @@ void CanvasItem::set_draw_behind_parent(bool p_enable) { if (behind==p_enable) return; behind=p_enable; - VisualServer::get_singleton()->canvas_item_set_on_top(canvas_item,!behind); + VisualServer::get_singleton()->canvas_item_set_draw_behind_parent(canvas_item,behind); } @@ -983,8 +931,7 @@ Vector2 CanvasItem::get_local_mouse_pos() const{ void CanvasItem::_bind_methods() { - ObjectTypeDB::bind_method(_MD("_sort_children"),&CanvasItem::_sort_children); - ObjectTypeDB::bind_method(_MD("_raise_self"),&CanvasItem::_raise_self); + ObjectTypeDB::bind_method(_MD("_toplevel_raise_self"),&CanvasItem::_toplevel_raise_self); ObjectTypeDB::bind_method(_MD("_update_callback"),&CanvasItem::_update_callback); ObjectTypeDB::bind_method(_MD("_set_visible_"),&CanvasItem::_set_visible_); ObjectTypeDB::bind_method(_MD("_is_visible_"),&CanvasItem::_is_visible_); @@ -1011,16 +958,13 @@ void CanvasItem::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_as_toplevel","enable"),&CanvasItem::set_as_toplevel); ObjectTypeDB::bind_method(_MD("is_set_as_toplevel"),&CanvasItem::is_set_as_toplevel); - ObjectTypeDB::bind_method(_MD("set_blend_mode","blend_mode"),&CanvasItem::set_blend_mode); - ObjectTypeDB::bind_method(_MD("get_blend_mode"),&CanvasItem::get_blend_mode); - ObjectTypeDB::bind_method(_MD("set_light_mask","light_mask"),&CanvasItem::set_light_mask); ObjectTypeDB::bind_method(_MD("get_light_mask"),&CanvasItem::get_light_mask); - ObjectTypeDB::bind_method(_MD("set_opacity","opacity"),&CanvasItem::set_opacity); - ObjectTypeDB::bind_method(_MD("get_opacity"),&CanvasItem::get_opacity); - ObjectTypeDB::bind_method(_MD("set_self_opacity","self_opacity"),&CanvasItem::set_self_opacity); - ObjectTypeDB::bind_method(_MD("get_self_opacity"),&CanvasItem::get_self_opacity); + ObjectTypeDB::bind_method(_MD("set_modulate","modulate"),&CanvasItem::set_modulate); + ObjectTypeDB::bind_method(_MD("get_modulate"),&CanvasItem::get_modulate); + ObjectTypeDB::bind_method(_MD("set_self_modulate","self_modulate"),&CanvasItem::set_self_modulate); + ObjectTypeDB::bind_method(_MD("get_self_modulate"),&CanvasItem::get_self_modulate); ObjectTypeDB::bind_method(_MD("set_draw_behind_parent","enable"),&CanvasItem::set_draw_behind_parent); ObjectTypeDB::bind_method(_MD("is_draw_behind_parent_enabled"),&CanvasItem::is_draw_behind_parent_enabled); @@ -1069,12 +1013,11 @@ void CanvasItem::_bind_methods() { BIND_VMETHOD(MethodInfo("_draw")); ADD_PROPERTYNO( PropertyInfo(Variant::BOOL,"visibility/visible"), _SCS("_set_visible_"),_SCS("_is_visible_") ); - ADD_PROPERTYNO( PropertyInfo(Variant::REAL,"visibility/opacity",PROPERTY_HINT_RANGE, "0,1,0.01"), _SCS("set_opacity"),_SCS("get_opacity") ); - ADD_PROPERTYNO( PropertyInfo(Variant::REAL,"visibility/self_opacity",PROPERTY_HINT_RANGE, "0,1,0.01"), _SCS("set_self_opacity"),_SCS("get_self_opacity") ); + ADD_PROPERTYNO( PropertyInfo(Variant::REAL,"visibility/modulate",PROPERTY_HINT_RANGE, "0,1,0.01"), _SCS("set_modulate"),_SCS("get_modulate") ); + ADD_PROPERTYNO( PropertyInfo(Variant::REAL,"visibility/self_modulate",PROPERTY_HINT_RANGE, "0,1,0.01"), _SCS("set_self_modulate"),_SCS("get_self_modulate") ); ADD_PROPERTYNZ( PropertyInfo(Variant::BOOL,"visibility/behind_parent"), _SCS("set_draw_behind_parent"),_SCS("is_draw_behind_parent_enabled") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"visibility/on_top",PROPERTY_HINT_NONE,"",0), _SCS("_set_on_top"),_SCS("_is_on_top") ); //compatibility - ADD_PROPERTYNZ( PropertyInfo(Variant::INT,"visibility/blend_mode",PROPERTY_HINT_ENUM, "Mix,Add,Sub,Mul,PMAlpha"), _SCS("set_blend_mode"),_SCS("get_blend_mode") ); ADD_PROPERTYNO( PropertyInfo(Variant::INT,"visibility/light_mask",PROPERTY_HINT_ALL_FLAGS), _SCS("set_light_mask"),_SCS("get_light_mask") ); ADD_PROPERTYNZ( PropertyInfo(Variant::OBJECT,"material/material",PROPERTY_HINT_RESOURCE_TYPE, "CanvasItemMaterial"), _SCS("set_material"),_SCS("get_material") ); ADD_PROPERTYNZ( PropertyInfo(Variant::BOOL,"material/use_parent"), _SCS("set_use_parent_material"),_SCS("get_use_parent_material") ); @@ -1176,12 +1119,10 @@ CanvasItem::CanvasItem() : xform_change(this) { canvas_item=VisualServer::get_singleton()->canvas_item_create(); hidden=false; pending_update=false; - opacity=1; - self_opacity=1; + modulate=Color(1,1,1,1); + self_modulate=Color(1,1,1,1); toplevel=false; - pending_children_sort=false; first_draw=false; - blend_mode=BLEND_MODE_MIX; drawing=false; behind=false; block_transform_notify=false; diff --git a/scene/2d/canvas_item.h b/scene/2d/canvas_item.h index 7849a661859..b65c1a60b44 100644 --- a/scene/2d/canvas_item.h +++ b/scene/2d/canvas_item.h @@ -33,6 +33,7 @@ #include "scene/resources/texture.h" #include "scene/main/scene_main_loop.h" #include "scene/resources/shader.h" +#include "scene/resources/material.h" class CanvasLayer; class Viewport; @@ -40,22 +41,19 @@ class Font; class StyleBox; -class CanvasItemMaterial : public Resource{ +class CanvasItemMaterial : public Material{ - OBJ_TYPE(CanvasItemMaterial,Resource); + OBJ_TYPE(CanvasItemMaterial,Material); RID material; Ref shader; public: - enum ShadingMode { + /*enum ShadingMode { SHADING_NORMAL, SHADING_UNSHADED, SHADING_ONLY_LIGHT, - }; + };*/ protected: - - ShadingMode shading_mode; - bool _set(const StringName& p_name, const Variant& p_value); bool _get(const StringName& p_name,Variant &r_ret) const; void _get_property_list( List *p_list) const; @@ -72,15 +70,12 @@ public: void set_shader_param(const StringName& p_param,const Variant& p_value); Variant get_shader_param(const StringName& p_param) const; - void set_shading_mode(ShadingMode p_mode); - ShadingMode get_shading_mode() const; - virtual RID get_rid() const; CanvasItemMaterial(); ~CanvasItemMaterial(); }; -VARIANT_ENUM_CAST( CanvasItemMaterial::ShadingMode ); + class CanvasItem : public Node { @@ -107,8 +102,8 @@ private: CanvasLayer *canvas_layer; - float opacity; - float self_opacity; + Color modulate; + Color self_modulate; List children_items; List::Element *C; @@ -120,7 +115,6 @@ private: bool hidden; bool pending_update; bool toplevel; - bool pending_children_sort; bool drawing; bool block_transform_notify; bool behind; @@ -133,7 +127,7 @@ private: mutable bool global_invalid; - void _raise_self(); + void _toplevel_raise_self(); void _propagate_visibility_changed(bool p_visible); @@ -145,9 +139,6 @@ private: void _enter_canvas(); void _exit_canvas(); - void _queue_sort_children(); - void _sort_children(); - void _notify_transform(CanvasItem *p_node); void _set_on_top(bool p_on_top) { set_draw_behind_parent(!p_on_top); } @@ -193,17 +184,14 @@ public: void update(); - void set_blend_mode(BlendMode p_blend_mode); - BlendMode get_blend_mode() const; - virtual void set_light_mask(int p_light_mask); int get_light_mask() const; - void set_opacity(float p_opacity); - float get_opacity() const; + void set_modulate(const Color& p_modulate); + Color get_modulate() const; - void set_self_opacity(float p_self_opacity); - float get_self_opacity() const; + void set_self_modulate(const Color& p_self_modulate); + Color get_self_modulate() const; /* DRAWING API */ diff --git a/scene/2d/light_2d.cpp b/scene/2d/light_2d.cpp index f37cef673d1..8e5b8ba0a48 100644 --- a/scene/2d/light_2d.cpp +++ b/scene/2d/light_2d.cpp @@ -208,26 +208,26 @@ int Light2D::get_layer_range_max() const { return layer_max; } -void Light2D::set_item_mask( int p_mask) { +void Light2D::set_item_cull_mask( int p_mask) { item_mask=p_mask; - VS::get_singleton()->canvas_light_set_item_mask(canvas_light,item_mask); + VS::get_singleton()->canvas_light_set_item_cull_mask(canvas_light,item_mask); } -int Light2D::get_item_mask() const { +int Light2D::get_item_cull_mask() const { return item_mask; } -void Light2D::set_item_shadow_mask( int p_mask) { +void Light2D::set_item_shadow_cull_mask( int p_mask) { item_shadow_mask=p_mask; - VS::get_singleton()->canvas_light_set_item_shadow_mask(canvas_light,item_shadow_mask); + VS::get_singleton()->canvas_light_set_item_shadow_cull_mask(canvas_light,item_shadow_mask); } -int Light2D::get_item_shadow_mask() const { +int Light2D::get_item_shadow_cull_mask() const { return item_shadow_mask; } @@ -265,17 +265,30 @@ int Light2D::get_shadow_buffer_size() const { return shadow_buffer_size; } -void Light2D::set_shadow_esm_multiplier( float p_multiplier) { +void Light2D::set_shadow_gradient_length( float p_multiplier) { - shadow_esm_multiplier=p_multiplier; - VS::get_singleton()->canvas_light_set_shadow_esm_multiplier(canvas_light,p_multiplier); + shadow_gradient_length=p_multiplier; + VS::get_singleton()->canvas_light_set_shadow_gradient_length(canvas_light,p_multiplier); } -float Light2D::get_shadow_esm_multiplier() const{ +float Light2D::get_shadow_gradient_length() const{ - return shadow_esm_multiplier; + return shadow_gradient_length; } + +void Light2D::set_shadow_filter( ShadowFilter p_filter) { + shadow_filter=p_filter; + VS::get_singleton()->canvas_light_set_shadow_filter(canvas_light,VS::CanvasLightShadowFilter(p_filter )); +} + +Light2D::ShadowFilter Light2D::get_shadow_filter() const { + + return shadow_filter; +} + + + void Light2D::set_shadow_color( const Color& p_shadow_color) { shadow_color=p_shadow_color; VS::get_singleton()->canvas_light_set_shadow_color(canvas_light,shadow_color); @@ -360,11 +373,11 @@ void Light2D::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_layer_range_max"),&Light2D::get_layer_range_max); - ObjectTypeDB::bind_method(_MD("set_item_mask","item_mask"),&Light2D::set_item_mask); - ObjectTypeDB::bind_method(_MD("get_item_mask"),&Light2D::get_item_mask); + ObjectTypeDB::bind_method(_MD("set_item_cull_mask","item_cull_mask"),&Light2D::set_item_cull_mask); + ObjectTypeDB::bind_method(_MD("get_item_cull_mask"),&Light2D::get_item_cull_mask); - ObjectTypeDB::bind_method(_MD("set_item_shadow_mask","item_shadow_mask"),&Light2D::set_item_shadow_mask); - ObjectTypeDB::bind_method(_MD("get_item_shadow_mask"),&Light2D::get_item_shadow_mask); + ObjectTypeDB::bind_method(_MD("set_item_shadow_cull_mask","item_shadow_cull_mask"),&Light2D::set_item_shadow_cull_mask); + ObjectTypeDB::bind_method(_MD("get_item_shadow_cull_mask"),&Light2D::get_item_shadow_cull_mask); ObjectTypeDB::bind_method(_MD("set_mode","mode"),&Light2D::set_mode); ObjectTypeDB::bind_method(_MD("get_mode"),&Light2D::get_mode); @@ -375,8 +388,11 @@ void Light2D::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_shadow_buffer_size","size"),&Light2D::set_shadow_buffer_size); ObjectTypeDB::bind_method(_MD("get_shadow_buffer_size"),&Light2D::get_shadow_buffer_size); - ObjectTypeDB::bind_method(_MD("set_shadow_esm_multiplier","multiplier"),&Light2D::set_shadow_esm_multiplier); - ObjectTypeDB::bind_method(_MD("get_shadow_esm_multiplier"),&Light2D::get_shadow_esm_multiplier); + ObjectTypeDB::bind_method(_MD("set_shadow_gradient_length","multiplier"),&Light2D::set_shadow_gradient_length); + ObjectTypeDB::bind_method(_MD("get_shadow_gradient_length"),&Light2D::get_shadow_gradient_length); + + ObjectTypeDB::bind_method(_MD("set_shadow_filter","filter"),&Light2D::set_shadow_filter); + ObjectTypeDB::bind_method(_MD("get_shadow_filter"),&Light2D::get_shadow_filter); ObjectTypeDB::bind_method(_MD("set_shadow_color","shadow_color"),&Light2D::set_shadow_color); ObjectTypeDB::bind_method(_MD("get_shadow_color"),&Light2D::get_shadow_color); @@ -394,12 +410,13 @@ void Light2D::_bind_methods() { ADD_PROPERTY( PropertyInfo(Variant::INT,"range/z_max",PROPERTY_HINT_RANGE,itos(VS::CANVAS_ITEM_Z_MIN)+","+itos(VS::CANVAS_ITEM_Z_MAX)+",1"),_SCS("set_z_range_max"),_SCS("get_z_range_max")); ADD_PROPERTY( PropertyInfo(Variant::INT,"range/layer_min",PROPERTY_HINT_RANGE,"-512,512,1"),_SCS("set_layer_range_min"),_SCS("get_layer_range_min")); ADD_PROPERTY( PropertyInfo(Variant::INT,"range/layer_max",PROPERTY_HINT_RANGE,"-512,512,1"),_SCS("set_layer_range_max"),_SCS("get_layer_range_max")); - ADD_PROPERTY( PropertyInfo(Variant::INT,"range/item_mask",PROPERTY_HINT_ALL_FLAGS),_SCS("set_item_mask"),_SCS("get_item_mask")); + ADD_PROPERTY( PropertyInfo(Variant::INT,"range/item_cull_mask",PROPERTY_HINT_ALL_FLAGS),_SCS("set_item_cull_mask"),_SCS("get_item_cull_mask")); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"shadow/enabled"),_SCS("set_shadow_enabled"),_SCS("is_shadow_enabled")); ADD_PROPERTY( PropertyInfo(Variant::COLOR,"shadow/color"),_SCS("set_shadow_color"),_SCS("get_shadow_color")); ADD_PROPERTY( PropertyInfo(Variant::INT,"shadow/buffer_size",PROPERTY_HINT_RANGE,"32,16384,1"),_SCS("set_shadow_buffer_size"),_SCS("get_shadow_buffer_size")); - ADD_PROPERTY( PropertyInfo(Variant::REAL,"shadow/esm_multiplier",PROPERTY_HINT_RANGE,"1,4096,0.1"),_SCS("set_shadow_esm_multiplier"),_SCS("get_shadow_esm_multiplier")); - ADD_PROPERTY( PropertyInfo(Variant::INT,"shadow/item_mask",PROPERTY_HINT_ALL_FLAGS),_SCS("set_item_shadow_mask"),_SCS("get_item_shadow_mask")); + ADD_PROPERTY( PropertyInfo(Variant::REAL,"shadow/gradient_length",PROPERTY_HINT_RANGE,"1,4096,0.1"),_SCS("set_shadow_gradient_length"),_SCS("get_shadow_gradient_length")); + ADD_PROPERTY( PropertyInfo(Variant::REAL,"shadow/filter",PROPERTY_HINT_ENUM,"None,PCF3,PCF5,PCF9,PCF13"),_SCS("set_shadow_filter"),_SCS("get_shadow_filter")); + ADD_PROPERTY( PropertyInfo(Variant::INT,"shadow/item_cull_mask",PROPERTY_HINT_ALL_FLAGS),_SCS("set_item_shadow_cull_mask"),_SCS("get_item_shadow_cull_mask")); BIND_CONSTANT( MODE_ADD ); BIND_CONSTANT( MODE_SUB ); @@ -425,9 +442,10 @@ Light2D::Light2D() { item_shadow_mask=1; mode=MODE_ADD; shadow_buffer_size=2048; - shadow_esm_multiplier=80; + shadow_gradient_length=0; energy=1.0; shadow_color=Color(0,0,0,0); + shadow_filter=SHADOW_FILTER_NONE; } diff --git a/scene/2d/light_2d.h b/scene/2d/light_2d.h index c03ef96eff0..75dcf67936a 100644 --- a/scene/2d/light_2d.h +++ b/scene/2d/light_2d.h @@ -42,6 +42,14 @@ public: MODE_MASK, }; + enum ShadowFilter { + SHADOW_FILTER_NONE, + SHADOW_FILTER_PCF3, + SHADOW_FILTER_PCF5, + SHADOW_FILTER_PCF9, + SHADOW_FILTER_PCF13, + }; + private: RID canvas_light; bool enabled; @@ -58,10 +66,12 @@ private: int item_mask; int item_shadow_mask; int shadow_buffer_size; - float shadow_esm_multiplier; + float shadow_gradient_length; Mode mode; Ref texture; Vector2 texture_offset; + ShadowFilter shadow_filter; + void _update_light_visibility(); protected: @@ -108,11 +118,11 @@ public: void set_layer_range_max( int p_max_layer); int get_layer_range_max() const; - void set_item_mask( int p_mask); - int get_item_mask() const; + void set_item_cull_mask( int p_mask); + int get_item_cull_mask() const; - void set_item_shadow_mask( int p_mask); - int get_item_shadow_mask() const; + void set_item_shadow_cull_mask( int p_mask); + int get_item_shadow_cull_mask() const; void set_mode( Mode p_mode ); Mode get_mode() const; @@ -123,8 +133,11 @@ public: void set_shadow_buffer_size( int p_size ); int get_shadow_buffer_size() const; - void set_shadow_esm_multiplier( float p_multiplier); - float get_shadow_esm_multiplier() const; + void set_shadow_gradient_length( float p_multiplier); + float get_shadow_gradient_length() const; + + void set_shadow_filter( ShadowFilter p_filter); + ShadowFilter get_shadow_filter() const; void set_shadow_color( const Color& p_shadow_color); Color get_shadow_color() const; @@ -139,5 +152,7 @@ public: }; VARIANT_ENUM_CAST(Light2D::Mode); +VARIANT_ENUM_CAST(Light2D::ShadowFilter); + #endif // LIGHT_2D_H diff --git a/scene/2d/sprite.cpp b/scene/2d/sprite.cpp index c5b338bf59c..00ce59a6f68 100644 --- a/scene/2d/sprite.cpp +++ b/scene/2d/sprite.cpp @@ -376,7 +376,7 @@ Sprite::Sprite() { /// /// - +#if 0 void ViewportSprite::edit_set_pivot(const Point2& p_pivot) { set_offset(p_pivot); @@ -588,3 +588,4 @@ ViewportSprite::ViewportSprite() { centered=true; modulate=Color(1,1,1,1); } +#endif diff --git a/scene/2d/sprite.h b/scene/2d/sprite.h index 32d3f476d1c..2e7b753a04e 100644 --- a/scene/2d/sprite.h +++ b/scene/2d/sprite.h @@ -107,6 +107,7 @@ public: Sprite(); }; +#if 0 class ViewportSprite : public Node2D { OBJ_TYPE( ViewportSprite, Node2D ); @@ -149,4 +150,5 @@ public: ViewportSprite(); }; +#endif #endif // SPRITE_H diff --git a/scene/2d/tile_map.cpp b/scene/2d/tile_map.cpp index 1a4f88c30e1..972ce8378a0 100644 --- a/scene/2d/tile_map.cpp +++ b/scene/2d/tile_map.cpp @@ -477,7 +477,7 @@ void TileMap::_update_dirty_quadrants() { _fix_cell_transform(xform,c,shape_ofs+center_ofs,s); - if (debug_canvas_item) { + if (debug_canvas_item.is_valid()) { vs->canvas_item_add_set_transform(debug_canvas_item,xform); shape->draw(debug_canvas_item,debug_collision_color); @@ -488,7 +488,7 @@ void TileMap::_update_dirty_quadrants() { } } - if (debug_canvas_item) { + if (debug_canvas_item.is_valid()) { vs->canvas_item_add_set_transform(debug_canvas_item,Matrix32()); } @@ -541,26 +541,19 @@ void TileMap::_update_dirty_quadrants() { if (quadrant_order_dirty) { + int index=-0x80000000; //always must be drawn below children for (Map::Element *E=quadrant_map.front();E;E=E->next()) { Quadrant &q=E->get(); for (List::Element *E=q.canvas_items.front();E;E=E->next()) { - VS::get_singleton()->canvas_item_raise(E->get()); + VS::get_singleton()->canvas_item_set_draw_index(E->get(),index++); } } quadrant_order_dirty=false; } - for(int i=0;icast_to(); - - if (c) - VS::get_singleton()->canvas_item_raise(c->get_canvas_item()); - } - _recompute_rect_cache(); } diff --git a/scene/3d/baked_light_instance.cpp b/scene/3d/baked_light_instance.cpp index ca3a309568e..07f0e4ee571 100644 --- a/scene/3d/baked_light_instance.cpp +++ b/scene/3d/baked_light_instance.cpp @@ -29,6 +29,7 @@ #include "baked_light_instance.h" #include "scene/scene_string_names.h" +#if 0 RID BakedLightInstance::get_baked_light_instance() const { @@ -179,3 +180,4 @@ BakedLightSampler::~BakedLightSampler(){ VS::get_singleton()->free(base); } +#endif diff --git a/scene/3d/baked_light_instance.h b/scene/3d/baked_light_instance.h index 002e55df1d8..15f04fea311 100644 --- a/scene/3d/baked_light_instance.h +++ b/scene/3d/baked_light_instance.h @@ -32,6 +32,7 @@ #include "scene/3d/visual_instance.h" #include "scene/resources/baked_light.h" +#if 0 class BakedLightBaker; @@ -101,5 +102,5 @@ public: VARIANT_ENUM_CAST( BakedLightSampler::Param ); - +#endif #endif // BAKED_LIGHT_H diff --git a/scene/3d/camera.cpp b/scene/3d/camera.cpp index e76c0938fb2..1ce07b0264b 100644 --- a/scene/3d/camera.cpp +++ b/scene/3d/camera.cpp @@ -96,8 +96,8 @@ bool Camera::_set(const StringName& p_name, const Variant& p_value) { } else { clear_current(); } - } else if (p_name=="visible_layers") { - set_visible_layers(p_value); + } else if (p_name=="cull_mask") { + set_cull_mask(p_value); } else if (p_name=="environment") { set_environment(p_value); } else @@ -130,8 +130,8 @@ bool Camera::_get(const StringName& p_name,Variant &r_ret) const { } else { r_ret=is_current(); } - } else if (p_name=="visible_layers") { - r_ret=get_visible_layers(); + } else if (p_name=="cull_mask") { + r_ret=get_cull_mask(); } else if (p_name=="h_offset") { r_ret=get_h_offset(); } else if (p_name=="v_offset") { @@ -176,7 +176,7 @@ void Camera::_get_property_list( List *p_list) const { p_list->push_back( PropertyInfo( Variant::REAL, "far" , PROPERTY_HINT_EXP_RANGE, "0.01,4096.0,0.01") ); p_list->push_back( PropertyInfo( Variant::INT, "keep_aspect",PROPERTY_HINT_ENUM,"Keep Width,Keep Height") ); p_list->push_back( PropertyInfo( Variant::BOOL, "current" ) ); - p_list->push_back( PropertyInfo( Variant::INT, "visible_layers",PROPERTY_HINT_ALL_FLAGS ) ); + p_list->push_back( PropertyInfo( Variant::INT, "cull_mask",PROPERTY_HINT_ALL_FLAGS ) ); p_list->push_back( PropertyInfo( Variant::OBJECT, "environment",PROPERTY_HINT_RESOURCE_TYPE,"Environment" ) ); p_list->push_back( PropertyInfo( Variant::REAL, "h_offset" ) ); p_list->push_back( PropertyInfo( Variant::REAL, "v_offset" ) ); @@ -342,91 +342,6 @@ bool Camera::_can_gizmo_scale() const { } -RES Camera::_get_gizmo_geometry() const { - - - Ref surface_tool( memnew( SurfaceTool )); - - Ref mat( memnew( FixedMaterial )); - - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(1.0,0.5,1.0,0.5) ); - mat->set_line_width(4); - mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); - mat->set_flag(Material::FLAG_UNSHADED,true); - //mat->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - - surface_tool->begin(Mesh::PRIMITIVE_LINES); - surface_tool->set_material(mat); - - switch(mode) { - - case PROJECTION_PERSPECTIVE: { - - - - Vector3 side=Vector3( Math::sin(Math::deg2rad(fov)), 0, -Math::cos(Math::deg2rad(fov)) ); - Vector3 nside=side; - nside.x=-nside.x; - Vector3 up=Vector3(0,side.x,0); - - -#define ADD_TRIANGLE( m_a, m_b, m_c)\ -{\ - surface_tool->add_vertex(m_a);\ - surface_tool->add_vertex(m_b);\ - surface_tool->add_vertex(m_b);\ - surface_tool->add_vertex(m_c);\ - surface_tool->add_vertex(m_c);\ - surface_tool->add_vertex(m_a);\ -} - - ADD_TRIANGLE( Vector3(), side+up, side-up ); - ADD_TRIANGLE( Vector3(), nside+up, nside-up ); - ADD_TRIANGLE( Vector3(), side+up, nside+up ); - ADD_TRIANGLE( Vector3(), side-up, nside-up ); - - side.x*=0.25; - nside.x*=0.25; - Vector3 tup( 0, up.y*3/2,side.z); - ADD_TRIANGLE( tup, side+up, nside+up ); - - } break; - case PROJECTION_ORTHOGONAL: { - -#define ADD_QUAD( m_a, m_b, m_c, m_d)\ -{\ - surface_tool->add_vertex(m_a);\ - surface_tool->add_vertex(m_b);\ - surface_tool->add_vertex(m_b);\ - surface_tool->add_vertex(m_c);\ - surface_tool->add_vertex(m_c);\ - surface_tool->add_vertex(m_d);\ - surface_tool->add_vertex(m_d);\ - surface_tool->add_vertex(m_a);\ -} - - float hsize=size*0.5; - Vector3 right(hsize,0,0); - Vector3 up(0,hsize,0); - Vector3 back(0,0,-1.0); - Vector3 front(0,0,0); - - ADD_QUAD( -up-right,-up+right,up+right,up-right); - ADD_QUAD( -up-right+back,-up+right+back,up+right+back,up-right+back); - ADD_QUAD( up+right,up+right+back,up-right+back,up-right); - ADD_QUAD( -up+right,-up+right+back,-up-right+back,-up-right); - - right.x*=0.25; - Vector3 tup( 0, up.y*3/2,back.z ); - ADD_TRIANGLE( tup, right+up+back, -right+up+back ); - - } break; - - } - - return surface_tool->commit(); - -} Vector3 Camera::project_ray_normal(const Point2& p_pos) const { @@ -648,8 +563,8 @@ void Camera::_bind_methods() { ObjectTypeDB::bind_method( _MD("get_zfar"),&Camera::get_zfar ); ObjectTypeDB::bind_method( _MD("get_znear"),&Camera::get_znear ); ObjectTypeDB::bind_method( _MD("get_projection"),&Camera::get_projection ); - ObjectTypeDB::bind_method( _MD("set_visible_layers","mask"),&Camera::set_visible_layers ); - ObjectTypeDB::bind_method( _MD("get_visible_layers"),&Camera::get_visible_layers ); + ObjectTypeDB::bind_method( _MD("set_cull_mask","mask"),&Camera::set_cull_mask ); + ObjectTypeDB::bind_method( _MD("get_cull_mask"),&Camera::get_cull_mask ); ObjectTypeDB::bind_method(_MD("set_environment","env:Environment"),&Camera::set_environment); ObjectTypeDB::bind_method(_MD("get_environment:Environment"),&Camera::get_environment); ObjectTypeDB::bind_method(_MD("set_keep_aspect_mode","mode"),&Camera::set_keep_aspect_mode); @@ -690,13 +605,13 @@ Camera::Projection Camera::get_projection() const { return mode; } -void Camera::set_visible_layers(uint32_t p_layers) { +void Camera::set_cull_mask(uint32_t p_layers) { layers=p_layers; - VisualServer::get_singleton()->camera_set_visible_layers(camera,layers); + VisualServer::get_singleton()->camera_set_cull_mask(camera,layers); } -uint32_t Camera::get_visible_layers() const{ +uint32_t Camera::get_cull_mask() const{ return layers; } @@ -757,7 +672,7 @@ Camera::Camera() { layers=0xfffff; v_offset=0; h_offset=0; - VisualServer::get_singleton()->camera_set_visible_layers(camera,layers); + VisualServer::get_singleton()->camera_set_cull_mask(camera,layers); //active=false; } diff --git a/scene/3d/camera.h b/scene/3d/camera.h index 30c6928245e..3ea0bd711cc 100644 --- a/scene/3d/camera.h +++ b/scene/3d/camera.h @@ -75,7 +75,7 @@ private: Ref environment; virtual bool _can_gizmo_scale() const; - virtual RES _get_gizmo_geometry() const; + //void _camera_make_current(Node *p_camera); @@ -126,8 +126,8 @@ public: bool is_position_behind(const Vector3& p_pos) const; Vector3 project_position(const Point2& p_point) const; - void set_visible_layers(uint32_t p_layers); - uint32_t get_visible_layers() const; + void set_cull_mask(uint32_t p_layers); + uint32_t get_cull_mask() const; Vector get_frustum() const; diff --git a/scene/3d/light.cpp b/scene/3d/light.cpp index 227bb3a59d8..d98f1000204 100644 --- a/scene/3d/light.cpp +++ b/scene/3d/light.cpp @@ -32,75 +32,6 @@ #include "scene/resources/surface_tool.h" -static const char* _light_param_names[VS::LIGHT_PARAM_MAX]={ - "params/spot_attenuation", - "params/spot_angle", - "params/radius", - "params/energy", - "params/attenuation", - "shadow/darkening", - "shadow/z_offset", - "shadow/z_slope_scale", - "shadow/esm_multiplier", - "shadow/blur_passes" -}; - -void Light::set_parameter(Parameter p_param, float p_value) { - - ERR_FAIL_INDEX(p_param, PARAM_MAX); - vars[p_param]=p_value; - VisualServer::get_singleton()->light_set_param(light,(VisualServer::LightParam)p_param,p_value); - if (p_param==PARAM_RADIUS || p_param==PARAM_SPOT_ANGLE) - update_gizmo(); - _change_notify(_light_param_names[p_param]); -// _change_notify(_param_names[p_param]); -} - -float Light::get_parameter(Parameter p_param) const { - - ERR_FAIL_INDEX_V(p_param, PARAM_MAX, 0); - return vars[p_param]; - -} - -void Light::set_color(LightColor p_color, const Color& p_value) { - - ERR_FAIL_INDEX(p_color, 3); - colors[p_color]=p_value; - VisualServer::get_singleton()->light_set_color(light,(VisualServer::LightColor)p_color,p_value); - //_change_notify(_color_names[p_color]); - -} -Color Light::get_color(LightColor p_color) const { - - ERR_FAIL_INDEX_V(p_color, 3, Color()); - return colors[p_color]; - -} - - -void Light::set_project_shadows(bool p_enabled) { - - shadows=p_enabled; - VisualServer::get_singleton()->light_set_shadow(light, p_enabled); - _change_notify("shadow"); -} -bool Light::has_project_shadows() const { - - return shadows; -} - -void Light::set_projector(const Ref& p_projector) { - - projector=p_projector; - VisualServer::get_singleton()->light_set_projector(light, projector.is_null()?RID():projector->get_rid()); -} - -Ref Light::get_projector() const { - - return projector; -} - bool Light::_can_gizmo_scale() const { @@ -108,220 +39,9 @@ bool Light::_can_gizmo_scale() const { } -static void _make_sphere(int p_lats, int p_lons, float p_radius, Ref p_tool) { - - - p_tool->begin(Mesh::PRIMITIVE_TRIANGLES); - - for(int i = 1; i <= p_lats; i++) { - double lat0 = Math_PI * (-0.5 + (double) (i - 1) / p_lats); - double z0 = Math::sin(lat0); - double zr0 = Math::cos(lat0); - - double lat1 = Math_PI * (-0.5 + (double) i / p_lats); - double z1 = Math::sin(lat1); - double zr1 = Math::cos(lat1); - - for(int j = p_lons; j >= 1; j--) { - - double lng0 = 2 * Math_PI * (double) (j - 1) / p_lons; - double x0 = Math::cos(lng0); - double y0 = Math::sin(lng0); - - double lng1 = 2 * Math_PI * (double) (j) / p_lons; - double x1 = Math::cos(lng1); - double y1 = Math::sin(lng1); - - - Vector3 v[4]={ - Vector3(x1 * zr0, z0, y1 *zr0), - Vector3(x1 * zr1, z1, y1 *zr1), - Vector3(x0 * zr1, z1, y0 *zr1), - Vector3(x0 * zr0, z0, y0 *zr0) - }; - -#define ADD_POINT(m_idx) \ - p_tool->add_normal(v[m_idx]);\ - p_tool->add_vertex(v[m_idx]*p_radius); - - ADD_POINT(0); - ADD_POINT(1); - ADD_POINT(2); - - ADD_POINT(2); - ADD_POINT(3); - ADD_POINT(0); - } - } - -} - -RES Light::_get_gizmo_geometry() const { - - - Ref mat_area( memnew( FixedMaterial )); - - mat_area->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.7,0.6,0.0,0.05) ); - mat_area->set_parameter( FixedMaterial::PARAM_EMISSION,Color(0.7,0.7,0.7) ); - mat_area->set_blend_mode( Material::BLEND_MODE_ADD ); - mat_area->set_flag(Material::FLAG_DOUBLE_SIDED,true); -// mat_area->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - - Ref mat_light( memnew( FixedMaterial )); - - mat_light->set_parameter( FixedMaterial::PARAM_DIFFUSE, Color(1.0,1.0,0.8,0.9) ); - mat_light->set_flag(Material::FLAG_UNSHADED,true); - - Ref< Mesh > mesh; - - Ref surftool( memnew( SurfaceTool )); - - switch(type) { - - case VisualServer::LIGHT_DIRECTIONAL: { - - - mat_area->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.9,0.8,0.1,0.8) ); - mat_area->set_blend_mode( Material::BLEND_MODE_MIX); - mat_area->set_flag(Material::FLAG_DOUBLE_SIDED,false); - mat_area->set_flag(Material::FLAG_UNSHADED,true); - - _make_sphere( 5,5,0.6, surftool ); - surftool->set_material(mat_light); - mesh=surftool->commit(mesh); - - // float radius=1; - - surftool->begin(Mesh::PRIMITIVE_TRIANGLES); - - const int arrow_points=5; - Vector3 arrow[arrow_points]={ - Vector3(0,0,2), - Vector3(1,1,2), - Vector3(1,1,-1), - Vector3(2,2,-1), - Vector3(0,0,-3) - }; - - int arrow_sides=4; - - - for(int i = 0; i < arrow_sides ; i++) { - - - Matrix3 ma(Vector3(0,0,1),Math_PI*2*float(i)/arrow_sides); - Matrix3 mb(Vector3(0,0,1),Math_PI*2*float(i+1)/arrow_sides); - - - for(int j=0;jadd_normal(n); - surftool->add_vertex(points[0]); - surftool->add_normal(n); - surftool->add_vertex(points[1]); - surftool->add_normal(n); - surftool->add_vertex(points[2]); - - surftool->add_normal(n); - surftool->add_vertex(points[0]); - surftool->add_normal(n); - surftool->add_vertex(points[2]); - surftool->add_normal(n); - surftool->add_vertex(points[3]); - - - } - - - } - - surftool->set_material(mat_area); - mesh=surftool->commit(mesh); - - - - } break; - case VisualServer::LIGHT_OMNI: { - - - _make_sphere( 20,20,vars[PARAM_RADIUS], surftool ); - surftool->set_material(mat_area); - mesh=surftool->commit(mesh); - _make_sphere(5,5, 0.1, surftool ); - surftool->set_material(mat_light); - mesh=surftool->commit(mesh); - } break; - - case VisualServer::LIGHT_SPOT: { - - _make_sphere( 5,5,0.1, surftool ); - surftool->set_material(mat_light); - mesh=surftool->commit(mesh); - - // make cone - int points=24; - float len=vars[PARAM_RADIUS]; - float size=Math::tan(Math::deg2rad(vars[PARAM_SPOT_ANGLE]))*len; - - surftool->begin(Mesh::PRIMITIVE_TRIANGLES); - - for(int i = 0; i < points; i++) { - - float x0=Math::sin(i * Math_PI * 2 / points); - float y0=Math::cos(i * Math_PI * 2 / points); - float x1=Math::sin((i+1) * Math_PI * 2 / points); - float y1=Math::cos((i+1) * Math_PI * 2 / points); - - Vector3 v1=Vector3(x0*size,y0*size,-len).normalized()*len; - Vector3 v2=Vector3(x1*size,y1*size,-len).normalized()*len; - - Vector3 v3=Vector3(0,0,0); - Vector3 v4=Vector3(0,0,v1.z); - - Vector3 n = Plane(v1,v2,v3).normal; - - - surftool->add_normal(n); - surftool->add_vertex(v1); - surftool->add_normal(n); - surftool->add_vertex(v2); - surftool->add_normal(n); - surftool->add_vertex(v3); - - n=Vector3(0,0,-1); - - surftool->add_normal(n); - surftool->add_vertex(v1); - surftool->add_normal(n); - surftool->add_vertex(v2); - surftool->add_normal(n); - surftool->add_vertex(v4); - - - } - - surftool->set_material(mat_area); - mesh=surftool->commit(mesh); - - - } break; - } - - return mesh; -} - - AABB Light::get_aabb() const { +#if 0 if (type==VisualServer::LIGHT_DIRECTIONAL) { return AABB( Vector3(-1,-1,-1), Vector3(2, 2, 2 ) ); @@ -336,7 +56,7 @@ AABB Light::get_aabb() const { float size=Math::tan(Math::deg2rad(vars[PARAM_SPOT_ANGLE]))*len; return AABB( Vector3( -size,-size,-len ), Vector3( size*2, size*2, len ) ); } - +#endif return AABB(); } @@ -346,89 +66,6 @@ DVector Light::get_faces(uint32_t p_usage_flags) const { } -void Light::set_operator(Operator p_op) { - ERR_FAIL_INDEX(p_op,2); - op=p_op; - VisualServer::get_singleton()->light_set_operator(light,VS::LightOp(op)); - -} - -void Light::set_bake_mode(BakeMode p_bake_mode) { - - bake_mode=p_bake_mode; -} - -Light::BakeMode Light::get_bake_mode() const { - - return bake_mode; -} - - -Light::Operator Light::get_operator() const { - - return op; -} - -void Light::approximate_opengl_attenuation(float p_constant, float p_linear, float p_quadratic,float p_radius_treshold) { - - //this is horrible and must never be used - - float a = p_quadratic * p_radius_treshold; - float b = p_linear * p_radius_treshold; - float c = p_constant * p_radius_treshold -1; - - float radius=10000; - - if(a == 0) { // solve linear - float d = Math::abs(-c/b); - if(d=0) { - - root = sqrt(root); - - float solution1 = fabs( (-b + root) / denominator); - float solution2 = fabs( (-b - root) / denominator); - - if(solution1 > radius) - solution1 = radius; - - if(solution2 > radius) - solution2 = radius; - - radius = (solution1 > solution2 ? solution1 : solution2); - } - } - } - - float energy=1.0; - - /*if (p_constant>0) - energy=1.0/p_constant; //energy is this - else - energy=8.0; // some high number.. -*/ - - if (radius==10000) - radius=100; //bug? - - set_parameter(PARAM_RADIUS,radius); - set_parameter(PARAM_ENERGY,energy); - -} - void Light::_update_visibility() { @@ -436,7 +73,7 @@ void Light::_update_visibility() { return; -bool editor_ok=true; + bool editor_ok=true; #ifdef TOOLS_ENABLED if (editor_only) { @@ -448,7 +85,7 @@ bool editor_ok=true; } #endif - VS::get_singleton()->instance_light_set_enabled(get_instance(),is_visible() && enabled && editor_ok); + //VS::get_singleton()->instance_light_set_enabled(get_instance(),is_visible() && editor_ok); _change_notify("geometry/visible"); } @@ -461,16 +98,6 @@ void Light::_notification(int p_what) { } } -void Light::set_enabled(bool p_enabled) { - - enabled=p_enabled; - _update_visibility(); -} - -bool Light::is_enabled() const{ - - return enabled; -} void Light::set_editor_only(bool p_editor_only) { @@ -486,69 +113,16 @@ bool Light::is_editor_only() const{ void Light::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_parameter","variable","value"), &Light::set_parameter ); - ObjectTypeDB::bind_method(_MD("get_parameter","variable"), &Light::get_parameter ); - ObjectTypeDB::bind_method(_MD("set_color","color","value"), &Light::set_color ); - ObjectTypeDB::bind_method(_MD("get_color","color"), &Light::get_color ); - ObjectTypeDB::bind_method(_MD("set_project_shadows","enable"), &Light::set_project_shadows ); - ObjectTypeDB::bind_method(_MD("has_project_shadows"), &Light::has_project_shadows ); - ObjectTypeDB::bind_method(_MD("set_projector","projector:Texture"), &Light::set_projector ); - ObjectTypeDB::bind_method(_MD("get_projector:Texture"), &Light::get_projector ); - ObjectTypeDB::bind_method(_MD("set_operator","operator"), &Light::set_operator ); - ObjectTypeDB::bind_method(_MD("get_operator"), &Light::get_operator ); - ObjectTypeDB::bind_method(_MD("set_bake_mode","bake_mode"), &Light::set_bake_mode ); - ObjectTypeDB::bind_method(_MD("get_bake_mode"), &Light::get_bake_mode ); - ObjectTypeDB::bind_method(_MD("set_enabled","enabled"), &Light::set_enabled ); - ObjectTypeDB::bind_method(_MD("is_enabled"), &Light::is_enabled ); + ObjectTypeDB::bind_method(_MD("set_editor_only","editor_only"), &Light::set_editor_only ); ObjectTypeDB::bind_method(_MD("is_editor_only"), &Light::is_editor_only ); - ADD_PROPERTY( PropertyInfo( Variant::BOOL, "params/enabled"), _SCS("set_enabled"), _SCS("is_enabled")); ADD_PROPERTY( PropertyInfo( Variant::BOOL, "params/editor_only"), _SCS("set_editor_only"), _SCS("is_editor_only")); - ADD_PROPERTY( PropertyInfo( Variant::INT, "params/bake_mode",PROPERTY_HINT_ENUM,"Disabled,Indirect,Indirect+Shadows,Full"), _SCS("set_bake_mode"), _SCS("get_bake_mode")); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/energy", PROPERTY_HINT_EXP_RANGE, "0,64,0.01"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_ENERGY ); - /* - if (type == VisualServer::LIGHT_OMNI || type == VisualServer::LIGHT_SPOT) { - ADD_PROPERTY( PropertyInfo( Variant::REAL, "params/radius", PROPERTY_HINT_RANGE, "0.01,4096,0.01")); - ADD_PROPERTY( PropertyInfo( Variant::REAL, "params/attenuation", PROPERTY_HINT_RANGE, "0,8,0.01")); - } - - if (type == VisualServer::LIGHT_SPOT) { - ADD_PROPERTY( PropertyInfo( Variant::REAL, "params/spot_angle", PROPERTY_HINT_RANGE, "0.01,90.0,0.01")); - ADD_PROPERTY( PropertyInfo( Variant::REAL, "params/spot_attenuation", PROPERTY_HINT_RANGE, "0,8,0.01")); - - }*/ - - ADD_PROPERTYI( PropertyInfo( Variant::COLOR, "colors/diffuse"), _SCS("set_color"), _SCS("get_color"),COLOR_DIFFUSE); - ADD_PROPERTYI( PropertyInfo( Variant::COLOR, "colors/specular"), _SCS("set_color"), _SCS("get_color"),COLOR_SPECULAR); - ADD_PROPERTY( PropertyInfo( Variant::BOOL, "shadow/shadow"), _SCS("set_project_shadows"), _SCS("has_project_shadows")); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/darkening", PROPERTY_HINT_RANGE, "0,1,0.01"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADOW_DARKENING ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/z_offset", PROPERTY_HINT_RANGE, "0,128,0.001"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADOW_Z_OFFSET); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/z_slope_scale", PROPERTY_HINT_RANGE, "0,128,0.001"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADOW_Z_SLOPE_SCALE); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/esm_multiplier", PROPERTY_HINT_RANGE, "1.0,512.0,0.1"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADOW_ESM_MULTIPLIER); - ADD_PROPERTYI( PropertyInfo( Variant::INT, "shadow/blur_passes", PROPERTY_HINT_RANGE, "0,4,1"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADOW_BLUR_PASSES); - ADD_PROPERTY( PropertyInfo( Variant::OBJECT, "projector",PROPERTY_HINT_RESOURCE_TYPE,"Texture"), _SCS("set_projector"), _SCS("get_projector")); - ADD_PROPERTY( PropertyInfo( Variant::INT, "operator",PROPERTY_HINT_ENUM,"Add,Sub"), _SCS("set_operator"), _SCS("get_operator")); - BIND_CONSTANT( PARAM_RADIUS ); - BIND_CONSTANT( PARAM_ENERGY ); - BIND_CONSTANT( PARAM_ATTENUATION ); - BIND_CONSTANT( PARAM_SPOT_ANGLE ); - BIND_CONSTANT( PARAM_SPOT_ATTENUATION ); - BIND_CONSTANT( PARAM_SHADOW_DARKENING ); - BIND_CONSTANT( PARAM_SHADOW_Z_OFFSET ); - BIND_CONSTANT( COLOR_DIFFUSE ); - BIND_CONSTANT( COLOR_SPECULAR ); - - BIND_CONSTANT( BAKE_MODE_DISABLED ); - BIND_CONSTANT( BAKE_MODE_INDIRECT ); - BIND_CONSTANT( BAKE_MODE_INDIRECT_AND_SHADOWS ); - BIND_CONSTANT( BAKE_MODE_FULL ); - } @@ -558,27 +132,8 @@ Light::Light(VisualServer::LightType p_type) { type=p_type; light=VisualServer::get_singleton()->light_create(p_type); - set_parameter(PARAM_SPOT_ATTENUATION,1.0); - set_parameter(PARAM_SPOT_ANGLE,30.0); - set_parameter(PARAM_RADIUS,2.0); - set_parameter(PARAM_ENERGY,1.0); - set_parameter(PARAM_ATTENUATION,1.0); - set_parameter(PARAM_SHADOW_DARKENING,0.0); - set_parameter(PARAM_SHADOW_Z_OFFSET,0.05); - set_parameter(PARAM_SHADOW_Z_SLOPE_SCALE,0); - set_parameter(PARAM_SHADOW_ESM_MULTIPLIER,60); - set_parameter(PARAM_SHADOW_BLUR_PASSES,1); - - set_color( COLOR_DIFFUSE, Color(1,1,1)); - set_color( COLOR_SPECULAR, Color(1,1,1)); - - op=OPERATOR_ADD; - set_project_shadows( false ); - set_base(light); - enabled=true; editor_only=false; - bake_mode=BAKE_MODE_DISABLED; } @@ -598,59 +153,15 @@ Light::~Light() { ///////////////////////////////////////// -void DirectionalLight::set_shadow_mode(ShadowMode p_mode) { - - shadow_mode=p_mode; - VS::get_singleton()->light_directional_set_shadow_mode(light,(VS::LightDirectionalShadowMode)p_mode); - -} - -DirectionalLight::ShadowMode DirectionalLight::get_shadow_mode() const{ - - return shadow_mode; -} - -void DirectionalLight::set_shadow_param(ShadowParam p_param, float p_value) { - - ERR_FAIL_INDEX(p_param,3); - shadow_param[p_param]=p_value; - VS::get_singleton()->light_directional_set_shadow_param(light,VS::LightDirectionalShadowParam(p_param),p_value); -} - -float DirectionalLight::get_shadow_param(ShadowParam p_param) const { - ERR_FAIL_INDEX_V(p_param,3,0); - return shadow_param[p_param]; -} - void DirectionalLight::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_shadow_mode","mode"),&DirectionalLight::set_shadow_mode); - ObjectTypeDB::bind_method(_MD("get_shadow_mode"),&DirectionalLight::get_shadow_mode); - ObjectTypeDB::bind_method(_MD("set_shadow_param","param","value"),&DirectionalLight::set_shadow_param); - ObjectTypeDB::bind_method(_MD("get_shadow_param","param"),&DirectionalLight::get_shadow_param); - ADD_PROPERTY( PropertyInfo(Variant::INT,"shadow/mode",PROPERTY_HINT_ENUM,"Orthogonal,Perspective,PSSM 2 Splits,PSSM 4 Splits"),_SCS("set_shadow_mode"),_SCS("get_shadow_mode")); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"shadow/max_distance",PROPERTY_HINT_EXP_RANGE,"0.00,99999,0.01"),_SCS("set_shadow_param"),_SCS("get_shadow_param"), SHADOW_PARAM_MAX_DISTANCE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"shadow/split_weight",PROPERTY_HINT_RANGE,"0.01,1.0,0.01"),_SCS("set_shadow_param"),_SCS("get_shadow_param"), SHADOW_PARAM_PSSM_SPLIT_WEIGHT); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"shadow/zoffset_scale",PROPERTY_HINT_RANGE,"0.01,1024.0,0.01"),_SCS("set_shadow_param"),_SCS("get_shadow_param"), SHADOW_PARAM_PSSM_ZOFFSET_SCALE); - - BIND_CONSTANT( SHADOW_ORTHOGONAL ); - BIND_CONSTANT( SHADOW_PERSPECTIVE ); - BIND_CONSTANT( SHADOW_PARALLEL_2_SPLITS ); - BIND_CONSTANT( SHADOW_PARALLEL_4_SPLITS ); - BIND_CONSTANT( SHADOW_PARAM_MAX_DISTANCE ); - BIND_CONSTANT( SHADOW_PARAM_PSSM_SPLIT_WEIGHT ); - BIND_CONSTANT( SHADOW_PARAM_PSSM_ZOFFSET_SCALE ); } DirectionalLight::DirectionalLight() : Light( VisualServer::LIGHT_DIRECTIONAL ) { - shadow_mode=SHADOW_ORTHOGONAL; - shadow_param[SHADOW_PARAM_MAX_DISTANCE]=0; - shadow_param[SHADOW_PARAM_PSSM_SPLIT_WEIGHT]=0.5; - shadow_param[SHADOW_PARAM_PSSM_ZOFFSET_SCALE]=2.0; } @@ -658,19 +169,11 @@ DirectionalLight::DirectionalLight() : Light( VisualServer::LIGHT_DIRECTIONAL ) void OmniLight::_bind_methods() { - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/radius", PROPERTY_HINT_EXP_RANGE, "0.2,4096,0.01"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_RADIUS ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/attenuation", PROPERTY_HINT_EXP_EASING, "attenuation"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_ATTENUATION ); } void SpotLight::_bind_methods() { - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/radius", PROPERTY_HINT_EXP_RANGE, "0.2,4096,0.01"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_RADIUS ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/attenuation", PROPERTY_HINT_EXP_EASING, "attenuation"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_ATTENUATION ); - - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/spot_angle", PROPERTY_HINT_RANGE, "0.01,89.9,0.01"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SPOT_ANGLE ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/spot_attenuation", PROPERTY_HINT_EXP_EASING, "spot_attenuation"), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SPOT_ATTENUATION ); - } diff --git a/scene/3d/light.h b/scene/3d/light.h index b25c6a44b53..3c31d90d4c0 100644 --- a/scene/3d/light.h +++ b/scene/3d/light.h @@ -44,57 +44,12 @@ class Light : public VisualInstance { public: - enum Parameter { - PARAM_RADIUS=VisualServer::LIGHT_PARAM_RADIUS, - PARAM_ENERGY=VisualServer::LIGHT_PARAM_ENERGY, - PARAM_ATTENUATION=VisualServer::LIGHT_PARAM_ATTENUATION, - PARAM_SPOT_ANGLE=VisualServer::LIGHT_PARAM_SPOT_ANGLE, - PARAM_SPOT_ATTENUATION=VisualServer::LIGHT_PARAM_SPOT_ATTENUATION, - PARAM_SHADOW_DARKENING=VisualServer::LIGHT_PARAM_SHADOW_DARKENING, - PARAM_SHADOW_Z_OFFSET=VisualServer::LIGHT_PARAM_SHADOW_Z_OFFSET, - PARAM_SHADOW_Z_SLOPE_SCALE=VisualServer::LIGHT_PARAM_SHADOW_Z_SLOPE_SCALE, - PARAM_SHADOW_ESM_MULTIPLIER=VisualServer::LIGHT_PARAM_SHADOW_ESM_MULTIPLIER, - PARAM_SHADOW_BLUR_PASSES=VisualServer::LIGHT_PARAM_SHADOW_BLUR_PASSES, - PARAM_MAX=VisualServer::LIGHT_PARAM_MAX - }; - enum LightColor { - - COLOR_DIFFUSE=VisualServer::LIGHT_COLOR_DIFFUSE, - COLOR_SPECULAR=VisualServer::LIGHT_COLOR_SPECULAR - }; - - enum BakeMode { - - BAKE_MODE_DISABLED, - BAKE_MODE_INDIRECT, - BAKE_MODE_INDIRECT_AND_SHADOWS, - BAKE_MODE_FULL - - }; - - - enum Operator { - - OPERATOR_ADD, - OPERATOR_SUB - }; private: - - Ref projector; - float vars[PARAM_MAX]; - Color colors[3]; - - - BakeMode bake_mode; - VisualServer::LightType type; - bool shadows; - bool enabled; + VS::LightType type; bool editor_only; - Operator op; - void _update_visibility(); // bind helpers @@ -103,8 +58,7 @@ protected: RID light; virtual bool _can_gizmo_scale() const; - virtual RES _get_gizmo_geometry() const; - + static void _bind_methods(); void _notification(int p_what); @@ -114,44 +68,17 @@ public: VS::LightType get_light_type() const { return type; } - void set_parameter(Parameter p_var, float p_value); - float get_parameter(Parameter p_var) const; - - void set_color(LightColor p_color,const Color& p_value); - Color get_color(LightColor p_color) const; - - void set_project_shadows(bool p_enabled); - bool has_project_shadows() const; - - void set_projector(const Ref& p_projector); - Ref get_projector() const; - - void set_operator(Operator p_op); - Operator get_operator() const; - - void set_bake_mode(BakeMode p_bake_mode); - BakeMode get_bake_mode() const; - - void set_enabled(bool p_enabled); - bool is_enabled() const; - void set_editor_only(bool p_editor_only); bool is_editor_only() const; virtual AABB get_aabb() const; virtual DVector get_faces(uint32_t p_usage_flags) const; - void approximate_opengl_attenuation(float p_constant, float p_linear, float p_quadratic, float p_radius_treshold=0.5); - Light(); ~Light(); }; -VARIANT_ENUM_CAST( Light::Parameter ); -VARIANT_ENUM_CAST( Light::LightColor ); -VARIANT_ENUM_CAST( Light::Operator ); -VARIANT_ENUM_CAST( Light::BakeMode); class DirectionalLight : public Light { @@ -160,39 +87,18 @@ class DirectionalLight : public Light { public: - enum ShadowMode { - SHADOW_ORTHOGONAL, - SHADOW_PERSPECTIVE, - SHADOW_PARALLEL_2_SPLITS, - SHADOW_PARALLEL_4_SPLITS - }; - enum ShadowParam { - SHADOW_PARAM_MAX_DISTANCE, - SHADOW_PARAM_PSSM_SPLIT_WEIGHT, - SHADOW_PARAM_PSSM_ZOFFSET_SCALE - }; private: - ShadowMode shadow_mode; - float shadow_param[3]; + + protected: static void _bind_methods(); public: - void set_shadow_mode(ShadowMode p_mode); - ShadowMode get_shadow_mode() const; - - void set_shadow_max_distance(float p_distance); - float get_shadow_max_distance() const; - void set_shadow_param(ShadowParam p_param, float p_value); - float get_shadow_param(ShadowParam p_param) const; DirectionalLight(); }; -VARIANT_ENUM_CAST( DirectionalLight::ShadowMode ); -VARIANT_ENUM_CAST( DirectionalLight::ShadowParam ); - class OmniLight : public Light { @@ -203,7 +109,7 @@ protected: public: - OmniLight() : Light( VisualServer::LIGHT_OMNI ) { set_parameter(PARAM_SHADOW_Z_OFFSET,0.001);} + OmniLight() : Light( VisualServer::LIGHT_OMNI ) { } }; class SpotLight : public Light { diff --git a/scene/3d/particles.cpp b/scene/3d/particles.cpp index 3ac5d8ed7b8..994e3298536 100644 --- a/scene/3d/particles.cpp +++ b/scene/3d/particles.cpp @@ -30,6 +30,7 @@ #include "servers/visual_server.h" #include "scene/resources/surface_tool.h" +#if 0 /* static const char* _var_names[Particles::VAR_MAX]={ "vars/lifetime", @@ -557,3 +558,4 @@ Particles::~Particles() { VisualServer::get_singleton()->free(particles); } +#endif diff --git a/scene/3d/particles.h b/scene/3d/particles.h index 42d27c41d7e..fb3bb258ef3 100644 --- a/scene/3d/particles.h +++ b/scene/3d/particles.h @@ -37,7 +37,7 @@ /** @author Juan Linietsky */ - +#if 0 class Particles : public GeometryInstance { public: @@ -163,3 +163,4 @@ public: VARIANT_ENUM_CAST( Particles::Variable ); #endif +#endif diff --git a/scene/3d/portal.cpp b/scene/3d/portal.cpp index 23bc64615db..131bbd7000d 100644 --- a/scene/3d/portal.cpp +++ b/scene/3d/portal.cpp @@ -96,45 +96,6 @@ void Portal::_get_property_list( List *p_list) const { } -RES Portal::_get_gizmo_geometry() const { - - Ref surface_tool( memnew( SurfaceTool )); - - Ref mat( memnew( FixedMaterial )); - - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(1.0,0.8,0.8,0.7) ); - mat->set_line_width(4); - mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); - mat->set_flag(Material::FLAG_UNSHADED,true); -// mat->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER,true); - - surface_tool->begin(Mesh::PRIMITIVE_LINES); - surface_tool->set_material(mat); - - Vector shape = get_shape(); - - Vector2 center; - for (int i=0;i points; - surface_tool->add_vertex( Vector3( shape[i].x, shape[i].y,0 )); - surface_tool->add_vertex( Vector3( shape[n].x, shape[n].y,0 )); - center+=shape[i]; - - } - - if (shape.size()>0) { - - center/=shape.size(); - Vector points; - surface_tool->add_vertex( Vector3( center.x, center.y,0 )); - surface_tool->add_vertex( Vector3( center.x, center.y,1.0 )); - } - - return surface_tool->commit(); -} - AABB Portal::get_aabb() const { @@ -178,18 +139,19 @@ void Portal::set_shape(const Vector& p_shape) { VisualServer::get_singleton()->portal_set_shape(portal, p_shape); + shape=p_shape; update_gizmo(); } Vector Portal::get_shape() const { - return VisualServer::get_singleton()->portal_get_shape(portal); + return shape; } void Portal::set_connect_range(float p_range) { connect_range=p_range; - VisualServer::get_singleton()->portal_set_connect_range(portal,p_range); + //VisualServer::get_singleton()->portal_set_connect_range(portal,p_range); } float Portal::get_connect_range() const { diff --git a/scene/3d/portal.h b/scene/3d/portal.h index 149a56900f0..8bcfa1ea4bc 100644 --- a/scene/3d/portal.h +++ b/scene/3d/portal.h @@ -47,6 +47,7 @@ class Portal : public VisualInstance { OBJ_TYPE(Portal, VisualInstance); RID portal; + Vector shape; bool enabled; float disable_distance; @@ -55,7 +56,6 @@ class Portal : public VisualInstance { AABB aabb; - virtual RES _get_gizmo_geometry() const; protected: diff --git a/scene/3d/position_3d.cpp b/scene/3d/position_3d.cpp index 27130cbe6a7..de1a0b73f40 100644 --- a/scene/3d/position_3d.cpp +++ b/scene/3d/position_3d.cpp @@ -29,37 +29,6 @@ #include "position_3d.h" #include "scene/resources/mesh.h" -RES Position3D::_get_gizmo_geometry() const { - - - Ref mesh = memnew( Mesh ); - - DVector cursor_points; - DVector cursor_colors; - float cs = 0.25; - cursor_points.push_back(Vector3(+cs,0,0)); - cursor_points.push_back(Vector3(-cs,0,0)); - cursor_points.push_back(Vector3(0,+cs,0)); - cursor_points.push_back(Vector3(0,-cs,0)); - cursor_points.push_back(Vector3(0,0,+cs)); - cursor_points.push_back(Vector3(0,0,-cs)); - cursor_colors.push_back(Color(1,0.5,0.5,1)); - cursor_colors.push_back(Color(1,0.5,0.5,1)); - cursor_colors.push_back(Color(0.5,1,0.5,1)); - cursor_colors.push_back(Color(0.5,1,0.5,1)); - cursor_colors.push_back(Color(0.5,0.5,1,1)); - cursor_colors.push_back(Color(0.5,0.5,1,1)); - - Ref mat = memnew( FixedMaterial ); - mat->set_flag(Material::FLAG_UNSHADED,true); - mat->set_line_width(3); - Array d; - d[Mesh::ARRAY_VERTEX]=cursor_points; - d[Mesh::ARRAY_COLOR]=cursor_colors; - mesh->add_surface(Mesh::PRIMITIVE_LINES,d); - mesh->surface_set_material(0,mat); - return mesh; -} Position3D::Position3D() { diff --git a/scene/3d/position_3d.h b/scene/3d/position_3d.h index 6bac540fcbb..676f46c2ee1 100644 --- a/scene/3d/position_3d.h +++ b/scene/3d/position_3d.h @@ -35,7 +35,6 @@ class Position3D : public Spatial { OBJ_TYPE(Position3D,Spatial); - virtual RES _get_gizmo_geometry() const; public: diff --git a/scene/3d/quad.cpp b/scene/3d/quad.cpp index 1a7eeef1801..f31d248ba7c 100644 --- a/scene/3d/quad.cpp +++ b/scene/3d/quad.cpp @@ -120,7 +120,7 @@ void Quad::_update() { } else { configured=true; } - VS::get_singleton()->mesh_add_surface(mesh,VS::PRIMITIVE_TRIANGLES,arr); + VS::get_singleton()->mesh_add_surface_from_arrays(mesh,VS::PRIMITIVE_TRIANGLES,arr); pending_update=false; } diff --git a/scene/3d/room_instance.cpp b/scene/3d/room_instance.cpp index 9e6867d2a25..080fa880bae 100644 --- a/scene/3d/room_instance.cpp +++ b/scene/3d/room_instance.cpp @@ -75,50 +75,6 @@ void Room::_notification(int p_what) { } -RES Room::_get_gizmo_geometry() const { - - DVector faces; - if (!room.is_null()) - faces=room->get_geometry_hint(); - - int count=faces.size(); - if (count==0) - return RES(); - - DVector::Read facesr=faces.read(); - - const Face3* facesptr=facesr.ptr(); - - DVector points; - - Ref surface_tool( memnew( SurfaceTool )); - - Ref mat( memnew( FixedMaterial )); - - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.2,0.8,0.9,0.3) ); - mat->set_line_width(4); - mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); - mat->set_flag(Material::FLAG_UNSHADED,true); -// mat->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - - surface_tool->begin(Mesh::PRIMITIVE_LINES); - surface_tool->set_material(mat); - - for (int i=0;iadd_vertex(facesptr[i].vertex[0]); - surface_tool->add_vertex(facesptr[i].vertex[1]); - - surface_tool->add_vertex(facesptr[i].vertex[1]); - surface_tool->add_vertex(facesptr[i].vertex[2]); - - surface_tool->add_vertex(facesptr[i].vertex[2]); - surface_tool->add_vertex(facesptr[i].vertex[0]); - - } - - return surface_tool->commit(); -} @@ -127,8 +83,9 @@ AABB Room::get_aabb() const { if (room.is_null()) return AABB(); - return room->get_bounds().get_aabb(); + return AABB(); } + DVector Room::get_faces(uint32_t p_usage_flags) const { return DVector(); @@ -154,9 +111,6 @@ void Room::set_room( const Ref& p_room ) { propagate_notification(NOTIFICATION_AREA_CHANGED); update_gizmo(); - if (room.is_valid()) - SpatialSoundServer::get_singleton()->room_set_bounds(sound_room,room->get_bounds()); - } @@ -202,32 +156,6 @@ void Room::_parse_node_faces(DVector &all_faces,const Node *p_node) const } -void Room::compute_room_from_subtree() { - - - DVector all_faces; - _parse_node_faces(all_faces,this); - - - if (all_faces.size()==0) - return; - float error; - DVector wrapped_faces = Geometry::wrap_geometry(all_faces,&error); - - - if (wrapped_faces.size()==0) - return; - - BSP_Tree tree(wrapped_faces,error); - - Ref room( memnew( RoomBounds ) ); - room->set_bounds(tree); - room->set_geometry_hint(wrapped_faces); - - set_room(room); - -} - void Room::set_simulate_acoustics(bool p_enable) { @@ -268,7 +196,6 @@ void Room::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_room","room:Room"),&Room::set_room ); ObjectTypeDB::bind_method(_MD("get_room:Room"),&Room::get_room ); - ObjectTypeDB::bind_method(_MD("compute_room_from_subtree"),&Room::compute_room_from_subtree); diff --git a/scene/3d/room_instance.h b/scene/3d/room_instance.h index c7df4ceadde..346e687d9e1 100644 --- a/scene/3d/room_instance.h +++ b/scene/3d/room_instance.h @@ -45,6 +45,8 @@ */ + + class Room : public VisualInstance { OBJ_TYPE( Room, VisualInstance ); @@ -65,7 +67,7 @@ private: void _bounds_changed(); - virtual RES _get_gizmo_geometry() const; + protected: @@ -89,7 +91,6 @@ public: void set_simulate_acoustics(bool p_enable); bool is_simulating_acoustics() const; - void compute_room_from_subtree(); RID get_sound_room() const; diff --git a/scene/3d/skeleton.cpp b/scene/3d/skeleton.cpp index c996a8123c3..96233c5ed72 100644 --- a/scene/3d/skeleton.cpp +++ b/scene/3d/skeleton.cpp @@ -163,7 +163,7 @@ void Skeleton::_notification(int p_what) { Bone *bonesptr=&bones[0]; int len=bones.size(); - vs->skeleton_resize( skeleton, len ); // if same size, nothin really happens + vs->skeleton_allocate( skeleton, len ); // if same size, nothin really happens // pose changed, rebuild cache of inverses if (rest_global_inverse_dirty) { @@ -513,51 +513,6 @@ void Skeleton::_make_dirty() { } -RES Skeleton::_get_gizmo_geometry() const { - - if (!GLOBAL_DEF("debug/draw_skeleton", true)) - return RES(); - - if (bones.size()==0) - return RES(); - - Ref surface_tool( memnew( SurfaceTool )); - - Ref mat( memnew( FixedMaterial )); - - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.6,1.0,0.3,0.1) ); - mat->set_line_width(4); - mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); - mat->set_flag(Material::FLAG_UNSHADED,true); - mat->set_flag(Material::FLAG_ONTOP,true); -// mat->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - - surface_tool->begin(Mesh::PRIMITIVE_LINES); - surface_tool->set_material(mat); - - - const Bone *bonesptr=&bones[0]; - int len=bones.size(); - - for (int i=0;iadd_vertex(v1); - surface_tool->add_vertex(v2); - - } - - return surface_tool->commit(); - -} void Skeleton::localize_rests() { diff --git a/scene/3d/skeleton.h b/scene/3d/skeleton.h index bfdb1d14993..1b8136b06cc 100644 --- a/scene/3d/skeleton.h +++ b/scene/3d/skeleton.h @@ -84,7 +84,6 @@ class Skeleton : public Spatial { return bound; } - virtual RES _get_gizmo_geometry() const; protected: diff --git a/scene/3d/spatial_player.cpp b/scene/3d/spatial_player.cpp index c7cf03e2843..70370c51e78 100644 --- a/scene/3d/spatial_player.cpp +++ b/scene/3d/spatial_player.cpp @@ -88,146 +88,6 @@ bool SpatialPlayer::_can_gizmo_scale() const { return false; } -RES SpatialPlayer::_get_gizmo_geometry() const { - - Ref surface_tool( memnew( SurfaceTool )); - - Ref mat( memnew( FixedMaterial )); - - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.0,0.6,0.7,0.05) ); - mat->set_parameter( FixedMaterial::PARAM_EMISSION,Color(0.5,0.7,0.8) ); - mat->set_blend_mode( Material::BLEND_MODE_ADD ); - mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); -// mat->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - - - surface_tool->begin(Mesh::PRIMITIVE_TRIANGLES); - surface_tool->set_material(mat); - - int sides=16; - int sections=24; - -// float len=1; - float deg=Math::deg2rad(params[PARAM_EMISSION_CONE_DEGREES]); - if (deg==180) - deg=179.5; - - Vector3 to=Vector3(0,0,-1); - - for(int j=0;jadd_normal(p1r.normalized()); - surface_tool->add_vertex(p1r); - surface_tool->add_normal(p1s.normalized()); - surface_tool->add_vertex(p1s); - surface_tool->add_normal(p2s.normalized()); - surface_tool->add_vertex(p2s); - - surface_tool->add_normal(p1r.normalized()); - surface_tool->add_vertex(p1r); - surface_tool->add_normal(p2s.normalized()); - surface_tool->add_vertex(p2s); - surface_tool->add_normal(p2r.normalized()); - surface_tool->add_vertex(p2r); - - if (j==sections-1) { - - surface_tool->add_normal(p2r.normalized()); - surface_tool->add_vertex(p2r); - surface_tool->add_normal(p2s.normalized()); - surface_tool->add_vertex(p2s); - surface_tool->add_normal(Vector3(0,0,1)); - surface_tool->add_vertex(Vector3()); - } - } - } - - - Ref mesh = surface_tool->commit(); - - Ref mat_speaker( memnew( FixedMaterial )); - - mat_speaker->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.3,0.3,0.6) ); - mat_speaker->set_parameter( FixedMaterial::PARAM_SPECULAR,Color(0.5,0.5,0.6) ); - //mat_speaker->set_blend_mode( Material::BLEND_MODE_MIX); - //mat_speaker->set_flag(Material::FLAG_DOUBLE_SIDED,false); - //mat_speaker->set_flag(Material::FLAG_UNSHADED,true); - - surface_tool->begin(Mesh::PRIMITIVE_TRIANGLES); - surface_tool->set_material(mat_speaker); - -// float radius=1; - - - const int speaker_points=8; - Vector3 speaker[speaker_points]={ - Vector3(0,0,1)*0.15, - Vector3(1,1,1)*0.15, - Vector3(1,1,0)*0.15, - Vector3(2,2,-1)*0.15, - Vector3(1,1,-1)*0.15, - Vector3(0.8,0.8,-1.2)*0.15, - Vector3(0.5,0.5,-1.4)*0.15, - Vector3(0.0,0.0,-1.6)*0.15 - }; - - int speaker_sides=10; - - - for(int i = 0; i < speaker_sides ; i++) { - - - Matrix3 ma(Vector3(0,0,1),Math_PI*2*float(i)/speaker_sides); - Matrix3 mb(Vector3(0,0,1),Math_PI*2*float(i+1)/speaker_sides); - - - for(int j=0;jadd_normal(n); - surface_tool->add_vertex(points[0]); - surface_tool->add_normal(n); - surface_tool->add_vertex(points[2]); - surface_tool->add_normal(n); - surface_tool->add_vertex(points[1]); - - surface_tool->add_normal(n); - surface_tool->add_vertex(points[0]); - surface_tool->add_normal(n); - surface_tool->add_vertex(points[3]); - surface_tool->add_normal(n); - surface_tool->add_vertex(points[2]); - - - } - - - } - - - return surface_tool->commit(mesh); - -} - - void SpatialPlayer::_bind_methods() { diff --git a/scene/3d/spatial_player.h b/scene/3d/spatial_player.h index 5a8687b8546..594fc8f8508 100644 --- a/scene/3d/spatial_player.h +++ b/scene/3d/spatial_player.h @@ -60,7 +60,7 @@ private: RID source_rid; virtual bool _can_gizmo_scale() const; - virtual RES _get_gizmo_geometry() const; + protected: diff --git a/scene/3d/visual_instance.cpp b/scene/3d/visual_instance.cpp index b4f7a4e5b43..5bc332f8fbb 100644 --- a/scene/3d/visual_instance.cpp +++ b/scene/3d/visual_instance.cpp @@ -52,7 +52,7 @@ void VisualInstance::_notification(int p_what) { Room *room=NULL; bool is_geom = cast_to(); - while(parent) { + /* while(parent) { room = parent->cast_to(); if (room) @@ -64,7 +64,7 @@ void VisualInstance::_notification(int p_what) { } parent=parent->get_parent_spatial(); - } + }*/ @@ -92,7 +92,7 @@ void VisualInstance::_notification(int p_what) { VisualServer::get_singleton()->instance_set_scenario( instance, RID() ); VisualServer::get_singleton()->instance_set_room(instance,RID()); VisualServer::get_singleton()->instance_attach_skeleton( instance, RID() ); - VS::get_singleton()->instance_geometry_set_baked_light_sampler(instance, RID() ); + // VS::get_singleton()->instance_geometry_set_baked_light_sampler(instance, RID() ); } break; @@ -172,30 +172,55 @@ Ref GeometryInstance::get_material_override() const{ -void GeometryInstance::set_draw_range_begin(float p_dist){ +void GeometryInstance::set_lod_min_distance(float p_dist){ - draw_begin=p_dist; - VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),draw_begin,draw_end); + lod_min_distance=p_dist; + VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),lod_min_distance,lod_max_distance,lod_min_hysteresis,lod_max_hysteresis); } -float GeometryInstance::get_draw_range_begin() const{ +float GeometryInstance::get_lod_min_distance() const{ - return draw_begin; + return lod_min_distance; } -void GeometryInstance::set_draw_range_end(float p_dist) { +void GeometryInstance::set_lod_max_distance(float p_dist) { - draw_end=p_dist; - VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),draw_begin,draw_end); + lod_max_distance=p_dist; + VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),lod_min_distance,lod_max_distance,lod_min_hysteresis,lod_max_hysteresis); } -float GeometryInstance::get_draw_range_end() const { +float GeometryInstance::get_lod_max_distance() const { - return draw_end; + return lod_max_distance; } +void GeometryInstance::set_lod_min_hysteresis(float p_dist){ + + lod_min_hysteresis=p_dist; + VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),lod_min_distance,lod_max_distance,lod_min_hysteresis,lod_max_hysteresis); +} + +float GeometryInstance::get_lod_min_hysteresis() const{ + + return lod_min_hysteresis; +} + + +void GeometryInstance::set_lod_max_hysteresis(float p_dist) { + + lod_max_hysteresis=p_dist; + VS::get_singleton()->instance_geometry_set_draw_range(get_instance(),lod_min_distance,lod_max_distance,lod_min_hysteresis,lod_max_hysteresis); + +} + +float GeometryInstance::get_lod_max_hysteresis() const { + + return lod_max_hysteresis; +} + + void GeometryInstance::_notification(int p_what) { if (p_what==NOTIFICATION_ENTER_WORLD) { @@ -212,8 +237,8 @@ void GeometryInstance::_notification(int p_what) { if (flags[FLAG_USE_BAKED_LIGHT]) { if (baked_light_instance) { - baked_light_instance->disconnect(SceneStringNames::get_singleton()->baked_light_changed,this,SceneStringNames::get_singleton()->_baked_light_changed); - baked_light_instance=NULL; + // baked_light_instance->disconnect(SceneStringNames::get_singleton()->baked_light_changed,this,SceneStringNames::get_singleton()->_baked_light_changed); + // baked_light_instance=NULL; } _baked_light_changed(); @@ -229,15 +254,15 @@ void GeometryInstance::_notification(int p_what) { void GeometryInstance::_baked_light_changed() { - if (!baked_light_instance) - VS::get_singleton()->instance_geometry_set_baked_light(get_instance(),RID()); - else - VS::get_singleton()->instance_geometry_set_baked_light(get_instance(),baked_light_instance->get_baked_light_instance()); + //if (!baked_light_instance) + // VS::get_singleton()->instance_geometry_set_baked_light(get_instance(),RID()); +// else +// VS::get_singleton()->instance_geometry_set_baked_light(get_instance(),baked_light_instance->get_baked_light_instance()); } void GeometryInstance::_find_baked_light() { - +/* Node *n=get_parent(); while(n) { @@ -255,6 +280,7 @@ void GeometryInstance::_find_baked_light() { } _baked_light_changed(); + */ } void GeometryInstance::_update_visibility() { @@ -288,7 +314,7 @@ void GeometryInstance::set_flag(Flags p_flag,bool p_value) { } if (p_flag==FLAG_USE_BAKED_LIGHT) { - if (is_inside_world()) { + /* if (is_inside_world()) { if (!p_value) { if (baked_light_instance) { baked_light_instance->disconnect(SceneStringNames::get_singleton()->baked_light_changed,this,SceneStringNames::get_singleton()->_baked_light_changed); @@ -298,7 +324,7 @@ void GeometryInstance::set_flag(Flags p_flag,bool p_value) { } else { _find_baked_light(); } - } + }*/ } } @@ -333,8 +359,8 @@ GeometryInstance::ShadowCastingSetting GeometryInstance::get_cast_shadows_settin void GeometryInstance::set_baked_light_texture_id(int p_id) { - baked_light_texture_id=p_id; - VS::get_singleton()->instance_geometry_set_baked_light_texture_index(get_instance(),baked_light_texture_id); +// baked_light_texture_id=p_id; +// VS::get_singleton()->instance_geometry_set_baked_light_texture_index(get_instance(),baked_light_texture_id); } @@ -366,11 +392,18 @@ void GeometryInstance::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_cast_shadows_setting", "shadow_casting_setting"), &GeometryInstance::set_cast_shadows_setting); ObjectTypeDB::bind_method(_MD("get_cast_shadows_setting"), &GeometryInstance::get_cast_shadows_setting); - ObjectTypeDB::bind_method(_MD("set_draw_range_begin","mode"), &GeometryInstance::set_draw_range_begin); - ObjectTypeDB::bind_method(_MD("get_draw_range_begin"), &GeometryInstance::get_draw_range_begin); + ObjectTypeDB::bind_method(_MD("set_lod_max_hysteresis","mode"), &GeometryInstance::set_lod_max_hysteresis); + ObjectTypeDB::bind_method(_MD("get_lod_max_hysteresis"), &GeometryInstance::get_lod_max_hysteresis); + + ObjectTypeDB::bind_method(_MD("set_lod_max_distance","mode"), &GeometryInstance::set_lod_max_distance); + ObjectTypeDB::bind_method(_MD("get_lod_max_distance"), &GeometryInstance::get_lod_max_distance); + + ObjectTypeDB::bind_method(_MD("set_lod_min_hysteresis","mode"), &GeometryInstance::set_lod_min_hysteresis); + ObjectTypeDB::bind_method(_MD("get_lod_min_hysteresis"), &GeometryInstance::get_lod_min_hysteresis); + + ObjectTypeDB::bind_method(_MD("set_lod_min_distance","mode"), &GeometryInstance::set_lod_min_distance); + ObjectTypeDB::bind_method(_MD("get_lod_min_distance"), &GeometryInstance::get_lod_min_distance); - ObjectTypeDB::bind_method(_MD("set_draw_range_end","mode"), &GeometryInstance::set_draw_range_end); - ObjectTypeDB::bind_method(_MD("get_draw_range_end"), &GeometryInstance::get_draw_range_end); ObjectTypeDB::bind_method(_MD("set_baked_light_texture_id","id"), &GeometryInstance::set_baked_light_texture_id); ObjectTypeDB::bind_method(_MD("get_baked_light_texture_id"), &GeometryInstance::get_baked_light_texture_id); @@ -385,9 +418,6 @@ void GeometryInstance::_bind_methods() { ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/visible"), _SCS("set_flag"), _SCS("get_flag"),FLAG_VISIBLE); ADD_PROPERTY( PropertyInfo( Variant::OBJECT, "geometry/material_override",PROPERTY_HINT_RESOURCE_TYPE,"Material"), _SCS("set_material_override"), _SCS("get_material_override")); ADD_PROPERTY(PropertyInfo(Variant::INT, "geometry/cast_shadow", PROPERTY_HINT_ENUM, "Off,On,Double-Sided,Shadows Only"), _SCS("set_cast_shadows_setting"), _SCS("get_cast_shadows_setting")); - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/receive_shadows"), _SCS("set_flag"), _SCS("get_flag"),FLAG_RECEIVE_SHADOWS); - ADD_PROPERTY( PropertyInfo( Variant::INT, "geometry/range_begin",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_draw_range_begin"), _SCS("get_draw_range_begin")); - ADD_PROPERTY( PropertyInfo( Variant::INT, "geometry/range_end",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_draw_range_end"), _SCS("get_draw_range_end")); ADD_PROPERTY( PropertyInfo( Variant::REAL, "geometry/extra_cull_margin",PROPERTY_HINT_RANGE,"0,16384,0"), _SCS("set_extra_cull_margin"), _SCS("get_extra_cull_margin")); ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/billboard"), _SCS("set_flag"), _SCS("get_flag"),FLAG_BILLBOARD); ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/billboard_y"), _SCS("set_flag"), _SCS("get_flag"),FLAG_BILLBOARD_FIX_Y); @@ -395,12 +425,15 @@ void GeometryInstance::_bind_methods() { ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/visible_in_all_rooms"), _SCS("set_flag"), _SCS("get_flag"),FLAG_VISIBLE_IN_ALL_ROOMS); ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "geometry/use_baked_light"), _SCS("set_flag"), _SCS("get_flag"),FLAG_USE_BAKED_LIGHT); ADD_PROPERTY( PropertyInfo( Variant::INT, "geometry/baked_light_tex_id"), _SCS("set_baked_light_texture_id"), _SCS("get_baked_light_texture_id")); + ADD_PROPERTY( PropertyInfo( Variant::INT, "lod/min_distance",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_lod_min_distance"), _SCS("get_lod_min_distance")); + ADD_PROPERTY( PropertyInfo( Variant::INT, "lod/min_hysteresis",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_lod_min_hysteresis"), _SCS("get_lod_min_hysteresis")); + ADD_PROPERTY( PropertyInfo( Variant::INT, "lod/max_distance",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_lod_max_distance"), _SCS("get_lod_max_distance")); + ADD_PROPERTY( PropertyInfo( Variant::INT, "lod/max_hysteresis",PROPERTY_HINT_RANGE,"0,32768,0.01"), _SCS("set_lod_max_hysteresis"), _SCS("get_lod_max_hysteresis")); // ADD_SIGNAL( MethodInfo("visibility_changed")); BIND_CONSTANT(FLAG_VISIBLE ); BIND_CONSTANT(FLAG_CAST_SHADOW ); - BIND_CONSTANT(FLAG_RECEIVE_SHADOWS ); BIND_CONSTANT(FLAG_BILLBOARD ); BIND_CONSTANT(FLAG_BILLBOARD_FIX_Y ); BIND_CONSTANT(FLAG_DEPH_SCALE ); @@ -415,20 +448,23 @@ void GeometryInstance::_bind_methods() { } GeometryInstance::GeometryInstance() { - draw_begin=0; - draw_end=0; + lod_min_distance=0; + lod_max_distance=0; + lod_min_hysteresis=0; + lod_max_hysteresis=0; + for(int i=0;iinstance_geometry_set_baked_light_texture_index(get_instance(),0); +// VS::get_singleton()->instance_geometry_set_baked_light_texture_index(get_instance(),0); } diff --git a/scene/3d/visual_instance.h b/scene/3d/visual_instance.h index e286d5fa889..eb0587815f7 100644 --- a/scene/3d/visual_instance.h +++ b/scene/3d/visual_instance.h @@ -89,7 +89,6 @@ public: enum Flags { FLAG_VISIBLE=VS::INSTANCE_FLAG_VISIBLE, FLAG_CAST_SHADOW=VS::INSTANCE_FLAG_CAST_SHADOW, - FLAG_RECEIVE_SHADOWS=VS::INSTANCE_FLAG_RECEIVE_SHADOWS, FLAG_BILLBOARD=VS::INSTANCE_FLAG_BILLBOARD, FLAG_BILLBOARD_FIX_Y=VS::INSTANCE_FLAG_BILLBOARD_FIX_Y, FLAG_DEPH_SCALE=VS::INSTANCE_FLAG_DEPH_SCALE, @@ -110,8 +109,10 @@ private: bool flags[FLAG_MAX]; ShadowCastingSetting shadow_casting_setting; Ref material_override; - float draw_begin; - float draw_end; + float lod_min_distance; + float lod_max_distance; + float lod_min_hysteresis; + float lod_max_hysteresis; void _find_baked_light(); BakedLightInstance *baked_light_instance; int baked_light_texture_id; @@ -131,11 +132,17 @@ public: void set_cast_shadows_setting(ShadowCastingSetting p_shadow_casting_setting); ShadowCastingSetting get_cast_shadows_setting() const; - void set_draw_range_begin(float p_dist); - float get_draw_range_begin() const; + void set_lod_min_distance(float p_dist); + float get_lod_min_distance() const; - void set_draw_range_end(float p_dist); - float get_draw_range_end() const; + void set_lod_max_distance(float p_dist); + float get_lod_max_distance() const; + + void set_lod_min_hysteresis(float p_dist); + float get_lod_min_hysteresis() const; + + void set_lod_max_hysteresis(float p_dist); + float get_lod_max_hysteresis() const; void set_material_override(const Ref& p_material); Ref get_material_override() const; diff --git a/scene/gui/color_picker.cpp b/scene/gui/color_picker.cpp index 5e665441536..27bdb581e1c 100644 --- a/scene/gui/color_picker.cpp +++ b/scene/gui/color_picker.cpp @@ -59,6 +59,10 @@ void ColorPicker::_notification(int p_what) { w_material->set_shader(get_shader("w_editor")); update_material(uv_material,color,h,s,v); update_material(w_material,color,h,s,v); + uv_edit->set_texture(get_icon("color_main")); + w_edit->set_texture(get_icon("color_hue")); + sample->set_texture(get_icon("color_sample")); + _update_controls(); } break; @@ -192,10 +196,24 @@ void ColorPicker::_update_presets() { Size2 size=bt_add_preset->get_size(); preset->set_custom_minimum_size(Size2(size.width*presets.size(),size.height)); - Image i(size.x*presets.size(),size.y, false, Image::FORMAT_RGB); - for (int y=0;y img; + img.resize(size.x*presets.size()*size.y*3); + + { + DVector::Write w=img.write(); + for (int y=0;y t; t.instance(); t->create_from_image(i); @@ -394,15 +412,23 @@ void ColorPicker::_screen_input(const InputEvent &ev) } else if (ev.type==InputEvent::MOUSE_MOTION) { const InputEventMouse &mev = ev.mouse_motion; Viewport *r=get_tree()->get_root(); - if (!r->get_rect().has_point(Point2(mev.global_x,mev.global_y))) + if (!r->get_visible_rect().has_point(Point2(mev.global_x,mev.global_y))) return; Image img =r->get_screen_capture(); if (!img.empty()) { last_capture=img; r->queue_screen_capture(); } - if (!last_capture.empty()) - set_color(last_capture.get_pixel(mev.global_x,mev.global_y)); + if (!last_capture.empty()) { + int pw = last_capture.get_format()==Image::FORMAT_RGBA8?4:3; + int ofs = (mev.global_y*last_capture.get_width()+mev.global_x)*pw; + + DVector::Read r = last_capture.get_data().read(); + + Color c( r[ofs+0]/255.0, r[ofs+1]/255.0, r[ofs+2]/255.0 ); + + set_color(c); + } } } @@ -474,16 +500,10 @@ ColorPicker::ColorPicker() : HBoxContainer *hb_edit = memnew( HBoxContainer ); uv_edit= memnew ( TextureFrame ); - Image i(256, 256, false, Image::FORMAT_RGB); - for (int y=0;y<256;y++) - for (int x=0;x<256;x++) - i.put_pixel(x,y,Color()); - Ref t; - t.instance(); - t->create_from_image(i); - uv_edit->set_texture(t); + + + uv_edit->set_ignore_mouse(false); - uv_edit->set_custom_minimum_size(Size2(256,256)); uv_edit->connect("input_event", this, "_uv_input"); Control *c= memnew( Control ); uv_edit->add_child(c); @@ -497,16 +517,9 @@ ColorPicker::ColorPicker() : add_child(hb_edit); w_edit= memnew( TextureFrame ); - i = Image(15, 256, false, Image::FORMAT_RGB); - for (int y=0;y<256;y++) - for (int x=0;x<15;x++) - i.put_pixel(x,y,Color()); - Ref tw; - tw.instance(); - tw->create_from_image(i); - w_edit->set_texture(tw); - w_edit->set_ignore_mouse(false); - w_edit->set_custom_minimum_size(Size2(15,256)); + + + w_edit->set_ignore_mouse(false); w_edit->connect("input_event", this, "_w_input"); c= memnew( Control ); w_edit->add_child(c); @@ -594,17 +607,6 @@ ColorPicker::ColorPicker() : set_color(Color(1,1,1)); - i.create(256,20,false,Image::FORMAT_RGB); - for (int y=0;y<20;y++) - for(int x=0;x<256;x++) - if ((x/4+y/4)%2) - i.put_pixel(x,y,Color(1,1,1)); - else - i.put_pixel(x,y,Color(0.6,0.6,0.6)); - Ref t_smpl; - t_smpl.instance(); - t_smpl->create_from_image(i); - sample->set_texture(t_smpl); HBoxContainer *bbc = memnew( HBoxContainer ); add_child(bbc); diff --git a/scene/gui/patch_9_frame.cpp b/scene/gui/patch_9_frame.cpp index 9ad63983596..ed1abbadee3 100644 --- a/scene/gui/patch_9_frame.cpp +++ b/scene/gui/patch_9_frame.cpp @@ -39,7 +39,7 @@ void Patch9Frame::_notification(int p_what) { Size2 s=get_size(); RID ci = get_canvas_item(); - VS::get_singleton()->canvas_item_add_style_box(ci,Rect2(Point2(),s),region_rect,texture->get_rid(),Vector2(margin[MARGIN_LEFT],margin[MARGIN_TOP]),Vector2(margin[MARGIN_RIGHT],margin[MARGIN_BOTTOM]),draw_center,modulate); + VS::get_singleton()->canvas_item_add_nine_patch(ci,Rect2(Point2(),s),region_rect,texture->get_rid(),Vector2(margin[MARGIN_LEFT],margin[MARGIN_TOP]),Vector2(margin[MARGIN_RIGHT],margin[MARGIN_BOTTOM]),VS::NINE_PATCH_STRETCH,VS::NINE_PATCH_STRETCH,draw_center,modulate); // draw_texture_rect(texture,Rect2(Point2(),s),false,modulate); /* diff --git a/scene/main/canvas_layer.cpp b/scene/main/canvas_layer.cpp index 8e238c7d77d..9ee0cf9349a 100644 --- a/scene/main/canvas_layer.cpp +++ b/scene/main/canvas_layer.cpp @@ -246,6 +246,15 @@ Node* CanvasLayer::get_custom_viewport() const { return custom_viewport; } +void CanvasLayer::reset_sort_index() { + sort_index=0; +} + +int CanvasLayer::get_sort_index() { + + return sort_index++; +} + void CanvasLayer::_bind_methods() { @@ -296,4 +305,5 @@ CanvasLayer::CanvasLayer() { canvas = Ref( memnew(World2D) ); custom_viewport=NULL; custom_viewport_id=0; + sort_index=0; } diff --git a/scene/main/canvas_layer.h b/scene/main/canvas_layer.h index a1311390be4..bb63c96cdfa 100644 --- a/scene/main/canvas_layer.h +++ b/scene/main/canvas_layer.h @@ -52,6 +52,8 @@ class CanvasLayer : public Node { RID viewport; Viewport *vp; + int sort_index; + // Deprecated, should be removed in a future version. void _set_rotationd(real_t p_rotation); real_t _get_rotationd() const; @@ -93,6 +95,9 @@ public: void set_custom_viewport(Node *p_viewport); Node* get_custom_viewport() const; + void reset_sort_index(); + int get_sort_index(); + CanvasLayer(); }; diff --git a/scene/main/node.cpp b/scene/main/node.cpp index 18922404269..5d4e46f9aca 100644 --- a/scene/main/node.cpp +++ b/scene/main/node.cpp @@ -315,6 +315,12 @@ void Node::move_child(Node *p_child,int p_pos) { } + if (p_child->data.pos==p_pos) + return; //do nothing + + int motion_from = MIN(p_pos,p_child->data.pos); + int motion_to = MAX(p_pos,p_child->data.pos); + data.children.remove( p_child->data.pos ); data.children.insert( p_pos, p_child ); @@ -324,13 +330,13 @@ void Node::move_child(Node *p_child,int p_pos) { data.blocked++; //new pos first - for (int i=0;idata.pos=i; } // notification second move_child_notify(p_child); - for (int i=0;inotification( NOTIFICATION_MOVED_IN_PARENT ); } diff --git a/scene/main/scene_main_loop.cpp b/scene/main/scene_main_loop.cpp index e3472c074a8..c1552a4167d 100644 --- a/scene/main/scene_main_loop.cpp +++ b/scene/main/scene_main_loop.cpp @@ -746,11 +746,11 @@ Ref SceneTree::get_debug_navigation_material() { return navigation_material; Ref line_material = Ref( memnew( FixedMaterial )); - line_material->set_flag(Material::FLAG_UNSHADED, true); +/* line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_color()); + line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_color());*/ navigation_material=line_material; @@ -764,11 +764,11 @@ Ref SceneTree::get_debug_navigation_disabled_material(){ return navigation_disabled_material; Ref line_material = Ref( memnew( FixedMaterial )); - line_material->set_flag(Material::FLAG_UNSHADED, true); +/* line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_disabled_color()); + line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_disabled_color());*/ navigation_disabled_material=line_material; @@ -782,11 +782,11 @@ Ref SceneTree::get_debug_collision_material() { Ref line_material = Ref( memnew( FixedMaterial )); - line_material->set_flag(Material::FLAG_UNSHADED, true); + /*line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collisions_color()); + line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collisions_color());*/ collision_material=line_material; @@ -801,10 +801,10 @@ Ref SceneTree::get_debug_contact_mesh() { debug_contact_mesh = Ref( memnew( Mesh ) ); Ref mat = memnew( FixedMaterial ); - mat->set_flag(Material::FLAG_UNSHADED,true); + /*mat->set_flag(Material::FLAG_UNSHADED,true); mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collision_contact_color()); + mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collision_contact_color());*/ Vector3 diamond[6]={ Vector3(-1, 0, 0), @@ -1098,7 +1098,11 @@ void SceneTree::_update_root_rect() { if (stretch_mode==STRETCH_MODE_DISABLED) { - root->set_rect(Rect2(Point2(),last_screen_size)); + + root->set_size(last_screen_size); + root->set_attach_to_screen_rect(Rect2(Point2(),last_screen_size)); + root->set_size_override_stretch(false); + root->set_size_override(false,Size2()); return; //user will take care } @@ -1174,21 +1178,18 @@ void SceneTree::_update_root_rect() { switch (stretch_mode) { case STRETCH_MODE_2D: { -// root->set_rect(Rect2(Point2(),video_mode)); - root->set_as_render_target(false); - root->set_rect(Rect2(margin,screen_size)); + root->set_size(screen_size); + root->set_attach_to_screen_rect(Rect2(margin,screen_size)); root->set_size_override_stretch(true); root->set_size_override(true,viewport_size); } break; case STRETCH_MODE_VIEWPORT: { - root->set_rect(Rect2(Point2(),viewport_size)); + root->set_size(viewport_size); + root->set_attach_to_screen_rect(Rect2(margin,screen_size)); root->set_size_override_stretch(false); root->set_size_override(false,Size2()); - root->set_as_render_target(true); - root->set_render_target_update_mode(Viewport::RENDER_TARGET_UPDATE_ALWAYS); - root->set_render_target_to_screen_rect(Rect2(margin,screen_size)); } break; @@ -2300,7 +2301,7 @@ SceneTree::SceneTree() { stretch_aspect=STRETCH_ASPECT_IGNORE; last_screen_size=Size2( OS::get_singleton()->get_video_mode().width, OS::get_singleton()->get_video_mode().height ); - root->set_rect(Rect2(Point2(),last_screen_size)); + root->set_size(last_screen_size); if (ScriptDebugger::get_singleton()) { ScriptDebugger::get_singleton()->set_request_scene_tree_message_func(_debugger_request_tree,this); diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index b22d1fcdf44..44a3f41a4ae 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -54,22 +54,22 @@ int RenderTargetTexture::get_width() const { ERR_FAIL_COND_V(!vp,0); - return vp->rect.size.width; + return vp->size.width; } int RenderTargetTexture::get_height() const{ ERR_FAIL_COND_V(!vp,0); - return vp->rect.size.height; + return vp->size.height; } Size2 RenderTargetTexture::get_size() const{ ERR_FAIL_COND_V(!vp,Size2()); - return vp->rect.size; + return vp->size; } RID RenderTargetTexture::get_rid() const{ ERR_FAIL_COND_V(!vp,RID()); - return vp->render_target_texture_rid; + return vp->texture_rid; } bool RenderTargetTexture::has_alpha() const{ @@ -85,7 +85,7 @@ void RenderTargetTexture::set_flags(uint32_t p_flags){ else flags=0; - VS::get_singleton()->texture_set_flags(vp->render_target_texture_rid,flags); + VS::get_singleton()->texture_set_flags(vp->texture_rid,flags); } @@ -141,9 +141,9 @@ void Viewport::_update_stretch_transform() { if (size_override_stretch && size_override) { //print_line("sive override size "+size_override_size); - //print_line("rect size "+rect.size); + //print_line("rect size "+size); stretch_transform=Matrix32(); - Size2 scale = rect.size/(size_override_size+size_override_margin*2); + Size2 scale = size/(size_override_size+size_override_margin*2); stretch_transform.scale(scale); stretch_transform.elements[2]=size_override_margin*scale; @@ -164,14 +164,14 @@ void Viewport::_update_rect() { return; - if (!render_target && parent_control) { + /*if (!render_target && parent_control) { Control *c = parent_control; rect.pos=Point2(); rect.size=c->get_size(); - } - + }*/ +/* VisualServer::ViewportRect vr; vr.x=rect.pos.x; vr.y=rect.pos.y; @@ -191,8 +191,8 @@ void Viewport::_update_rect() { } emit_signal("size_changed"); - render_target_texture->emit_changed(); - + texture->emit_changed(); +*/ } @@ -232,12 +232,12 @@ void Viewport::_vp_enter_tree() { VisualServer::get_singleton()->canvas_item_set_parent(canvas_item,parent_ci); VisualServer::get_singleton()->canvas_item_set_visible(canvas_item,false); - VisualServer::get_singleton()->canvas_item_attach_viewport(canvas_item,viewport); +// VisualServer::get_singleton()->canvas_item_attach_viewport(canvas_item,viewport); parent_control->connect("resized",this,"_parent_resized"); parent_control->connect("visibility_changed",this,"_parent_visibility_changed"); } else if (!parent){ - VisualServer::get_singleton()->viewport_attach_to_screen(viewport,0); +// VisualServer::get_singleton()->viewport_attach_to_screen(viewport,0); } @@ -246,6 +246,7 @@ void Viewport::_vp_enter_tree() { void Viewport::_vp_exit_tree() { + /* if (parent_control) { parent_control->disconnect("resized",this,"_parent_resized"); @@ -268,7 +269,7 @@ void Viewport::_vp_exit_tree() { VisualServer::get_singleton()->viewport_detach(viewport); } - +*/ } @@ -346,11 +347,6 @@ void Viewport::_notification(int p_what) { parent_node=parent_node->get_parent(); } - - if (!render_target) - _vp_enter_tree(); - - current_canvas=find_world_2d()->get_canvas(); VisualServer::get_singleton()->viewport_set_scenario(viewport,find_world()->get_scenario()); VisualServer::get_singleton()->viewport_attach_canvas(viewport,current_canvas); @@ -370,7 +366,7 @@ void Viewport::_notification(int p_what) { //3D PhysicsServer::get_singleton()->space_set_debug_contacts(find_world()->get_space(),get_tree()->get_collision_debug_contact_count()); contact_3d_debug_multimesh=VisualServer::get_singleton()->multimesh_create(); - VisualServer::get_singleton()->multimesh_set_instance_count(contact_3d_debug_multimesh,get_tree()->get_collision_debug_contact_count()); + VisualServer::get_singleton()->multimesh_allocate(contact_3d_debug_multimesh,get_tree()->get_collision_debug_contact_count(),VS::MULTIMESH_TRANSFORM_3D,VS::MULTIMESH_COLOR_8BIT); VisualServer::get_singleton()->multimesh_set_visible_instances(contact_3d_debug_multimesh,0); VisualServer::get_singleton()->multimesh_set_mesh(contact_3d_debug_multimesh,get_tree()->get_debug_contact_mesh()->get_rid()); contact_3d_debug_instance=VisualServer::get_singleton()->instance_create(); @@ -380,6 +376,7 @@ void Viewport::_notification(int p_what) { } + VS::get_singleton()->viewport_set_active(viewport,true); } break; case NOTIFICATION_READY: { #ifndef _3D_DISABLED @@ -418,8 +415,8 @@ void Viewport::_notification(int p_what) { if (world_2d.is_valid()) world_2d->_remove_viewport(this); - if (!render_target) - _vp_exit_tree(); + //if (!render_target) + // _vp_exit_tree(); VisualServer::get_singleton()->viewport_set_scenario(viewport,RID()); SpatialSoundServer::get_singleton()->listener_set_space(internal_listener, RID()); @@ -439,6 +436,9 @@ void Viewport::_notification(int p_what) { remove_from_group("_viewports"); parent_control=NULL; + VS::get_singleton()->viewport_set_active(viewport,false); + + } break; case NOTIFICATION_FIXED_PROCESS: { @@ -452,7 +452,7 @@ void Viewport::_notification(int p_what) { if (get_tree()->is_debugging_collisions_hint() && contact_2d_debug.is_valid()) { VisualServer::get_singleton()->canvas_item_clear(contact_2d_debug); - VisualServer::get_singleton()->canvas_item_raise(contact_2d_debug); + VisualServer::get_singleton()->canvas_item_set_draw_index(contact_2d_debug,0xFFFFF); //very high index Vector points = Physics2DServer::get_singleton()->space_get_contacts(find_world_2d()->get_space()); int point_count = Physics2DServer::get_singleton()->space_get_contact_count(find_world_2d()->get_space()); @@ -488,13 +488,13 @@ void Viewport::_notification(int p_what) { VisualServer::get_singleton()->multimesh_instance_set_transform(contact_3d_debug_multimesh,i,t); } aabb.grow(aabb.get_longest_axis_size()*0.01); - VisualServer::get_singleton()->multimesh_set_aabb(contact_3d_debug_multimesh,aabb); + VisualServer::get_singleton()->multimesh_set_custom_aabb(contact_3d_debug_multimesh,aabb); } } - if (physics_object_picking && (render_target || Input::get_singleton()->get_mouse_mode()!=Input::MOUSE_MODE_CAPTURED)) { + if (physics_object_picking && (to_screen_rect==Rect2() || Input::get_singleton()->get_mouse_mode()!=Input::MOUSE_MODE_CAPTURED)) { Vector2 last_pos(1e20,1e20); CollisionObject *last_object; @@ -708,15 +708,18 @@ RID Viewport::get_viewport() const { return viewport; } -void Viewport::set_rect(const Rect2& p_rect) { +void Viewport::set_size(const Size2 &p_size) { - if (rect==p_rect) + if (size==p_size.floor()) return; - rect=p_rect; + size=p_size.floor(); + VS::get_singleton()->viewport_set_size(viewport,size.width,size.height); _update_rect(); _update_stretch_transform(); + emit_signal("size_changed"); + } Rect2 Viewport::get_visible_rect() const { @@ -724,12 +727,12 @@ Rect2 Viewport::get_visible_rect() const { Rect2 r; - if (rect.pos==Vector2() && rect.size==Size2()) { + if (size==Size2()) { r=Rect2( Point2(), Size2( OS::get_singleton()->get_video_mode().width, OS::get_singleton()->get_video_mode().height ) ); } else { - r=Rect2( rect.pos , rect.size ); + r=Rect2( Point2() , size ); } if (size_override) { @@ -740,9 +743,9 @@ Rect2 Viewport::get_visible_rect() const { return r; } -Rect2 Viewport::get_rect() const { +Size2 Viewport::get_size() const { - return rect; + return size; } @@ -1219,10 +1222,10 @@ bool Viewport::is_size_override_stretch_enabled() const { return size_override_stretch; } - +#if 0 void Viewport::set_as_render_target(bool p_enable){ - if (render_target==p_enable) +/* if (render_target==p_enable) return; render_target=p_enable; @@ -1238,117 +1241,114 @@ void Viewport::set_as_render_target(bool p_enable){ if (p_enable) { - render_target_texture_rid = VS::get_singleton()->viewport_get_render_target_texture(viewport); + texture_rid = VS::get_singleton()->viewport_get_texture(viewport); } else { - render_target_texture_rid=RID(); + texture_rid=RID(); } - render_target_texture->set_flags(render_target_texture->flags); - render_target_texture->emit_changed(); + texture->set_flags(texture->flags); + texture->emit_changed(); update_configuration_warning(); + */ } bool Viewport::is_set_as_render_target() const{ return render_target; -} -void Viewport::set_render_target_update_mode(RenderTargetUpdateMode p_mode){ - - render_target_update_mode=p_mode; - VS::get_singleton()->viewport_set_render_target_update_mode(viewport,VS::RenderTargetUpdateMode(p_mode)); } -Viewport::RenderTargetUpdateMode Viewport::get_render_target_update_mode() const{ +#endif +void Viewport::set_update_mode(UpdateMode p_mode){ + + update_mode=p_mode; +// VS::get_singleton()->viewport_set_update_mode(viewport,VS::RenderTargetUpdateMode(p_mode)); - return render_target_update_mode; } -//RID get_render_target_texture() const; +Viewport::UpdateMode Viewport::get_update_mode() const{ + + return update_mode; +} +//RID get_texture() const; void Viewport::queue_screen_capture(){ - VS::get_singleton()->viewport_queue_screen_capture(viewport); + //VS::get_singleton()->viewport_queue_screen_capture(viewport); } Image Viewport::get_screen_capture() const { - return VS::get_singleton()->viewport_get_screen_capture(viewport); +// return VS::get_singleton()->viewport_get_screen_capture(viewport); + return Image(); } -Ref Viewport::get_render_target_texture() const { +Ref Viewport::get_texture() const { - return render_target_texture; + return texture; } -void Viewport::set_render_target_vflip(bool p_enable) { +void Viewport::set_vflip(bool p_enable) { - render_target_vflip=p_enable; - VisualServer::get_singleton()->viewport_set_render_target_vflip(viewport,p_enable); + vflip=p_enable; +// VisualServer::get_singleton()->viewport_set_vflip(viewport,p_enable); } -bool Viewport::get_render_target_vflip() const{ +bool Viewport::get_vflip() const{ - return render_target_vflip; + return vflip; } -void Viewport::set_render_target_clear_on_new_frame(bool p_enable) { +void Viewport::set_clear_on_new_frame(bool p_enable) { - render_target_clear_on_new_frame=p_enable; - VisualServer::get_singleton()->viewport_set_render_target_clear_on_new_frame(viewport,p_enable); + clear_on_new_frame=p_enable; + //VisualServer::get_singleton()->viewport_set_clear_on_new_frame(viewport,p_enable); } -bool Viewport::get_render_target_clear_on_new_frame() const{ +bool Viewport::get_clear_on_new_frame() const{ - return render_target_clear_on_new_frame; + return clear_on_new_frame; } -void Viewport::render_target_clear() { +void Viewport::clear() { - //render_target_clear=true; - VisualServer::get_singleton()->viewport_render_target_clear(viewport); + //clear=true; +// VisualServer::get_singleton()->viewport_clear(viewport); } -void Viewport::set_render_target_filter(bool p_enable) { +void Viewport::set_filter(bool p_enable) { - if(!render_target) - return; - - render_target_texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); + texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); } -bool Viewport::get_render_target_filter() const{ +bool Viewport::get_filter() const{ - return (render_target_texture->get_flags()&Texture::FLAG_FILTER)!=0; + return (texture->get_flags()&Texture::FLAG_FILTER)!=0; } -void Viewport::set_render_target_gen_mipmaps(bool p_enable) { +void Viewport::set_gen_mipmaps(bool p_enable) { - //render_target_texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); - render_target_gen_mipmaps=p_enable; + //texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); + gen_mipmaps=p_enable; } -bool Viewport::get_render_target_gen_mipmaps() const{ +bool Viewport::get_gen_mipmaps() const{ - //return (render_target_texture->get_flags()&Texture::FLAG_FILTER)!=0; - return render_target_gen_mipmaps; + //return (texture->get_flags()&Texture::FLAG_FILTER)!=0; + return gen_mipmaps; } Matrix32 Viewport::_get_input_pre_xform() const { Matrix32 pre_xf; - if (render_target) { - if (to_screen_rect!=Rect2()) { - pre_xf.elements[2]=-to_screen_rect.pos; - pre_xf.scale(rect.size/to_screen_rect.size); - } - } else { + if (to_screen_rect!=Rect2()) { - pre_xf.elements[2]=-rect.pos; + pre_xf.elements[2]=-to_screen_rect.pos; + pre_xf.scale(size/to_screen_rect.size); } return pre_xf; @@ -1457,7 +1457,7 @@ void Viewport::_vp_input(const InputEvent& p_ev) { if (parent_control && !parent_control->is_visible()) return; - if (render_target && to_screen_rect==Rect2()) + if (to_screen_rect==Rect2()) return; //if render target, can't get input events //this one handles system input, p_ev are in system coordinates @@ -1483,7 +1483,7 @@ void Viewport::_vp_unhandled_input(const InputEvent& p_ev) { if (parent_control && !parent_control->is_visible()) return; - if (render_target && to_screen_rect==Rect2()) + if (to_screen_rect==Rect2()) return; //if render target, can't get input events //this one handles system input, p_ev are in system coordinates @@ -2518,17 +2518,18 @@ bool Viewport::is_using_own_world() const { return own_world.is_valid(); } -void Viewport::set_render_target_to_screen_rect(const Rect2& p_rect) { +void Viewport::set_attach_to_screen_rect(const Rect2& p_rect) { + VS::get_singleton()->viewport_attach_to_screen(viewport,p_rect); to_screen_rect=p_rect; - VisualServer::get_singleton()->viewport_set_render_target_to_screen_rect(viewport,to_screen_rect); } -Rect2 Viewport::get_render_target_to_screen_rect() const{ +Rect2 Viewport::get_attach_to_screen_rect() const{ return to_screen_rect; } + void Viewport::set_physics_object_picking(bool p_enable) { physics_object_picking=p_enable; @@ -2584,19 +2585,28 @@ Control *Viewport::get_modal_stack_top() const { String Viewport::get_configuration_warning() const { - if (get_parent() && !get_parent()->cast_to() && !render_target) { + /*if (get_parent() && !get_parent()->cast_to() && !render_target) { return TTR("This viewport is not set as render target. If you intend for it to display its contents directly to the screen, make it a child of a Control so it can obtain a size. Otherwise, make it a RenderTarget and assign its internal texture to some node for display."); - } + }*/ return String(); } +void Viewport::gui_reset_canvas_sort_index() { + gui.canvas_sort_index=0; +} +int Viewport::gui_get_canvas_sort_index() { + + return gui.canvas_sort_index++; +} + + void Viewport::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_rect","rect"), &Viewport::set_rect); - ObjectTypeDB::bind_method(_MD("get_rect"), &Viewport::get_rect); + ObjectTypeDB::bind_method(_MD("set_size","size"), &Viewport::set_size); + ObjectTypeDB::bind_method(_MD("get_size"), &Viewport::get_size); ObjectTypeDB::bind_method(_MD("set_world_2d","world_2d:World2D"), &Viewport::set_world_2d); ObjectTypeDB::bind_method(_MD("get_world_2d:World2D"), &Viewport::get_world_2d); ObjectTypeDB::bind_method(_MD("find_world_2d:World2D"), &Viewport::find_world_2d); @@ -2630,27 +2640,25 @@ void Viewport::_bind_methods() { ObjectTypeDB::bind_method(_MD("queue_screen_capture"), &Viewport::queue_screen_capture); ObjectTypeDB::bind_method(_MD("get_screen_capture"), &Viewport::get_screen_capture); - ObjectTypeDB::bind_method(_MD("set_as_render_target","enable"), &Viewport::set_as_render_target); - ObjectTypeDB::bind_method(_MD("is_set_as_render_target"), &Viewport::is_set_as_render_target); - ObjectTypeDB::bind_method(_MD("set_render_target_vflip","enable"), &Viewport::set_render_target_vflip); - ObjectTypeDB::bind_method(_MD("get_render_target_vflip"), &Viewport::get_render_target_vflip); + ObjectTypeDB::bind_method(_MD("set_vflip","enable"), &Viewport::set_vflip); + ObjectTypeDB::bind_method(_MD("get_vflip"), &Viewport::get_vflip); - ObjectTypeDB::bind_method(_MD("set_render_target_clear_on_new_frame","enable"), &Viewport::set_render_target_clear_on_new_frame); - ObjectTypeDB::bind_method(_MD("get_render_target_clear_on_new_frame"), &Viewport::get_render_target_clear_on_new_frame); + ObjectTypeDB::bind_method(_MD("set_clear_on_new_frame","enable"), &Viewport::set_clear_on_new_frame); + ObjectTypeDB::bind_method(_MD("get_clear_on_new_frame"), &Viewport::get_clear_on_new_frame); - ObjectTypeDB::bind_method(_MD("render_target_clear"), &Viewport::render_target_clear); + ObjectTypeDB::bind_method(_MD("clear"), &Viewport::clear); - ObjectTypeDB::bind_method(_MD("set_render_target_filter","enable"), &Viewport::set_render_target_filter); - ObjectTypeDB::bind_method(_MD("get_render_target_filter"), &Viewport::get_render_target_filter); + ObjectTypeDB::bind_method(_MD("set_filter","enable"), &Viewport::set_filter); + ObjectTypeDB::bind_method(_MD("get_filter"), &Viewport::get_filter); - ObjectTypeDB::bind_method(_MD("set_render_target_gen_mipmaps","enable"), &Viewport::set_render_target_gen_mipmaps); - ObjectTypeDB::bind_method(_MD("get_render_target_gen_mipmaps"), &Viewport::get_render_target_gen_mipmaps); + ObjectTypeDB::bind_method(_MD("set_gen_mipmaps","enable"), &Viewport::set_gen_mipmaps); + ObjectTypeDB::bind_method(_MD("get_gen_mipmaps"), &Viewport::get_gen_mipmaps); - ObjectTypeDB::bind_method(_MD("set_render_target_update_mode","mode"), &Viewport::set_render_target_update_mode); - ObjectTypeDB::bind_method(_MD("get_render_target_update_mode"), &Viewport::get_render_target_update_mode); + ObjectTypeDB::bind_method(_MD("set_update_mode","mode"), &Viewport::set_update_mode); + ObjectTypeDB::bind_method(_MD("get_update_mode"), &Viewport::get_update_mode); - ObjectTypeDB::bind_method(_MD("get_render_target_texture:RenderTargetTexture"), &Viewport::get_render_target_texture); + ObjectTypeDB::bind_method(_MD("get_texture:RenderTargetTexture"), &Viewport::get_texture); ObjectTypeDB::bind_method(_MD("set_physics_object_picking","enable"), &Viewport::set_physics_object_picking); ObjectTypeDB::bind_method(_MD("get_physics_object_picking"), &Viewport::get_physics_object_picking); @@ -2671,7 +2679,7 @@ void Viewport::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_as_audio_listener_2d","enable"), &Viewport::set_as_audio_listener_2d); ObjectTypeDB::bind_method(_MD("is_audio_listener_2d","enable"), &Viewport::is_audio_listener_2d); - ObjectTypeDB::bind_method(_MD("set_render_target_to_screen_rect","rect"), &Viewport::set_render_target_to_screen_rect); + ObjectTypeDB::bind_method(_MD("set_attach_to_screen_rect","rect"), &Viewport::set_attach_to_screen_rect); ObjectTypeDB::bind_method(_MD("get_mouse_pos"), &Viewport::get_mouse_pos); ObjectTypeDB::bind_method(_MD("warp_mouse","to_pos"), &Viewport::warp_mouse); @@ -2685,17 +2693,16 @@ void Viewport::_bind_methods() { ObjectTypeDB::bind_method(_MD("_gui_show_tooltip"), &Viewport::_gui_show_tooltip); ObjectTypeDB::bind_method(_MD("_gui_remove_focus"), &Viewport::_gui_remove_focus); - ADD_PROPERTY( PropertyInfo(Variant::RECT2,"rect"), _SCS("set_rect"), _SCS("get_rect") ); + ADD_PROPERTY( PropertyInfo(Variant::RECT2,"size"), _SCS("set_size"), _SCS("get_size") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"own_world"), _SCS("set_use_own_world"), _SCS("is_using_own_world") ); ADD_PROPERTY( PropertyInfo(Variant::OBJECT,"world",PROPERTY_HINT_RESOURCE_TYPE,"World"), _SCS("set_world"), _SCS("get_world") ); // ADD_PROPERTY( PropertyInfo(Variant::OBJECT,"world_2d",PROPERTY_HINT_RESOURCE_TYPE,"World2D"), _SCS("set_world_2d"), _SCS("get_world_2d") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"transparent_bg"), _SCS("set_transparent_background"), _SCS("has_transparent_background") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/enabled"), _SCS("set_as_render_target"), _SCS("is_set_as_render_target") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/v_flip"), _SCS("set_render_target_vflip"), _SCS("get_render_target_vflip") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/clear_on_new_frame"), _SCS("set_render_target_clear_on_new_frame"), _SCS("get_render_target_clear_on_new_frame") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/filter"), _SCS("set_render_target_filter"), _SCS("get_render_target_filter") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/gen_mipmaps"), _SCS("set_render_target_gen_mipmaps"), _SCS("get_render_target_gen_mipmaps") ); - ADD_PROPERTY( PropertyInfo(Variant::INT,"render_target/update_mode",PROPERTY_HINT_ENUM,"Disabled,Once,When Visible,Always"), _SCS("set_render_target_update_mode"), _SCS("get_render_target_update_mode") ); + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/v_flip"), _SCS("set_vflip"), _SCS("get_vflip") ); + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/clear_on_new_frame"), _SCS("set_clear_on_new_frame"), _SCS("get_clear_on_new_frame") ); + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/filter"), _SCS("set_filter"), _SCS("get_filter") ); + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/gen_mipmaps"), _SCS("set_gen_mipmaps"), _SCS("get_gen_mipmaps") ); + ADD_PROPERTY( PropertyInfo(Variant::INT,"render_target/update_mode",PROPERTY_HINT_ENUM,"Disabled,Once,When Visible,Always"), _SCS("set_update_mode"), _SCS("get_update_mode") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"audio_listener/enable_2d"), _SCS("set_as_audio_listener_2d"), _SCS("is_audio_listener_2d") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"audio_listener/enable_3d"), _SCS("set_as_audio_listener"), _SCS("is_audio_listener") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"physics/object_picking"), _SCS("set_physics_object_picking"), _SCS("get_physics_object_picking") ); @@ -2703,10 +2710,10 @@ void Viewport::_bind_methods() { ADD_SIGNAL(MethodInfo("size_changed")); - BIND_CONSTANT( RENDER_TARGET_UPDATE_DISABLED ); - BIND_CONSTANT( RENDER_TARGET_UPDATE_ONCE ); - BIND_CONSTANT( RENDER_TARGET_UPDATE_WHEN_VISIBLE ); - BIND_CONSTANT( RENDER_TARGET_UPDATE_ALWAYS ); + BIND_CONSTANT( UPDATE_DISABLED ); + BIND_CONSTANT( UPDATE_ONCE ); + BIND_CONSTANT( UPDATE_WHEN_VISIBLE ); + BIND_CONSTANT( UPDATE_ALWAYS ); } @@ -2731,13 +2738,13 @@ Viewport::Viewport() { size_override=false; size_override_stretch=false; size_override_size=Size2(1,1); - render_target_gen_mipmaps=false; - render_target=false; - render_target_vflip=false; - render_target_clear_on_new_frame=true; - //render_target_clear=true; - render_target_update_mode=RENDER_TARGET_UPDATE_WHEN_VISIBLE; - render_target_texture = Ref( memnew( RenderTargetTexture(this) ) ); + gen_mipmaps=false; + + vflip=false; + clear_on_new_frame=true; + //clear=true; + update_mode=UPDATE_WHEN_VISIBLE; + texture = Ref( memnew( RenderTargetTexture(this) ) ); physics_object_picking=false; physics_object_capture=0; @@ -2763,6 +2770,7 @@ Viewport::Viewport() { gui.tooltip_label=NULL; gui.drag_preview=NULL; gui.drag_attempted=false; + gui.canvas_sort_index=0; parent_control=NULL; @@ -2776,8 +2784,8 @@ Viewport::~Viewport() { VisualServer::get_singleton()->free( viewport ); SpatialSoundServer::get_singleton()->free(internal_listener); SpatialSound2DServer::get_singleton()->free(internal_listener_2d); - if (render_target_texture.is_valid()) - render_target_texture->vp=NULL; //so if used, will crash + if (texture.is_valid()) + texture->vp=NULL; //so if used, will crash } diff --git a/scene/main/viewport.h b/scene/main/viewport.h index f657f0507d2..d610237bec8 100644 --- a/scene/main/viewport.h +++ b/scene/main/viewport.h @@ -79,11 +79,11 @@ class Viewport : public Node { OBJ_TYPE( Viewport, Node ); public: - enum RenderTargetUpdateMode { - RENDER_TARGET_UPDATE_DISABLED, - RENDER_TARGET_UPDATE_ONCE, //then goes to disabled - RENDER_TARGET_UPDATE_WHEN_VISIBLE, // default - RENDER_TARGET_UPDATE_ALWAYS + enum UpdateMode { + UPDATE_DISABLED, + UPDATE_ONCE, //then goes to disabled + UPDATE_WHEN_VISIBLE, // default + UPDATE_ALWAYS }; private: @@ -114,7 +114,7 @@ friend class RenderTargetTexture; Matrix32 global_canvas_transform; Matrix32 stretch_transform; - Rect2 rect; + Size2 size; Rect2 to_screen_rect; RID contact_2d_debug; @@ -131,10 +131,10 @@ friend class RenderTargetTexture; Rect2 last_vp_rect; bool transparent_bg; - bool render_target_vflip; - bool render_target_clear_on_new_frame; - bool render_target_filter; - bool render_target_gen_mipmaps; + bool vflip; + bool clear_on_new_frame; + bool filter; + bool gen_mipmaps; bool physics_object_picking; List physics_picking_events; @@ -170,10 +170,10 @@ friend class RenderTargetTexture; void _update_stretch_transform(); void _update_global_transform(); - bool render_target; - RenderTargetUpdateMode render_target_update_mode; - RID render_target_texture_rid; - Ref render_target_texture; + + UpdateMode update_mode; + RID texture_rid; + Ref texture; struct GUI { @@ -202,6 +202,7 @@ friend class RenderTargetTexture; List subwindows; bool roots_order_dirty; List roots; + int canvas_sort_index; //for sorting items with canvas as root GUI(); @@ -299,8 +300,10 @@ public: void set_as_audio_listener_2d(bool p_enable); bool is_audio_listener_2d() const; - void set_rect(const Rect2& p_rect); - Rect2 get_rect() const; + void set_size(const Size2& p_size); + + + Size2 get_size() const; Rect2 get_visible_rect() const; RID get_viewport() const; @@ -327,29 +330,27 @@ public: void set_size_override(bool p_enable,const Size2& p_size=Size2(-1,-1),const Vector2& p_margin=Vector2()); Size2 get_size_override() const; + bool is_size_override_enabled() const; void set_size_override_stretch(bool p_enable); bool is_size_override_stretch_enabled() const; - void set_as_render_target(bool p_enable); - bool is_set_as_render_target() const; + void set_vflip(bool p_enable); + bool get_vflip() const; - void set_render_target_vflip(bool p_enable); - bool get_render_target_vflip() const; + void set_clear_on_new_frame(bool p_enable); + bool get_clear_on_new_frame() const; + void clear(); - void set_render_target_clear_on_new_frame(bool p_enable); - bool get_render_target_clear_on_new_frame() const; - void render_target_clear(); + void set_filter(bool p_enable); + bool get_filter() const; - void set_render_target_filter(bool p_enable); - bool get_render_target_filter() const; + void set_gen_mipmaps(bool p_enable); + bool get_gen_mipmaps() const; - void set_render_target_gen_mipmaps(bool p_enable); - bool get_render_target_gen_mipmaps() const; - - void set_render_target_update_mode(RenderTargetUpdateMode p_mode); - RenderTargetUpdateMode get_render_target_update_mode() const; - Ref get_render_target_texture() const; + void set_update_mode(UpdateMode p_mode); + UpdateMode get_update_mode() const; + Ref get_texture() const; Vector2 get_camera_coords(const Vector2& p_viewport_coords) const; @@ -367,8 +368,8 @@ public: void set_disable_input(bool p_disable); bool is_input_disabled() const; - void set_render_target_to_screen_rect(const Rect2& p_rect); - Rect2 get_render_target_to_screen_rect() const; + void set_attach_to_screen_rect(const Rect2& p_rect); + Rect2 get_attach_to_screen_rect() const; Vector2 get_mouse_pos() const; void warp_mouse(const Vector2& p_pos); @@ -381,12 +382,17 @@ public: Variant gui_get_drag_data() const; Control *get_modal_stack_top() const; + void gui_reset_canvas_sort_index(); + int gui_get_canvas_sort_index(); + virtual String get_configuration_warning() const; + + Viewport(); ~Viewport(); }; -VARIANT_ENUM_CAST(Viewport::RenderTargetUpdateMode); +VARIANT_ENUM_CAST(Viewport::UpdateMode); #endif diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp index 104aeb2b5ee..dcb2edf8724 100644 --- a/scene/register_scene_types.cpp +++ b/scene/register_scene_types.cpp @@ -235,7 +235,6 @@ static ResourceFormatLoaderWAV *resource_loader_wav=NULL; #endif static ResourceFormatLoaderTheme *resource_loader_theme=NULL; -static ResourceFormatLoaderShader *resource_loader_shader=NULL; static ResourceFormatSaverText *resource_saver_text=NULL; static ResourceFormatLoaderText *resource_loader_text=NULL; @@ -269,8 +268,6 @@ void register_scene_types() { resource_loader_theme = memnew( ResourceFormatLoaderTheme ); ResourceLoader::add_resource_format_loader( resource_loader_theme ); - resource_loader_shader = memnew( ResourceFormatLoaderShader ); - ResourceLoader::add_resource_format_loader( resource_loader_shader ); bool default_theme_hidpi=GLOBAL_DEF("display/use_hidpi_theme",false); Globals::get_singleton()->set_custom_property_info("display/use_hidpi_theme",PropertyInfo(Variant::BOOL,"display/use_hidpi_theme",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_DEFAULT|PROPERTY_USAGE_RESTART_IF_CHANGED)); @@ -424,7 +421,7 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); + //ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -453,8 +450,8 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); + //ObjectTypeDB::register_type(); + //ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -489,7 +486,7 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); +// ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -538,17 +535,17 @@ void register_scene_types() { /* REGISTER RESOURCES */ ObjectTypeDB::register_virtual_type(); - ObjectTypeDB::register_virtual_type(); +// ObjectTypeDB::register_virtual_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); +// ObjectTypeDB::register_type(); #ifndef _3D_DISABLED ObjectTypeDB::register_type(); ObjectTypeDB::register_virtual_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); +// ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); +// ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::add_compatibility_type("Shader","MaterialShader"); ObjectTypeDB::add_compatibility_type("ParticleSystemMaterial","FixedMaterial"); @@ -568,7 +565,7 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); + //ObjectTypeDB::register_type(); OS::get_singleton()->yield(); //may take time to init @@ -590,8 +587,7 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::add_compatibility_type("Font","BitmapFont"); @@ -664,7 +660,6 @@ void unregister_scene_types() { memdelete( resource_loader_theme ); - memdelete( resource_loader_shader ); if (resource_saver_text) { memdelete(resource_saver_text); diff --git a/scene/resources/baked_light.cpp b/scene/resources/baked_light.cpp index e4510be8743..38ed661cdd6 100644 --- a/scene/resources/baked_light.cpp +++ b/scene/resources/baked_light.cpp @@ -29,6 +29,8 @@ #include "baked_light.h" #include "servers/visual_server.h" +#if 0 + void BakedLight::set_mode(Mode p_mode) { mode=p_mode; @@ -582,7 +584,7 @@ BakedLight::BakedLight() { tint=0.0; ao_radius=2.5; ao_strength=0.7; - format=FORMAT_RGB; + format=FORMAT_RGB8; transfer_only_uv2=false; @@ -600,3 +602,4 @@ BakedLight::~BakedLight() { VS::get_singleton()->free(baked_light); } +#endif diff --git a/scene/resources/baked_light.h b/scene/resources/baked_light.h index 16806d29e35..0eaa3df276a 100644 --- a/scene/resources/baked_light.h +++ b/scene/resources/baked_light.h @@ -32,6 +32,7 @@ #include "resource.h" #include "scene/resources/texture.h" +#if 0 class BakedLight : public Resource { OBJ_TYPE( BakedLight, Resource); @@ -44,7 +45,7 @@ public: enum Format { - FORMAT_RGB, + FORMAT_RGB8, FORMAT_HDR8, FORMAT_HDR16 }; @@ -195,5 +196,5 @@ public: VARIANT_ENUM_CAST(BakedLight::Format); VARIANT_ENUM_CAST(BakedLight::Mode); VARIANT_ENUM_CAST(BakedLight::BakeFlags); - +#endif #endif // BAKED_LIGHT_H diff --git a/scene/resources/bit_mask.cpp b/scene/resources/bit_mask.cpp index f5bfce3ef8e..57dc39138ad 100644 --- a/scene/resources/bit_mask.cpp +++ b/scene/resources/bit_mask.cpp @@ -45,8 +45,8 @@ void BitMap::create_from_image_alpha(const Image& p_image){ ERR_FAIL_COND(p_image.empty()); Image img=p_image; - img.convert(Image::FORMAT_INTENSITY); - ERR_FAIL_COND(img.get_format()!=Image::FORMAT_INTENSITY); + img.convert(Image::FORMAT_LA8); + ERR_FAIL_COND(img.get_format()!=Image::FORMAT_LA8); create(Size2(img.get_width(),img.get_height())); @@ -58,7 +58,7 @@ void BitMap::create_from_image_alpha(const Image& p_image){ int bbyte = i/8; int bbit = i % 8; - if (r[i]) + if (r[i*2]) w[bbyte]|=(1<-{AmZ?FaB#`1P@Q5sCVBi)8VMc~ob0mO* z>?NMQuIvvuC3sXMR&LhW4ir-Mba4#HxcBy;AtR7?Xu((iTweYKKqegQ<23{tZrUR9 P2qfg`>gTe~DWM4f%)S|| literal 0 HcmV?d00001 diff --git a/scene/resources/default_theme/color_picker_main.png b/scene/resources/default_theme/color_picker_main.png new file mode 100644 index 0000000000000000000000000000000000000000..04986281807584065047119a3a5fe09f63712e1b GIT binary patch literal 310 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K58911L)MWvCLmEaktaqI0tLy#GV7JT)8o65Za$Q;6;L7GvQfpOmH RLvKNz@O1TaS?83{1OTce9OnQ4 literal 0 HcmV?d00001 diff --git a/scene/resources/default_theme/color_picker_sample.png b/scene/resources/default_theme/color_picker_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..b145a3e38420180fb3ebe5e7ec24c95fa18288a3 GIT binary patch literal 194 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5L^zm$rkKg71jM01{+ zH?#ERyWf$aK`#%#j1=%;3}6Ug;P~=W)N^vMFoU*$27?Ae%1yn4m!4c make_stylebox(T p_src,float p_left, float p_top, flo if (scale>1) { Size2 orig_size = Size2(img.get_width(),img.get_height()); - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); img.expand_x2_hq2x(); if (scale!=2.0) { img.resize(orig_size.x*scale,orig_size.y*scale); } } else if (scale<1) { Size2 orig_size = Size2(img.get_width(),img.get_height()); - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); img.resize(orig_size.x*scale,orig_size.y*scale); } @@ -108,14 +108,14 @@ static Ref make_icon(T p_src) { if (scale>1) { Size2 orig_size = Size2(img.get_width(),img.get_height()); - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); img.expand_x2_hq2x(); if (scale!=2.0) { img.resize(orig_size.x*scale,orig_size.y*scale); } } else if (scale<1) { Size2 orig_size = Size2(img.get_width(),img.get_height()); - img.convert(Image::FORMAT_RGBA); + img.convert(Image::FORMAT_RGBA8); img.resize(orig_size.x*scale,orig_size.y*scale); } texture->create_from_image( img,ImageTexture::FLAG_FILTER ); @@ -125,7 +125,7 @@ static Ref make_icon(T p_src) { static Ref make_shader(const char*vertex_code,const char*fragment_code,const char*lighting_code) { Ref shader = (memnew( Shader(Shader::MODE_CANVAS_ITEM) )); - shader->set_code(vertex_code, fragment_code, lighting_code); +// shader->set_code(vertex_code, fragment_code, lighting_code); return shader; } @@ -214,6 +214,8 @@ static Ref make_empty_stylebox(float p_margin_left=-1, float p_margin_ return style; } + + void fill_default_theme(Ref& t, const Ref & default_font, const Ref & large_font, Ref& default_icon, Ref& default_style, float p_scale) { scale=p_scale; @@ -832,10 +834,15 @@ void fill_default_theme(Ref& t, const Ref & default_font, const Ref t->set_icon("screen_picker","ColorPicker", make_icon( icon_color_pick_png ) ); t->set_icon("add_preset","ColorPicker", make_icon( icon_add_png ) ); + t->set_icon("color_area", "ColorPicker", make_icon( color_picker_main_png)); + t->set_icon("color_hue", "ColorPicker", make_icon( color_picker_hue_png)); + t->set_icon("color_sample", "ColorPicker", make_icon( color_picker_sample_png)); t->set_shader("uv_editor", "ColorPicker", make_shader("", uv_editor_shader_code, "")); t->set_shader("w_editor", "ColorPicker", make_shader("", w_editor_shader_code, "")); + + // TooltipPanel Ref style_tt = make_stylebox( tooltip_bg_png,4,4,4,4); @@ -962,7 +969,6 @@ void fill_default_theme(Ref& t, const Ref & default_font, const Ref t->set_icon( "logo","Icons", make_icon(logo_png) ); - // Theme default_icon= make_icon(error_icon_png) ; diff --git a/scene/resources/default_theme/theme_data.h b/scene/resources/default_theme/theme_data.h index 73c801483fc..913ea9b5e97 100644 --- a/scene/resources/default_theme/theme_data.h +++ b/scene/resources/default_theme/theme_data.h @@ -69,6 +69,21 @@ static const unsigned char close_hl_png[]={ }; +static const unsigned char color_picker_hue_png[]={ +0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x1,0x0,0x8,0x2,0x0,0x0,0x0,0x35,0x30,0x61,0x19,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xe0,0x9,0x18,0xc,0x24,0x18,0xa9,0xb3,0x2c,0xb7,0x0,0x0,0x0,0x23,0x49,0x44,0x41,0x54,0x68,0xde,0xed,0xc1,0x31,0x1,0x0,0x0,0x0,0xc2,0xa0,0xf5,0x4f,0x6d,0xd,0xf,0xa0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xbe,0xd,0x31,0x0,0x0,0x1,0x35,0x84,0x14,0xe2,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82 +}; + + +static const unsigned char color_picker_main_png[]={ +0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x1,0x0,0x0,0x0,0x1,0x0,0x8,0x2,0x0,0x0,0x0,0xd3,0x10,0x3f,0x31,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xe0,0x9,0x18,0xc,0x23,0x22,0x20,0xfe,0x63,0xc2,0x0,0x0,0x0,0xd5,0x49,0x44,0x41,0x54,0x78,0xda,0xed,0xc1,0x31,0x1,0x0,0x0,0x0,0xc2,0xa0,0xf5,0x4f,0xed,0x65,0xb,0xa0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x80,0x1b,0x1,0x2d,0x0,0x1,0x9e,0xcb,0xc2,0xed,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82 +}; + + +static const unsigned char color_picker_sample_png[]={ +0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x14,0x8,0x2,0x0,0x0,0x0,0xed,0x20,0x74,0x8,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xe0,0x9,0x18,0xc,0x27,0x37,0x29,0x4f,0x42,0x2d,0x0,0x0,0x0,0x61,0x49,0x44,0x41,0x54,0x68,0xde,0xed,0xd9,0xb1,0xd,0x0,0x21,0xc,0x3,0x40,0x40,0xbf,0x5f,0x66,0xcd,0x84,0xf9,0x96,0x19,0xf0,0x5d,0x87,0x5c,0x5b,0x9,0xca,0x9e,0x99,0x75,0xe9,0xee,0xfb,0x59,0x55,0x52,0xe9,0xc3,0xe9,0x59,0x10,0x4c,0x1,0x50,0x0,0x50,0x0,0x8,0xf4,0xf9,0x15,0x49,0x93,0x53,0x13,0x0,0x2b,0x10,0x28,0x0,0x28,0x0,0x64,0xd9,0x2e,0xc1,0xd2,0xe4,0xd4,0x4,0xc0,0xa,0x4,0xa,0x0,0xa,0x0,0x59,0x5c,0x82,0xa5,0xd1,0xa9,0x9,0x80,0x15,0x8,0x14,0x0,0x14,0x0,0xb2,0xfc,0x5b,0xb2,0x3c,0x5a,0x4,0xa1,0xf3,0x57,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82 +}; + + static const unsigned char dosfont_png[]={ 0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x80,0x0,0x0,0x0,0x80,0x1,0x0,0x0,0x0,0x0,0xeb,0x45,0x5c,0x66,0x0,0x0,0x0,0x4,0x67,0x41,0x4d,0x41,0x0,0x0,0xb1,0x8f,0xb,0xfc,0x61,0x5,0x0,0x0,0x0,0x20,0x63,0x48,0x52,0x4d,0x0,0x0,0x7a,0x26,0x0,0x0,0x80,0x84,0x0,0x0,0xfa,0x0,0x0,0x0,0x80,0xe8,0x0,0x0,0x75,0x30,0x0,0x0,0xea,0x60,0x0,0x0,0x3a,0x98,0x0,0x0,0x17,0x70,0x9c,0xba,0x51,0x3c,0x0,0x0,0x0,0x2,0x74,0x52,0x4e,0x53,0x0,0x0,0x76,0x93,0xcd,0x38,0x0,0x0,0x0,0x2,0x62,0x4b,0x47,0x44,0x0,0x1,0xdd,0x8a,0x13,0xa4,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x89,0x0,0x0,0xb,0x89,0x1,0x37,0xc9,0xcb,0xad,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xe0,0x6,0x16,0x12,0x2b,0x5,0x39,0x1a,0x32,0x39,0x0,0x0,0x2,0x83,0x49,0x44,0x41,0x54,0x48,0xc7,0xed,0xd4,0xb1,0x6e,0xdb,0x30,0x10,0x0,0x50,0x22,0x3,0x27,0x22,0xc8,0x78,0x83,0x91,0xa9,0x1f,0xc0,0xa9,0x10,0xa,0x7e,0xc,0x11,0x14,0x87,0xc,0x1c,0x32,0x9,0x1a,0xe,0x46,0xa6,0xfc,0x43,0xff,0x86,0xb5,0x80,0x9b,0x88,0x8e,0x5d,0x64,0x18,0x9e,0xdc,0xd5,0x53,0x91,0xc1,0xa0,0x7a,0xa4,0xe4,0xd4,0x31,0xd2,0x25,0x70,0x97,0xa2,0x37,0x48,0xe6,0x33,0x45,0xdd,0x51,0x24,0x95,0x7a,0x23,0xe0,0x75,0x13,0xb,0xd8,0x93,0xbf,0x51,0x51,0xa3,0xac,0x99,0xc9,0x29,0x87,0x81,0x83,0xb2,0x0,0xc7,0xfe,0xee,0x43,0x58,0x85,0x95,0xb7,0xa6,0xb6,0xaf,0x7a,0xe9,0x93,0x63,0xc3,0xf2,0xc,0x96,0x3e,0xba,0x97,0x11,0x3,0xb5,0x46,0xc0,0x15,0x30,0x43,0x1,0xbd,0x6,0x81,0x71,0xa9,0xb2,0x82,0x9,0x92,0x3d,0xf6,0xb0,0xbd,0x5c,0xf2,0x53,0xf2,0x75,0xc,0x11,0x5f,0xc7,0xe0,0xc4,0xaa,0xb4,0x40,0x41,0xc4,0x69,0xd0,0x27,0x55,0x12,0x13,0x78,0x74,0xa7,0xb5,0xd8,0x3f,0x14,0x77,0x81,0x0,0x22,0x93,0x9b,0x4c,0x54,0x5b,0x72,0x6d,0x98,0x17,0xd1,0x33,0xb3,0x94,0xaa,0x3c,0x93,0xea,0xb4,0x76,0x31,0x6a,0x66,0x0,0xa9,0x59,0x1c,0x8c,0xe,0x33,0xc0,0x12,0x4c,0x29,0xc2,0xa5,0xc3,0xc1,0xd0,0xb2,0x64,0x6a,0x60,0xa3,0xc0,0xea,0xac,0x19,0x58,0x4b,0xa9,0x4a,0x17,0xf0,0xda,0x68,0xb6,0x5,0xec,0xb2,0xf6,0x88,0x33,0xc8,0x18,0x52,0xd5,0x5a,0x1,0xb3,0x61,0x1,0x53,0xdf,0x2,0x51,0x2d,0x33,0xdd,0x12,0x59,0xea,0x94,0x95,0x3c,0x80,0x2e,0x5e,0xf9,0xdb,0x71,0x73,0x70,0xcf,0x39,0x3b,0x76,0xb7,0xbb,0x7d,0xcf,0x74,0x50,0xd,0x62,0x40,0x44,0x6,0x83,0xfe,0xc7,0x8e,0x51,0x5,0x5c,0xe1,0xdd,0xdd,0xaa,0xc2,0xf8,0x53,0x80,0x31,0xe2,0xfd,0x7d,0x14,0x8,0x7e,0xcc,0x5,0x28,0x62,0xd7,0xc5,0xc,0xa6,0xf3,0xc3,0x46,0xa6,0x30,0xd7,0x1e,0x1b,0x2e,0xd0,0x7f,0x63,0x5f,0x1f,0xf1,0x32,0xc9,0x90,0x82,0xef,0xb9,0x82,0xc,0x5a,0x1,0xef,0x66,0x90,0xd7,0x7a,0x2c,0x80,0x13,0x94,0xc4,0xf6,0x9f,0xd8,0x75,0xbb,0x2c,0x89,0xed,0xff,0x42,0xe9,0xa7,0xfb,0xa4,0x84,0xec,0x13,0x45,0x7,0x1a,0xb9,0x97,0x18,0x65,0xab,0x4,0xf9,0x54,0x8c,0x3c,0x54,0xe8,0xed,0xa3,0x7c,0x7b,0x55,0xe0,0x8b,0x0,0xf6,0x4f,0x36,0xd6,0x3d,0xc3,0xe8,0x41,0xc0,0xa1,0xb1,0xdb,0x9,0xa8,0x29,0x0,0xe,0xec,0xc3,0x4,0xc1,0x8,0xc,0x0,0xd6,0x36,0xf3,0x23,0xba,0x80,0x3,0x6f,0x17,0x15,0xbe,0x4b,0xe5,0x8c,0x23,0xc2,0x57,0x7b,0x5d,0x61,0x53,0xc0,0x61,0xf,0xbd,0xd5,0x15,0x68,0x47,0x8e,0x0,0x7b,0x37,0xba,0xab,0xba,0x59,0xcc,0x79,0x3d,0xd7,0xaf,0x5a,0xe3,0x85,0x66,0x69,0xab,0x16,0x32,0x31,0x5b,0xd0,0xdb,0x66,0x2,0x2f,0x6f,0xe,0xb2,0x42,0xb5,0x87,0xdf,0xf0,0x59,0xae,0x6a,0x86,0xae,0x93,0x4c,0x7d,0x1b,0x9a,0x6b,0x84,0xdd,0x9a,0xd6,0xca,0xc8,0x89,0xc3,0xfb,0xd4,0x82,0xe,0x30,0xa2,0x2c,0x18,0xc2,0x98,0xb2,0x4f,0x8,0xba,0x83,0xa1,0x40,0x12,0x88,0x6f,0x43,0x38,0x82,0x1c,0x1f,0x15,0x70,0x82,0x96,0xa8,0xa5,0x3d,0xed,0x9c,0xde,0xb9,0x1,0xe9,0xb8,0x5f,0x4c,0x39,0xd2,0xa6,0xac,0xa6,0x48,0xe7,0xd0,0x95,0x53,0xb0,0xc4,0x7b,0x97,0xd4,0xcd,0x3c,0xdd,0xf0,0xd0,0x4e,0xbf,0xe6,0x65,0x24,0x5b,0x7b,0x7d,0xe,0xe5,0xd6,0xae,0xe9,0x90,0x64,0xfd,0x70,0x9e,0x21,0xb5,0x6c,0x5,0xa4,0xa2,0x87,0xe9,0xa3,0x25,0xf4,0x5,0x5c,0x39,0x61,0xa6,0x1e,0xbe,0x11,0x18,0x80,0xed,0xb,0x18,0x9b,0x70,0x70,0xec,0x5f,0x80,0x3f,0x26,0x27,0xb3,0xc9,0x33,0xc8,0x5c,0x2c,0x5a,0x59,0x1f,0xcb,0x2c,0x89,0x9d,0xae,0xf,0x7d,0xcc,0xdb,0x9c,0xdd,0xd5,0xed,0x7c,0x7f,0xbe,0xd0,0x52,0xf9,0x1f,0xff,0x76,0xfc,0x2,0x24,0x3a,0x65,0x42,0xf6,0x41,0x91,0x95,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x25,0x74,0x45,0x58,0x74,0x64,0x61,0x74,0x65,0x3a,0x63,0x72,0x65,0x61,0x74,0x65,0x0,0x32,0x30,0x31,0x36,0x2d,0x30,0x36,0x2d,0x32,0x32,0x54,0x32,0x30,0x3a,0x33,0x39,0x3a,0x32,0x36,0x2b,0x30,0x32,0x3a,0x30,0x30,0xc9,0xad,0xc8,0x52,0x0,0x0,0x0,0x25,0x74,0x45,0x58,0x74,0x64,0x61,0x74,0x65,0x3a,0x6d,0x6f,0x64,0x69,0x66,0x79,0x0,0x32,0x30,0x31,0x36,0x2d,0x30,0x36,0x2d,0x32,0x32,0x54,0x32,0x30,0x3a,0x33,0x39,0x3a,0x32,0x36,0x2b,0x30,0x32,0x3a,0x30,0x30,0xb8,0xf0,0x70,0xee,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82 }; diff --git a/scene/resources/dynamic_font.cpp b/scene/resources/dynamic_font.cpp index 679c8a000c1..83518e1f3ec 100644 --- a/scene/resources/dynamic_font.cpp +++ b/scene/resources/dynamic_font.cpp @@ -573,7 +573,7 @@ void DynamicFontAtSize::_update_char(CharType p_char) { //blit to image and texture { - Image img(tex.texture_size,tex.texture_size,0,Image::FORMAT_GRAYSCALE_ALPHA,tex.imgdata); + Image img(tex.texture_size,tex.texture_size,0,Image::FORMAT_LA8,tex.imgdata); if (tex.texture.is_null()) { tex.texture.instance(); diff --git a/scene/resources/dynamic_font_stb.cpp b/scene/resources/dynamic_font_stb.cpp index 456e6a5ee78..d1efc0f705c 100644 --- a/scene/resources/dynamic_font_stb.cpp +++ b/scene/resources/dynamic_font_stb.cpp @@ -321,7 +321,7 @@ void DynamicFontAtSize::_update_char(CharType p_char) { //blit to image and texture { - Image img(tex.texture_size,tex.texture_size,0,Image::FORMAT_GRAYSCALE_ALPHA,tex.imgdata); + Image img(tex.texture_size,tex.texture_size,0,Image::FORMAT_LA8,tex.imgdata); if (tex.texture.is_null()) { tex.texture.instance(); diff --git a/scene/resources/environment.cpp b/scene/resources/environment.cpp index 4551aff0ef5..6fecdef59d1 100644 --- a/scene/resources/environment.cpp +++ b/scene/resources/environment.cpp @@ -28,59 +28,7 @@ /*************************************************************************/ #include "environment.h" #include "texture.h" -void Environment::set_background(BG p_bg) { - ERR_FAIL_INDEX(p_bg,BG_MAX); - bg_mode=p_bg; - VS::get_singleton()->environment_set_background(environment,VS::EnvironmentBG(p_bg)); -} - -Environment::BG Environment::get_background() const{ - - return bg_mode; -} - -void Environment::set_background_param(BGParam p_param, const Variant& p_value){ - - ERR_FAIL_INDEX(p_param,BG_PARAM_MAX); - bg_param[p_param]=p_value; - VS::get_singleton()->environment_set_background_param(environment,VS::EnvironmentBGParam(p_param),p_value); - -} -Variant Environment::get_background_param(BGParam p_param) const{ - - ERR_FAIL_INDEX_V(p_param,BG_PARAM_MAX,Variant()); - return bg_param[p_param]; - -} - -void Environment::set_enable_fx(Fx p_effect,bool p_enabled){ - - ERR_FAIL_INDEX(p_effect,FX_MAX); - fx_enabled[p_effect]=p_enabled; - VS::get_singleton()->environment_set_enable_fx(environment,VS::EnvironmentFx(p_effect),p_enabled); - -} -bool Environment::is_fx_enabled(Fx p_effect) const{ - - ERR_FAIL_INDEX_V(p_effect,FX_MAX,false); - return fx_enabled[p_effect]; - -} - -void Environment::fx_set_param(FxParam p_param,const Variant& p_value){ - - ERR_FAIL_INDEX(p_param,FX_PARAM_MAX); - fx_param[p_param]=p_value; - VS::get_singleton()->environment_fx_set_param(environment,VS::EnvironmentFxParam(p_param),p_value); - -} -Variant Environment::fx_get_param(FxParam p_param) const{ - - ERR_FAIL_INDEX_V(p_param,FX_PARAM_MAX,Variant()); - return fx_param[p_param]; - -} RID Environment::get_rid() const { @@ -89,160 +37,6 @@ RID Environment::get_rid() const { void Environment::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_background","bgmode"),&Environment::set_background); - ObjectTypeDB::bind_method(_MD("get_background"),&Environment::get_background); - - ObjectTypeDB::bind_method(_MD("set_background_param","param","value"),&Environment::set_background_param); - ObjectTypeDB::bind_method(_MD("get_background_param","param"),&Environment::get_background_param); - - ObjectTypeDB::bind_method(_MD("set_enable_fx","effect","enabled"),&Environment::set_enable_fx); - ObjectTypeDB::bind_method(_MD("is_fx_enabled","effect"),&Environment::is_fx_enabled); - - ObjectTypeDB::bind_method(_MD("fx_set_param","param","value"),&Environment::fx_set_param); - ObjectTypeDB::bind_method(_MD("fx_get_param","param"),&Environment::fx_get_param); - - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"ambient_light/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_AMBIENT_LIGHT); - ADD_PROPERTYI( PropertyInfo(Variant::COLOR,"ambient_light/color",PROPERTY_HINT_COLOR_NO_ALPHA),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_AMBIENT_LIGHT_COLOR); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"ambient_light/energy",PROPERTY_HINT_RANGE,"0,64,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_AMBIENT_LIGHT_ENERGY); - - - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"fxaa/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_FXAA); - - ADD_PROPERTY( PropertyInfo(Variant::INT,"background/mode",PROPERTY_HINT_ENUM,"Keep,Default Color,Color,Texture,Cubemap,Canvas"),_SCS("set_background"),_SCS("get_background")); - ADD_PROPERTYI( PropertyInfo(Variant::COLOR,"background/color"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_COLOR); - ADD_PROPERTYI( PropertyInfo(Variant::OBJECT,"background/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_TEXTURE); - ADD_PROPERTYI( PropertyInfo(Variant::OBJECT,"background/cubemap",PROPERTY_HINT_RESOURCE_TYPE,"CubeMap"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_CUBEMAP); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"background/energy",PROPERTY_HINT_RANGE,"0,128,0.01"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_ENERGY); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"background/scale",PROPERTY_HINT_RANGE,"0.001,16,0.001"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_SCALE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"background/glow",PROPERTY_HINT_RANGE,"0.00,8,0.01"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_GLOW); - ADD_PROPERTYI( PropertyInfo(Variant::INT,"background/canvas_max_layer"),_SCS("set_background_param"),_SCS("get_background_param"), BG_PARAM_CANVAS_MAX_LAYER); - - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"glow/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_GLOW); - ADD_PROPERTYI( PropertyInfo(Variant::INT,"glow/blur_passes",PROPERTY_HINT_RANGE,"1,4,1"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLUR_PASSES); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"glow/blur_scale",PROPERTY_HINT_RANGE,"0.01,4,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLUR_SCALE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"glow/blur_strength",PROPERTY_HINT_RANGE,"0.01,4,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLUR_STRENGTH); - ADD_PROPERTYI( PropertyInfo(Variant::INT,"glow/blur_blend_mode",PROPERTY_HINT_ENUM,"Additive,Screen,SoftLight"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLUR_BLEND_MODE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"glow/bloom",PROPERTY_HINT_RANGE,"0,8,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLOOM); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"glow/bloom_treshold",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_GLOW_BLOOM_TRESHOLD); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"dof_blur/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_DOF_BLUR); - ADD_PROPERTYI( PropertyInfo(Variant::INT,"dof_blur/blur_passes",PROPERTY_HINT_RANGE,"1,4,1"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_DOF_BLUR_PASSES); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"dof_blur/begin",PROPERTY_HINT_RANGE,"0,4096,0.1"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_DOF_BLUR_BEGIN); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"dof_blur/range",PROPERTY_HINT_RANGE,"0,4096,0.1"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_DOF_BLUR_RANGE); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"hdr/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_HDR); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/tonemapper",PROPERTY_HINT_ENUM,"Linear,Log,Reinhardt,ReinhardtAutoWhite"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_TONEMAPPER); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/exposure",PROPERTY_HINT_RANGE,"0.01,16,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_EXPOSURE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/white",PROPERTY_HINT_RANGE,"0.01,16,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_WHITE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/glow_treshold",PROPERTY_HINT_RANGE,"0.00,8,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_GLOW_TRESHOLD); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/glow_scale",PROPERTY_HINT_RANGE,"0.00,16,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_GLOW_SCALE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/min_luminance",PROPERTY_HINT_RANGE,"0.01,64,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_MIN_LUMINANCE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/max_luminance",PROPERTY_HINT_RANGE,"0.01,64,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_MAX_LUMINANCE); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"hdr/exposure_adj_speed",PROPERTY_HINT_RANGE,"0.001,64,0.001"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"fog/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_FOG); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"fog/begin",PROPERTY_HINT_RANGE,"0.01,4096,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_FOG_BEGIN); - ADD_PROPERTYI( PropertyInfo(Variant::COLOR,"fog/begin_color",PROPERTY_HINT_COLOR_NO_ALPHA),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_FOG_BEGIN_COLOR); - ADD_PROPERTYI( PropertyInfo(Variant::COLOR,"fog/end_color",PROPERTY_HINT_COLOR_NO_ALPHA),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_FOG_END_COLOR); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"fog/attenuation",PROPERTY_HINT_EXP_EASING),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_FOG_ATTENUATION); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"fog/bg"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_FOG_BG); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"bcs/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_BCS); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"bcs/brightness",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_BCS_BRIGHTNESS); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"bcs/contrast",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_BCS_CONTRAST); - ADD_PROPERTYI( PropertyInfo(Variant::REAL,"bcs/saturation",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("fx_set_param"),_SCS("fx_get_param"), FX_PARAM_BCS_SATURATION); - ADD_PROPERTYI( PropertyInfo(Variant::BOOL,"srgb/enabled"),_SCS("set_enable_fx"),_SCS("is_fx_enabled"), FX_SRGB); - - - - - -/* - FX_PARAM_BLOOM_GLOW_BLUR_PASSES=VS::ENV_FX_PARAM_BLOOM_GLOW_BLUR_PASSES, - FX_PARAM_BLOOM_AMOUNT=VS::ENV_FX_PARAM_BLOOM_AMOUNT, - FX_PARAM_DOF_BLUR_PASSES=VS::ENV_FX_PARAM_DOF_BLUR_PASSES, - FX_PARAM_DOF_BLUR_BEGIN=VS::ENV_FX_PARAM_DOF_BLUR_BEGIN, - FX_PARAM_DOF_BLUR_END=VS::ENV_FX_PARAM_DOF_BLUR_END, - FX_PARAM_HDR_EXPOSURE=VS::ENV_FX_PARAM_HDR_EXPOSURE, - FX_PARAM_HDR_WHITE=VS::ENV_FX_PARAM_HDR_WHITE, - FX_PARAM_HDR_GLOW_TRESHOLD=VS::ENV_FX_PARAM_HDR_GLOW_TRESHOLD, - FX_PARAM_HDR_GLOW_SCALE=VS::ENV_FX_PARAM_HDR_GLOW_SCALE, - FX_PARAM_HDR_MIN_LUMINANCE=VS::ENV_FX_PARAM_HDR_MIN_LUMINANCE, - FX_PARAM_HDR_MAX_LUMINANCE=VS::ENV_FX_PARAM_HDR_MAX_LUMINANCE, - FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED=VS::ENV_FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED, - FX_PARAM_FOG_BEGIN=VS::ENV_FX_PARAM_FOG_BEGIN, - FX_PARAM_FOG_ATTENUATION=VS::ENV_FX_PARAM_FOG_ATTENUATION, - FX_PARAM_FOG_BEGIN_COLOR=VS::ENV_FX_PARAM_FOG_BEGIN_COLOR, - FX_PARAM_FOG_END_COLOR=VS::ENV_FX_PARAM_FOG_END_COLOR, - FX_PARAM_FOG_BG=VS::ENV_FX_PARAM_FOG_BG, - FX_PARAM_GAMMA_VALUE=VS::ENV_FX_PARAM_GAMMA_VALUE, - FX_PARAM_BRIGHTNESS_VALUE=VS::ENV_FX_PARAM_BRIGHTNESS_VALUE, - FX_PARAM_CONTRAST_VALUE=VS::ENV_FX_PARAM_CONTRAST_VALUE, - FX_PARAM_SATURATION_VALUE=VS::ENV_FX_PARAM_SATURATION_VALUE, - FX_PARAM_MAX=VS::ENV_FX_PARAM_MAX -*/ - - BIND_CONSTANT( BG_KEEP ); - BIND_CONSTANT( BG_DEFAULT_COLOR ); - BIND_CONSTANT( BG_COLOR ); - BIND_CONSTANT( BG_TEXTURE ); - BIND_CONSTANT( BG_CUBEMAP ); - BIND_CONSTANT( BG_CANVAS ); - BIND_CONSTANT( BG_MAX ); - - BIND_CONSTANT( BG_PARAM_CANVAS_MAX_LAYER ); - BIND_CONSTANT( BG_PARAM_COLOR ); - BIND_CONSTANT( BG_PARAM_TEXTURE ); - BIND_CONSTANT( BG_PARAM_CUBEMAP ); - BIND_CONSTANT( BG_PARAM_ENERGY ); - BIND_CONSTANT( BG_PARAM_GLOW ); - BIND_CONSTANT( BG_PARAM_MAX ); - - - BIND_CONSTANT( FX_AMBIENT_LIGHT ); - BIND_CONSTANT( FX_FXAA ); - BIND_CONSTANT( FX_GLOW ); - BIND_CONSTANT( FX_DOF_BLUR ); - BIND_CONSTANT( FX_HDR ); - BIND_CONSTANT( FX_FOG ); - BIND_CONSTANT( FX_BCS); - BIND_CONSTANT( FX_SRGB ); - BIND_CONSTANT( FX_MAX ); - - - BIND_CONSTANT( FX_BLUR_BLEND_MODE_ADDITIVE ); - BIND_CONSTANT( FX_BLUR_BLEND_MODE_SCREEN ); - BIND_CONSTANT( FX_BLUR_BLEND_MODE_SOFTLIGHT ); - - BIND_CONSTANT( FX_HDR_TONE_MAPPER_LINEAR ); - BIND_CONSTANT( FX_HDR_TONE_MAPPER_LOG ); - BIND_CONSTANT( FX_HDR_TONE_MAPPER_REINHARDT ); - BIND_CONSTANT( FX_HDR_TONE_MAPPER_REINHARDT_AUTOWHITE ); - - BIND_CONSTANT( FX_PARAM_AMBIENT_LIGHT_COLOR ); - BIND_CONSTANT( FX_PARAM_AMBIENT_LIGHT_ENERGY ); - BIND_CONSTANT( FX_PARAM_GLOW_BLUR_PASSES ); - BIND_CONSTANT( FX_PARAM_GLOW_BLUR_SCALE ); - BIND_CONSTANT( FX_PARAM_GLOW_BLUR_STRENGTH ); - BIND_CONSTANT( FX_PARAM_GLOW_BLUR_BLEND_MODE ); - BIND_CONSTANT( FX_PARAM_GLOW_BLOOM); - BIND_CONSTANT( FX_PARAM_GLOW_BLOOM_TRESHOLD); - BIND_CONSTANT( FX_PARAM_DOF_BLUR_PASSES ); - BIND_CONSTANT( FX_PARAM_DOF_BLUR_BEGIN ); - BIND_CONSTANT( FX_PARAM_DOF_BLUR_RANGE ); - BIND_CONSTANT( FX_PARAM_HDR_TONEMAPPER); - BIND_CONSTANT( FX_PARAM_HDR_EXPOSURE ); - BIND_CONSTANT( FX_PARAM_HDR_WHITE ); - BIND_CONSTANT( FX_PARAM_HDR_GLOW_TRESHOLD ); - BIND_CONSTANT( FX_PARAM_HDR_GLOW_SCALE ); - BIND_CONSTANT( FX_PARAM_HDR_MIN_LUMINANCE ); - BIND_CONSTANT( FX_PARAM_HDR_MAX_LUMINANCE ); - BIND_CONSTANT( FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED ); - BIND_CONSTANT( FX_PARAM_FOG_BEGIN ); - BIND_CONSTANT( FX_PARAM_FOG_ATTENUATION ); - BIND_CONSTANT( FX_PARAM_FOG_BEGIN_COLOR ); - BIND_CONSTANT( FX_PARAM_FOG_END_COLOR ); - BIND_CONSTANT( FX_PARAM_FOG_BG ); - BIND_CONSTANT( FX_PARAM_BCS_BRIGHTNESS ); - BIND_CONSTANT( FX_PARAM_BCS_CONTRAST ); - BIND_CONSTANT( FX_PARAM_BCS_SATURATION ); - BIND_CONSTANT( FX_PARAM_MAX ); } @@ -250,45 +44,8 @@ Environment::Environment() { environment = VS::get_singleton()->environment_create(); - set_background(BG_DEFAULT_COLOR); - set_background_param(BG_PARAM_COLOR,Color(0,0,0)); - set_background_param(BG_PARAM_TEXTURE,Ref()); - set_background_param(BG_PARAM_CUBEMAP,Ref()); - set_background_param(BG_PARAM_ENERGY,1.0); - set_background_param(BG_PARAM_SCALE,1.0); - set_background_param(BG_PARAM_GLOW,0.0); - - for(int i=0;ifree(environment); diff --git a/scene/resources/environment.h b/scene/resources/environment.h index a72cc6a47f0..f8ea0cb36f0 100644 --- a/scene/resources/environment.h +++ b/scene/resources/environment.h @@ -37,107 +37,16 @@ class Environment : public Resource { OBJ_TYPE(Environment,Resource); public: - enum BG { - BG_KEEP=VS::ENV_BG_KEEP, - BG_DEFAULT_COLOR=VS::ENV_BG_DEFAULT_COLOR, - BG_COLOR=VS::ENV_BG_COLOR, - BG_TEXTURE=VS::ENV_BG_TEXTURE, - BG_CUBEMAP=VS::ENV_BG_CUBEMAP, - BG_CANVAS=VS::ENV_BG_CANVAS, - BG_MAX=VS::ENV_BG_MAX - }; - - enum BGParam { - - BG_PARAM_CANVAS_MAX_LAYER=VS::ENV_BG_PARAM_CANVAS_MAX_LAYER, - BG_PARAM_COLOR=VS::ENV_BG_PARAM_COLOR, - BG_PARAM_TEXTURE=VS::ENV_BG_PARAM_TEXTURE, - BG_PARAM_CUBEMAP=VS::ENV_BG_PARAM_CUBEMAP, - BG_PARAM_ENERGY=VS::ENV_BG_PARAM_ENERGY, - BG_PARAM_SCALE=VS::ENV_BG_PARAM_SCALE, - BG_PARAM_GLOW=VS::ENV_BG_PARAM_GLOW, - BG_PARAM_MAX=VS::ENV_BG_PARAM_MAX - }; - - enum Fx { - FX_AMBIENT_LIGHT=VS::ENV_FX_AMBIENT_LIGHT, - FX_FXAA=VS::ENV_FX_FXAA, - FX_GLOW=VS::ENV_FX_GLOW, - FX_DOF_BLUR=VS::ENV_FX_DOF_BLUR, - FX_HDR=VS::ENV_FX_HDR, - FX_FOG=VS::ENV_FX_FOG, - FX_BCS=VS::ENV_FX_BCS, - FX_SRGB=VS::ENV_FX_SRGB, - FX_MAX=VS::ENV_FX_MAX, - }; - - enum FxBlurBlendMode { - FX_BLUR_BLEND_MODE_ADDITIVE, - FX_BLUR_BLEND_MODE_SCREEN, - FX_BLUR_BLEND_MODE_SOFTLIGHT, - }; - - enum FxHDRToneMapper { - FX_HDR_TONE_MAPPER_LINEAR, - FX_HDR_TONE_MAPPER_LOG, - FX_HDR_TONE_MAPPER_REINHARDT, - FX_HDR_TONE_MAPPER_REINHARDT_AUTOWHITE, - }; - - enum FxParam { - FX_PARAM_AMBIENT_LIGHT_COLOR=VS::ENV_FX_PARAM_AMBIENT_LIGHT_COLOR, - FX_PARAM_AMBIENT_LIGHT_ENERGY=VS::ENV_FX_PARAM_AMBIENT_LIGHT_ENERGY, - FX_PARAM_GLOW_BLUR_PASSES=VS::ENV_FX_PARAM_GLOW_BLUR_PASSES, - FX_PARAM_GLOW_BLUR_SCALE=VS::ENV_FX_PARAM_GLOW_BLUR_SCALE, - FX_PARAM_GLOW_BLUR_STRENGTH=VS::ENV_FX_PARAM_GLOW_BLUR_STRENGTH, - FX_PARAM_GLOW_BLUR_BLEND_MODE=VS::ENV_FX_PARAM_GLOW_BLUR_BLEND_MODE, - FX_PARAM_GLOW_BLOOM=VS::ENV_FX_PARAM_GLOW_BLOOM, - FX_PARAM_GLOW_BLOOM_TRESHOLD=VS::ENV_FX_PARAM_GLOW_BLOOM_TRESHOLD, - FX_PARAM_DOF_BLUR_PASSES=VS::ENV_FX_PARAM_DOF_BLUR_PASSES, - FX_PARAM_DOF_BLUR_BEGIN=VS::ENV_FX_PARAM_DOF_BLUR_BEGIN, - FX_PARAM_DOF_BLUR_RANGE=VS::ENV_FX_PARAM_DOF_BLUR_RANGE, - FX_PARAM_HDR_EXPOSURE=VS::ENV_FX_PARAM_HDR_EXPOSURE, - FX_PARAM_HDR_TONEMAPPER=VS::ENV_FX_PARAM_HDR_TONEMAPPER, - FX_PARAM_HDR_WHITE=VS::ENV_FX_PARAM_HDR_WHITE, - FX_PARAM_HDR_GLOW_TRESHOLD=VS::ENV_FX_PARAM_HDR_GLOW_TRESHOLD, - FX_PARAM_HDR_GLOW_SCALE=VS::ENV_FX_PARAM_HDR_GLOW_SCALE, - FX_PARAM_HDR_MIN_LUMINANCE=VS::ENV_FX_PARAM_HDR_MIN_LUMINANCE, - FX_PARAM_HDR_MAX_LUMINANCE=VS::ENV_FX_PARAM_HDR_MAX_LUMINANCE, - FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED=VS::ENV_FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED, - FX_PARAM_FOG_BEGIN=VS::ENV_FX_PARAM_FOG_BEGIN, - FX_PARAM_FOG_BEGIN_COLOR=VS::ENV_FX_PARAM_FOG_BEGIN_COLOR, - FX_PARAM_FOG_END_COLOR=VS::ENV_FX_PARAM_FOG_END_COLOR, - FX_PARAM_FOG_ATTENUATION=VS::ENV_FX_PARAM_FOG_ATTENUATION, - FX_PARAM_FOG_BG=VS::ENV_FX_PARAM_FOG_BG, - FX_PARAM_BCS_BRIGHTNESS=VS::ENV_FX_PARAM_BCS_BRIGHTNESS, - FX_PARAM_BCS_CONTRAST=VS::ENV_FX_PARAM_BCS_CONTRAST, - FX_PARAM_BCS_SATURATION=VS::ENV_FX_PARAM_BCS_SATURATION, - FX_PARAM_MAX=VS::ENV_FX_PARAM_MAX - }; private: - BG bg_mode; - Variant bg_param[BG_PARAM_MAX]; - bool fx_enabled[FX_MAX]; - Variant fx_param[FX_PARAM_MAX]; + RID environment; protected: static void _bind_methods(); public: - void set_background(BG p_bg); - BG get_background() const; - - void set_background_param(BGParam p_param, const Variant& p_value); - Variant get_background_param(BGParam p_param) const; - - void set_enable_fx(Fx p_effect,bool p_enabled); - bool is_fx_enabled(Fx p_effect) const; - - void fx_set_param(FxParam p_param,const Variant& p_value); - Variant fx_get_param(FxParam p_param) const; virtual RID get_rid() const; @@ -145,9 +54,5 @@ public: ~Environment(); }; -VARIANT_ENUM_CAST( Environment::BG ); -VARIANT_ENUM_CAST( Environment::BGParam ); -VARIANT_ENUM_CAST( Environment::Fx ); -VARIANT_ENUM_CAST( Environment::FxParam ); #endif // ENVIRONMENT_H diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index 9dc54ef0e4c..3ebe5b7800b 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -29,144 +29,14 @@ #include "material.h" #include "scene/scene_string_names.h" - -static const char*_flag_names[Material::FLAG_MAX]={ - "visible", - "double_sided", - "invert_faces", - "unshaded", - "on_top", - "lightmap_on_uv2", - "colarray_is_srgb" -}; - - -static const Material::Flag _flag_indices[Material::FLAG_MAX]={ - Material::FLAG_VISIBLE, - Material::FLAG_DOUBLE_SIDED, - Material::FLAG_INVERT_FACES, - Material::FLAG_UNSHADED, - Material::FLAG_ONTOP, - Material::FLAG_LIGHTMAP_ON_UV2, - Material::FLAG_COLOR_ARRAY_SRGB, -}; - - RID Material::get_rid() const { return material; } -void Material::set_flag(Flag p_flag,bool p_enabled) { - - ERR_FAIL_INDEX(p_flag,FLAG_MAX); - flags[p_flag]=p_enabled; - VisualServer::get_singleton()->material_set_flag(material,(VS::MaterialFlag)p_flag,p_enabled); - _change_notify(); -} - - -void Material::set_blend_mode(BlendMode p_blend_mode) { - - ERR_FAIL_INDEX(p_blend_mode,4); - blend_mode=p_blend_mode; - VisualServer::get_singleton()->material_set_blend_mode(material,(VS::MaterialBlendMode)p_blend_mode); - _change_notify(); -} - -Material::BlendMode Material::get_blend_mode() const { - - return blend_mode; -} - - -void Material::set_depth_draw_mode(DepthDrawMode p_depth_draw_mode) { - - depth_draw_mode=p_depth_draw_mode; - VisualServer::get_singleton()->material_set_depth_draw_mode(material,(VS::MaterialDepthDrawMode)p_depth_draw_mode); -} - -Material::DepthDrawMode Material::get_depth_draw_mode() const { - - return depth_draw_mode; -} - -bool Material::get_flag(Flag p_flag) const { - - ERR_FAIL_INDEX_V(p_flag,FLAG_MAX,false); - return flags[p_flag]; -} - -void Material::set_line_width(float p_width) { - - line_width=p_width; - VisualServer::get_singleton()->material_set_line_width(material,p_width); - _change_notify("line_width"); -} - -float Material::get_line_width() const { - - return line_width; -} - - -void Material::_bind_methods() { - - ObjectTypeDB::bind_method(_MD("set_flag","flag","enable"),&Material::set_flag); - ObjectTypeDB::bind_method(_MD("get_flag","flag"),&Material::get_flag); - ObjectTypeDB::bind_method(_MD("set_blend_mode","mode"),&Material::set_blend_mode); - ObjectTypeDB::bind_method(_MD("get_blend_mode"),&Material::get_blend_mode); - ObjectTypeDB::bind_method(_MD("set_line_width","width"),&Material::set_line_width); - ObjectTypeDB::bind_method(_MD("get_line_width"),&Material::get_line_width); - ObjectTypeDB::bind_method(_MD("set_depth_draw_mode","mode"),&Material::set_depth_draw_mode); - ObjectTypeDB::bind_method(_MD("get_depth_draw_mode"),&Material::get_depth_draw_mode); - - - for(int i=0;imaterial_create(); } Material::~Material() { @@ -174,431 +44,15 @@ Material::~Material() { VisualServer::get_singleton()->free(material); } -static const char*_param_names[FixedMaterial::PARAM_MAX]={ - "diffuse", - "detail", - "specular", - "emission", - "specular_exp", - "glow", - "normal", - "shade_param" -}; -static const char*_full_param_names[FixedMaterial::PARAM_MAX]={ - "params/diffuse", - "params/detail", - "params/specular", - "params/emission", - "params/specular_exp", - "params/glow", - "params/normal", - "params/shade_param" -}; +FixedMaterial::FixedMaterial() { -/* -static const char*_texture_param_names[FixedMaterial::PARAM_MAX]={ - "tex_diffuse", - "tex_detail", - "tex_specular", - "tex_emission", - "tex_specular_exp", - "tex_glow", - "tex_detail_mix", - "tex_normal", - "tex_shade_param" -}; -*/ -static const FixedMaterial::Parameter _param_indices[FixedMaterial::PARAM_MAX]={ - FixedMaterial::PARAM_DIFFUSE, - FixedMaterial::PARAM_DETAIL, - FixedMaterial::PARAM_SPECULAR, - FixedMaterial::PARAM_EMISSION, - FixedMaterial::PARAM_SPECULAR_EXP, - FixedMaterial::PARAM_GLOW, - FixedMaterial::PARAM_NORMAL, - FixedMaterial::PARAM_SHADE_PARAM, -}; - - - - -void FixedMaterial::set_parameter(Parameter p_parameter, const Variant& p_value) { - - ERR_FAIL_INDEX(p_parameter,PARAM_MAX); - if ((p_parameter==PARAM_DIFFUSE || p_parameter==PARAM_SPECULAR || p_parameter==PARAM_EMISSION)) { - - if (p_value.get_type()!=Variant::COLOR) { - ERR_EXPLAIN(String(_param_names[p_parameter])+" expects Color"); - ERR_FAIL(); - } - } else { - - if (!p_value.is_num()) { - ERR_EXPLAIN(String(_param_names[p_parameter])+" expects scalar"); - ERR_FAIL(); - } - } - - ERR_FAIL_COND( (p_parameter==PARAM_DIFFUSE || p_parameter==PARAM_SPECULAR || p_parameter==PARAM_EMISSION) && p_value.get_type()!=Variant::COLOR ); - ERR_FAIL_COND( p_parameter!=PARAM_SHADE_PARAM && p_parameter!=PARAM_DIFFUSE && p_parameter!=PARAM_DETAIL && p_parameter!=PARAM_SPECULAR && p_parameter!=PARAM_EMISSION && p_value.get_type()!=Variant::REAL && p_value.get_type()!=Variant::INT ); - - param[p_parameter]=p_value; - - VisualServer::get_singleton()->fixed_material_set_param(material,(VS::FixedMaterialParam)p_parameter,p_value); - - _change_notify(_full_param_names[p_parameter]); -} - -Variant FixedMaterial::get_parameter(Parameter p_parameter) const { - ERR_FAIL_INDEX_V(p_parameter,PARAM_MAX,Variant()); - return param[p_parameter]; -} - - - -void FixedMaterial::set_texture(Parameter p_parameter, Ref p_texture) { - - ERR_FAIL_INDEX(p_parameter,PARAM_MAX); - - texture_param[p_parameter]=p_texture; - VisualServer::get_singleton()->fixed_material_set_texture(material,(VS::FixedMaterialParam)p_parameter,p_texture.is_null()?RID():p_texture->get_rid()); - - _change_notify(); -} -Ref FixedMaterial::get_texture(Parameter p_parameter) const { - - ERR_FAIL_INDEX_V(p_parameter,PARAM_MAX,Ref()); - return texture_param[p_parameter]; -} - -void FixedMaterial::set_texcoord_mode(Parameter p_parameter, TexCoordMode p_mode) { - - ERR_FAIL_INDEX(p_parameter,PARAM_MAX); - ERR_FAIL_INDEX(p_mode,4); - - if (p_mode==texture_texcoord[p_parameter]) - return; - - texture_texcoord[p_parameter]=p_mode; - - VisualServer::get_singleton()->fixed_material_set_texcoord_mode(material,(VS::FixedMaterialParam)p_parameter,(VS::FixedMaterialTexCoordMode)p_mode); - - _change_notify(); -} - -FixedMaterial::TexCoordMode FixedMaterial::get_texcoord_mode(Parameter p_parameter) const { - - ERR_FAIL_INDEX_V(p_parameter,PARAM_MAX,TEXCOORD_UV); - return texture_texcoord[p_parameter]; -} - -void FixedMaterial::set_light_shader(LightShader p_shader) { - - light_shader=p_shader; - VS::get_singleton()->fixed_material_set_light_shader(material,VS::FixedMaterialLightShader(p_shader)); -} - -FixedMaterial::LightShader FixedMaterial::get_light_shader() const { - - return light_shader; -} - - -void FixedMaterial::set_uv_transform(const Transform& p_transform) { - - uv_transform=p_transform; - VisualServer::get_singleton()->fixed_material_set_uv_transform(material, p_transform ); - _change_notify(); -} - -Transform FixedMaterial::get_uv_transform() const { - - return uv_transform; -} - - - - - -void FixedMaterial::set_fixed_flag(FixedFlag p_flag, bool p_value) { - ERR_FAIL_INDEX(p_flag,5); - fixed_flags[p_flag]=p_value; - VisualServer::get_singleton()->fixed_material_set_flag(material,(VS::FixedMaterialFlags)p_flag,p_value); } -bool FixedMaterial::get_fixed_flag(FixedFlag p_flag) const { - ERR_FAIL_INDEX_V(p_flag,5,false); - return fixed_flags[p_flag]; -} - -void FixedMaterial::set_point_size(float p_size) { - - ERR_FAIL_COND(p_size<0); - point_size=p_size; - VisualServer::get_singleton()->fixed_material_set_point_size(material,p_size); -} - -float FixedMaterial::get_point_size() const{ - - - return point_size; -} - - -void FixedMaterial::_bind_methods() { - - - ObjectTypeDB::bind_method(_MD("set_parameter","param","value"),&FixedMaterial::set_parameter); - ObjectTypeDB::bind_method(_MD("get_parameter","param"),&FixedMaterial::get_parameter); - - ObjectTypeDB::bind_method(_MD("set_texture","param","texture:Texture"),&FixedMaterial::set_texture); - ObjectTypeDB::bind_method(_MD("get_texture:Texture","param"),&FixedMaterial::get_texture); - - - ObjectTypeDB::bind_method(_MD("set_texcoord_mode","param","mode"),&FixedMaterial::set_texcoord_mode); - ObjectTypeDB::bind_method(_MD("get_texcoord_mode","param"),&FixedMaterial::get_texcoord_mode); - - ObjectTypeDB::bind_method(_MD("set_fixed_flag","flag","value"),&FixedMaterial::set_fixed_flag); - ObjectTypeDB::bind_method(_MD("get_fixed_flag","flag"),&FixedMaterial::get_fixed_flag); - - ObjectTypeDB::bind_method(_MD("set_uv_transform","transform"),&FixedMaterial::set_uv_transform); - ObjectTypeDB::bind_method(_MD("get_uv_transform"),&FixedMaterial::get_uv_transform); - - ObjectTypeDB::bind_method(_MD("set_light_shader","shader"),&FixedMaterial::set_light_shader); - ObjectTypeDB::bind_method(_MD("get_light_shader"),&FixedMaterial::get_light_shader); - - ObjectTypeDB::bind_method(_MD("set_point_size","size"),&FixedMaterial::set_point_size); - ObjectTypeDB::bind_method(_MD("get_point_size"),&FixedMaterial::get_point_size); - - - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "fixed_flags/use_alpha" ), _SCS("set_fixed_flag"), _SCS("get_fixed_flag"), FLAG_USE_ALPHA); - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "fixed_flags/use_color_array" ), _SCS("set_fixed_flag"), _SCS("get_fixed_flag"), FLAG_USE_COLOR_ARRAY); - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "fixed_flags/use_point_size" ), _SCS("set_fixed_flag"), _SCS("get_fixed_flag"), FLAG_USE_POINT_SIZE); - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "fixed_flags/discard_alpha" ), _SCS("set_fixed_flag"), _SCS("get_fixed_flag"), FLAG_DISCARD_ALPHA); - ADD_PROPERTYI( PropertyInfo( Variant::BOOL, "fixed_flags/use_xy_normalmap" ), _SCS("set_fixed_flag"), _SCS("get_fixed_flag"), FLAG_USE_XY_NORMALMAP); - ADD_PROPERTYI( PropertyInfo( Variant::COLOR, "params/diffuse" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_DIFFUSE); - ADD_PROPERTYI( PropertyInfo( Variant::COLOR, "params/specular", PROPERTY_HINT_COLOR_NO_ALPHA ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SPECULAR ); - ADD_PROPERTYI( PropertyInfo( Variant::COLOR, "params/emission", PROPERTY_HINT_COLOR_NO_ALPHA ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_EMISSION ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/specular_exp", PROPERTY_HINT_RANGE,"1,64,0.01" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SPECULAR_EXP ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/detail_mix", PROPERTY_HINT_RANGE,"0,1,0.01" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_DETAIL ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/normal_depth", PROPERTY_HINT_RANGE,"-4,4,0.01" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_NORMAL ); - ADD_PROPERTY( PropertyInfo( Variant::INT, "params/shader", PROPERTY_HINT_ENUM,"Lambert,Wrap,Velvet,Toon" ), _SCS("set_light_shader"), _SCS("get_light_shader") ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/shader_param", PROPERTY_HINT_RANGE,"0,1,0.01" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_SHADE_PARAM ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "params/glow", PROPERTY_HINT_RANGE,"0,8,0.01" ), _SCS("set_parameter"), _SCS("get_parameter"), PARAM_GLOW ); - ADD_PROPERTY( PropertyInfo( Variant::REAL, "params/point_size", PROPERTY_HINT_RANGE,"0,1024,1" ), _SCS("set_point_size"), _SCS("get_point_size")); - ADD_PROPERTY( PropertyInfo( Variant::TRANSFORM, "uv_xform"), _SCS("set_uv_transform"), _SCS("get_uv_transform") ); - - for(int i=0;ifixed_material_create()) { - - - - param[PARAM_DIFFUSE]=Color(1,1,1); - param[PARAM_SPECULAR]=Color(0.0,0.0,0.0); - param[PARAM_EMISSION]=Color(0.0,0.0,0.0); - param[PARAM_SPECULAR_EXP]=40; - param[PARAM_GLOW]=0; - param[PARAM_NORMAL]=1; - param[PARAM_SHADE_PARAM]=0.5; - param[PARAM_DETAIL]=1.0; - - set_flag(FLAG_COLOR_ARRAY_SRGB,true); - - fixed_flags[FLAG_USE_ALPHA]=false; - fixed_flags[FLAG_USE_COLOR_ARRAY]=false; - fixed_flags[FLAG_USE_POINT_SIZE]=false; - fixed_flags[FLAG_USE_XY_NORMALMAP]=false; - fixed_flags[FLAG_DISCARD_ALPHA]=false; - - - for(int i=0;ishader_shader) { - set_shader(p_value); - return true; - } else { - - if (shader.is_valid()) { - - - StringName pr = shader->remap_param(p_name); - if (!pr) { - String n = p_name; - if (n.find("param/")==0) { //backwards compatibility - pr = n.substr(6,n.length()); - } - } - if (pr) { - VisualServer::get_singleton()->material_set_param(material,pr,p_value); - return true; - } - } - } - - return false; -} - -bool ShaderMaterial::_get(const StringName& p_name,Variant &r_ret) const { - - - if (p_name==SceneStringNames::get_singleton()->shader_shader) { - - r_ret=get_shader(); - return true; - } else { - - if (shader.is_valid()) { - - StringName pr = shader->remap_param(p_name); - if (pr) { - r_ret=VisualServer::get_singleton()->material_get_param(material,pr); - return true; - } - } - - } - - - return false; -} - - -void ShaderMaterial::_get_property_list( List *p_list) const { - - p_list->push_back( PropertyInfo( Variant::OBJECT, "shader/shader", PROPERTY_HINT_RESOURCE_TYPE,"MaterialShader,MaterialShaderGraph" ) ); - - if (!shader.is_null()) { - - shader->get_param_list(p_list); - } } - -void ShaderMaterial::_shader_changed() { - - _change_notify(); //also all may have changed then -} - -void ShaderMaterial::set_shader(const Ref& p_shader) { - - ERR_FAIL_COND(p_shader.is_valid() && p_shader->get_mode()!=Shader::MODE_MATERIAL); - - if (shader.is_valid()) - shader->disconnect(SceneStringNames::get_singleton()->changed,this,SceneStringNames::get_singleton()->_shader_changed); - shader=p_shader; - VS::get_singleton()->material_set_shader(material,shader.is_valid()?shader->get_rid():RID()); - if (shader.is_valid()) { - shader->connect(SceneStringNames::get_singleton()->changed,this,SceneStringNames::get_singleton()->_shader_changed); - } - _change_notify(); - -} - -Ref ShaderMaterial::get_shader() const { - - return shader; -} - - -void ShaderMaterial::set_shader_param(const StringName& p_param,const Variant& p_value) { - - VisualServer::get_singleton()->material_set_param(material,p_param,p_value); - -} - -Variant ShaderMaterial::get_shader_param(const StringName& p_param) const{ - - return VisualServer::get_singleton()->material_get_param(material,p_param); -} - - - -void ShaderMaterial::_bind_methods() { - - ObjectTypeDB::bind_method(_MD("set_shader","shader:Shader"), &ShaderMaterial::set_shader ); - ObjectTypeDB::bind_method(_MD("get_shader:Shader"), &ShaderMaterial::get_shader ); - - ObjectTypeDB::bind_method(_MD("set_shader_param","param","value:Variant"), &ShaderMaterial::set_shader_param); - ObjectTypeDB::bind_method(_MD("get_shader_param:Variant","param"), &ShaderMaterial::get_shader_param); - - ObjectTypeDB::bind_method(_MD("_shader_changed"), &ShaderMaterial::_shader_changed ); -} - - -void ShaderMaterial::get_argument_options(const StringName& p_function,int p_idx,List*r_options) const { - - String f = p_function.operator String(); - if ((f=="get_shader_param" || f=="set_shader_param") && p_idx==0) { - - if (shader.is_valid()) { - List pl; - shader->get_param_list(&pl); - for (List::Element *E=pl.front();E;E=E->next()) { - r_options->push_back("\""+E->get().name.replace_first("shader_param/","")+"\""); - } - } - } - Material::get_argument_options(p_function,p_idx,r_options); -} - -ShaderMaterial::ShaderMaterial() :Material(VisualServer::get_singleton()->material_create()){ - - -} - - ///////////////////////////////// diff --git a/scene/resources/material.h b/scene/resources/material.h index dbd05c466f7..87afa60ce9b 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -45,209 +45,27 @@ class Material : public Resource { RES_BASE_EXTENSION("mtl"); OBJ_SAVE_TYPE( Material ); -public: - - enum Flag { - FLAG_VISIBLE = VS::MATERIAL_FLAG_VISIBLE, - FLAG_DOUBLE_SIDED = VS::MATERIAL_FLAG_DOUBLE_SIDED, - FLAG_INVERT_FACES = VS::MATERIAL_FLAG_INVERT_FACES, - FLAG_UNSHADED = VS::MATERIAL_FLAG_UNSHADED, - FLAG_ONTOP = VS::MATERIAL_FLAG_ONTOP, - FLAG_LIGHTMAP_ON_UV2 = VS::MATERIAL_FLAG_LIGHTMAP_ON_UV2, - FLAG_COLOR_ARRAY_SRGB = VS::MATERIAL_FLAG_COLOR_ARRAY_SRGB, - FLAG_MAX = VS::MATERIAL_FLAG_MAX - }; - - enum BlendMode { - BLEND_MODE_MIX = VS::MATERIAL_BLEND_MODE_MIX, - BLEND_MODE_MUL = VS::MATERIAL_BLEND_MODE_MUL, - BLEND_MODE_ADD = VS::MATERIAL_BLEND_MODE_ADD, - BLEND_MODE_SUB = VS::MATERIAL_BLEND_MODE_SUB, - BLEND_MODE_PREMULT_ALPHA = VS::MATERIAL_BLEND_MODE_PREMULT_ALPHA, - - }; - - enum DepthDrawMode { - DEPTH_DRAW_ALWAYS = VS::MATERIAL_DEPTH_DRAW_ALWAYS, - DEPTH_DRAW_OPAQUE_ONLY = VS::MATERIAL_DEPTH_DRAW_OPAQUE_ONLY, - DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA = VS::MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA, - DEPTH_DRAW_NEVER = VS::MATERIAL_DEPTH_DRAW_NEVER - }; - - - -private: - BlendMode blend_mode; - bool flags[VS::MATERIAL_FLAG_MAX]; - float line_width; - DepthDrawMode depth_draw_mode; -protected: RID material; - - static void _bind_methods(); - public: - void set_flag(Flag p_flag,bool p_enabled); - bool get_flag(Flag p_flag) const; - - void set_blend_mode(BlendMode p_blend_mode); - BlendMode get_blend_mode() const; - - void set_depth_draw_mode(DepthDrawMode p_depth_draw_mode); - DepthDrawMode get_depth_draw_mode() const; - - void set_line_width(float p_width); - float get_line_width() const; virtual RID get_rid() const; - - Material(const RID& p_rid=RID()); + Material(); virtual ~Material(); }; -VARIANT_ENUM_CAST( Material::Flag ); -VARIANT_ENUM_CAST( Material::DepthDrawMode ); - -VARIANT_ENUM_CAST( Material::BlendMode ); - class FixedMaterial : public Material { - OBJ_TYPE( FixedMaterial, Material ); - REVERSE_GET_PROPERTY_LIST -public: - - enum Parameter { - PARAM_DIFFUSE=VS::FIXED_MATERIAL_PARAM_DIFFUSE, - PARAM_DETAIL=VS::FIXED_MATERIAL_PARAM_DETAIL, - PARAM_SPECULAR=VS::FIXED_MATERIAL_PARAM_SPECULAR, - PARAM_EMISSION=VS::FIXED_MATERIAL_PARAM_EMISSION, - PARAM_SPECULAR_EXP=VS::FIXED_MATERIAL_PARAM_SPECULAR_EXP, - PARAM_GLOW=VS::FIXED_MATERIAL_PARAM_GLOW, - PARAM_NORMAL=VS::FIXED_MATERIAL_PARAM_NORMAL, - PARAM_SHADE_PARAM=VS::FIXED_MATERIAL_PARAM_SHADE_PARAM, - PARAM_MAX=VS::FIXED_MATERIAL_PARAM_MAX - }; - - - enum TexCoordMode { - - TEXCOORD_UV=VS::FIXED_MATERIAL_TEXCOORD_UV, - TEXCOORD_UV_TRANSFORM=VS::FIXED_MATERIAL_TEXCOORD_UV_TRANSFORM, - TEXCOORD_UV2=VS::FIXED_MATERIAL_TEXCOORD_UV2, - TEXCOORD_SPHERE=VS::FIXED_MATERIAL_TEXCOORD_SPHERE - }; - - enum FixedFlag { - FLAG_USE_ALPHA=VS::FIXED_MATERIAL_FLAG_USE_ALPHA, - FLAG_USE_COLOR_ARRAY=VS::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY, - FLAG_USE_POINT_SIZE=VS::FIXED_MATERIAL_FLAG_USE_POINT_SIZE, - FLAG_DISCARD_ALPHA=VS::FIXED_MATERIAL_FLAG_DISCARD_ALPHA, - FLAG_USE_XY_NORMALMAP=VS::FIXED_MATERIAL_FLAG_USE_XY_NORMALMAP, - FLAG_MAX=VS::FIXED_MATERIAL_FLAG_MAX - }; - - enum LightShader { - - LIGHT_SHADER_LAMBERT=VS::FIXED_MATERIAL_LIGHT_SHADER_LAMBERT, - LIGHT_SHADER_WRAP=VS::FIXED_MATERIAL_LIGHT_SHADER_WRAP, - LIGHT_SHADER_VELVET=VS::FIXED_MATERIAL_LIGHT_SHADER_VELVET, - LIGHT_SHADER_TOON=VS::FIXED_MATERIAL_LIGHT_SHADER_TOON - }; - -private: - - - struct Node { - - int param; - int mult; - int tex; - }; - - Variant param[PARAM_MAX]; - Ref texture_param[PARAM_MAX]; - TexCoordMode texture_texcoord[PARAM_MAX]; - LightShader light_shader; - bool fixed_flags[FLAG_MAX]; - float point_size; - - - Transform uv_transform; - -protected: - - - static void _bind_methods(); + OBJ_TYPE(FixedMaterial,Resource); public: - void set_fixed_flag(FixedFlag p_flag, bool p_value); - bool get_fixed_flag(FixedFlag p_flag) const; - - void set_parameter(Parameter p_parameter, const Variant& p_value); - Variant get_parameter(Parameter p_parameter) const; - - void set_texture(Parameter p_parameter, Ref p_texture); - Ref get_texture(Parameter p_parameter) const; - - void set_texcoord_mode(Parameter p_parameter, TexCoordMode p_mode); - TexCoordMode get_texcoord_mode(Parameter p_parameter) const; - - void set_light_shader(LightShader p_shader); - LightShader get_light_shader() const; - - void set_uv_transform(const Transform& p_transform); - Transform get_uv_transform() const; - - void set_point_size(float p_transform); - float get_point_size() const; - FixedMaterial(); - ~FixedMaterial(); - + virtual ~FixedMaterial(); }; - -VARIANT_ENUM_CAST( FixedMaterial::Parameter ); -VARIANT_ENUM_CAST( FixedMaterial::TexCoordMode ); -VARIANT_ENUM_CAST( FixedMaterial::FixedFlag ); -VARIANT_ENUM_CAST( FixedMaterial::LightShader ); - -class ShaderMaterial : public Material { - - OBJ_TYPE( ShaderMaterial, Material ); - - Ref shader; - - - - void _shader_changed(); - static void _shader_parse(void*p_self,ShaderLanguage::ProgramNode*p_node); - -protected: - - bool _set(const StringName& p_name, const Variant& p_value); - bool _get(const StringName& p_name,Variant &r_ret) const; - void _get_property_list( List *p_list) const; - - static void _bind_methods(); - -public: - - void set_shader(const Ref& p_shader); - Ref get_shader() const; - - void set_shader_param(const StringName& p_param,const Variant& p_value); - Variant get_shader_param(const StringName& p_param) const; - - void get_argument_options(const StringName& p_function,int p_idx,List*r_options) const; - - ShaderMaterial(); -}; - ////////////////////// diff --git a/scene/resources/mesh.cpp b/scene/resources/mesh.cpp index 921466585d7..132ec54e48f 100644 --- a/scene/resources/mesh.cpp +++ b/scene/resources/mesh.cpp @@ -122,22 +122,14 @@ bool Mesh::_set(const StringName& p_name, const Variant& p_value) { if (idx==surfaces.size()) { - if (what=="custom") { - add_custom_surface(p_value); - return true; - - } - //create Dictionary d=p_value; ERR_FAIL_COND_V(!d.has("primitive"),false); ERR_FAIL_COND_V(!d.has("arrays"),false); ERR_FAIL_COND_V(!d.has("morph_arrays"),false); - bool alphasort = d.has("alphasort") && bool(d["alphasort"]); - - add_surface(PrimitiveType(int(d["primitive"])),d["arrays"],d["morph_arrays"],alphasort); + add_surface(PrimitiveType(int(d["primitive"])),d["arrays"],d["morph_arrays"]); if (d.has("material")) { surface_set_material(idx,d["material"]); @@ -196,7 +188,6 @@ bool Mesh::_get(const StringName& p_name,Variant &r_ret) const { d["primitive"]=surface_get_primitive_type(idx); d["arrays"]=surface_get_arrays(idx); d["morph_arrays"]=surface_get_morph_arrays(idx); - d["alphasort"]=surface_is_alpha_sorting_enabled(idx); Ref m = surface_get_material(idx); if (m.is_valid()) d["material"]=m; @@ -243,14 +234,14 @@ void Mesh::_recompute_aabb() { } -void Mesh::add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes,bool p_alphasort) { +void Mesh::add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes) { ERR_FAIL_COND(p_arrays.size()!=ARRAY_MAX); Surface s; - VisualServer::get_singleton()->mesh_add_surface(mesh,(VisualServer::PrimitiveType)p_primitive, p_arrays,p_blend_shapes,p_alphasort); + VisualServer::get_singleton()->mesh_add_surface_from_arrays(mesh,(VisualServer::PrimitiveType)p_primitive, p_arrays,p_blend_shapes); surfaces.push_back(s); @@ -274,7 +265,6 @@ void Mesh::add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Arr } surfaces[surfaces.size()-1].aabb=aabb; - surfaces[surfaces.size()-1].alphasort=p_alphasort; _recompute_aabb(); @@ -289,29 +279,19 @@ void Mesh::add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Arr Array Mesh::surface_get_arrays(int p_surface) const { ERR_FAIL_INDEX_V(p_surface,surfaces.size(),Array()); - return VisualServer::get_singleton()->mesh_get_surface_arrays(mesh,p_surface); + //return VisualServer::get_singleton()->mesh_get_surface_arrays(mesh,p_surface); + return Array(); } Array Mesh::surface_get_morph_arrays(int p_surface) const { ERR_FAIL_INDEX_V(p_surface,surfaces.size(),Array()); - return VisualServer::get_singleton()->mesh_get_surface_morph_arrays(mesh,p_surface); + return Array(); } -void Mesh::add_custom_surface(const Variant& p_data) { - - Surface s; - s.aabb=AABB(); - VisualServer::get_singleton()->mesh_add_custom_surface(mesh,p_data); - surfaces.push_back(s); - - triangle_mesh=Ref(); - _change_notify(); -} - int Mesh::get_surface_count() const { @@ -418,11 +398,6 @@ Mesh::PrimitiveType Mesh::surface_get_primitive_type(int p_idx) const { return (PrimitiveType)VisualServer::get_singleton()->mesh_surface_get_primitive_type( mesh, p_idx ); } -bool Mesh::surface_is_alpha_sorting_enabled(int p_idx) const { - - ERR_FAIL_INDEX_V( p_idx, surfaces.size(), 0 ); - return surfaces[p_idx].alphasort; -} void Mesh::surface_set_material(int p_idx, const Ref& p_material) { @@ -968,7 +943,6 @@ void Mesh::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_morph_target_mode","mode"),&Mesh::set_morph_target_mode); ObjectTypeDB::bind_method(_MD("get_morph_target_mode"),&Mesh::get_morph_target_mode); - ObjectTypeDB::bind_method(_MD("add_surface","primitive","arrays","morph_arrays","alphasort"),&Mesh::add_surface,DEFVAL(Array()),DEFVAL(false)); ObjectTypeDB::bind_method(_MD("get_surface_count"),&Mesh::get_surface_count); ObjectTypeDB::bind_method(_MD("surface_remove","surf_idx"),&Mesh::surface_remove); ObjectTypeDB::bind_method(_MD("surface_get_array_len","surf_idx"),&Mesh::surface_get_array_len); diff --git a/scene/resources/mesh.h b/scene/resources/mesh.h index dc1d97a49e2..e329c353a02 100644 --- a/scene/resources/mesh.h +++ b/scene/resources/mesh.h @@ -99,7 +99,6 @@ private: struct Surface { String name; AABB aabb; - bool alphasort; Ref material; }; Vector surfaces; @@ -123,12 +122,10 @@ protected: public: - void add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes=Array(),bool p_alphasort=false); + void add_surface(PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes=Array()); Array surface_get_arrays(int p_surface) const; virtual Array surface_get_morph_arrays(int p_surface) const; - void add_custom_surface(const Variant& p_data); //only recognized by driver - void add_morph_target(const StringName& p_name); int get_morph_target_count() const; StringName get_morph_target_name(int p_index) const; diff --git a/scene/resources/multimesh.cpp b/scene/resources/multimesh.cpp index c5ade631248..0a005d8c549 100644 --- a/scene/resources/multimesh.cpp +++ b/scene/resources/multimesh.cpp @@ -141,7 +141,7 @@ Ref MultiMesh::get_mesh() const { void MultiMesh::set_instance_count(int p_count) { - VisualServer::get_singleton()->multimesh_set_instance_count(multimesh,p_count); + VisualServer::get_singleton()->multimesh_allocate(multimesh,p_count,VS::MultimeshTransformFormat(transform_format),VS::MultimeshColorFormat(color_format)); } int MultiMesh::get_instance_count() const { @@ -177,7 +177,7 @@ Color MultiMesh::get_instance_color(int p_instance) const { void MultiMesh::set_aabb(const AABB& p_aabb) { aabb=p_aabb; - VisualServer::get_singleton()->multimesh_set_aabb(multimesh,p_aabb); + VisualServer::get_singleton()->multimesh_set_custom_aabb(multimesh,p_aabb); } @@ -220,10 +220,36 @@ RID MultiMesh::get_rid() const { } + +void MultiMesh::set_color_format(ColorFormat p_color_format) { + + color_format=p_color_format; +} + +MultiMesh::ColorFormat MultiMesh::get_color_format() const{ + + return color_format; +} + +void MultiMesh::set_transform_format(TransformFormat p_transform_format){ + + transform_format=p_transform_format; +} +MultiMesh::TransformFormat MultiMesh::get_transform_format() const{ + + return transform_format; +} + + void MultiMesh::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_mesh","mesh:Mesh"),&MultiMesh::set_mesh); ObjectTypeDB::bind_method(_MD("get_mesh:Mesh"),&MultiMesh::get_mesh); + ObjectTypeDB::bind_method(_MD("set_color_format","format"),&MultiMesh::set_color_format); + ObjectTypeDB::bind_method(_MD("get_color_format"),&MultiMesh::get_color_format); + ObjectTypeDB::bind_method(_MD("set_transform_format","format"),&MultiMesh::set_transform_format); + ObjectTypeDB::bind_method(_MD("get_transform_format"),&MultiMesh::get_transform_format); + ObjectTypeDB::bind_method(_MD("set_instance_count","count"),&MultiMesh::set_instance_count); ObjectTypeDB::bind_method(_MD("get_instance_count"),&MultiMesh::get_instance_count); ObjectTypeDB::bind_method(_MD("set_instance_transform","instance","transform"),&MultiMesh::set_instance_transform); @@ -241,17 +267,29 @@ void MultiMesh::_bind_methods() { ObjectTypeDB::bind_method(_MD("_get_color_array"),&MultiMesh::_get_color_array); + ADD_PROPERTY(PropertyInfo(Variant::INT,"color_format",PROPERTY_HINT_ENUM,"None,Byte,Float"), _SCS("set_color_format"), _SCS("get_color_format")); + ADD_PROPERTY(PropertyInfo(Variant::INT,"transform_format",PROPERTY_HINT_ENUM,"None,2D,3D"), _SCS("set_transform_format"), _SCS("get_transform_format")); ADD_PROPERTY(PropertyInfo(Variant::INT,"instance_count",PROPERTY_HINT_RANGE,"0,16384,1"), _SCS("set_instance_count"), _SCS("get_instance_count")); ADD_PROPERTY(PropertyInfo(Variant::OBJECT,"mesh",PROPERTY_HINT_RESOURCE_TYPE,"Mesh"), _SCS("set_mesh"), _SCS("get_mesh")); ADD_PROPERTY(PropertyInfo(Variant::_AABB,"aabb"), _SCS("set_aabb"), _SCS("get_aabb") ); ADD_PROPERTY(PropertyInfo(Variant::VECTOR3_ARRAY,"transform_array",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_NOEDITOR), _SCS("_set_transform_array"), _SCS("_get_transform_array")); ADD_PROPERTY(PropertyInfo(Variant::COLOR_ARRAY,"color_array",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_NOEDITOR), _SCS("_set_color_array"), _SCS("_get_color_array")); + + BIND_CONSTANT( TRANSFORM_NONE ); + BIND_CONSTANT( TRANSFORM_2D ); + BIND_CONSTANT( TRANSFORM_3D ); + BIND_CONSTANT( COLOR_NONE ); + BIND_CONSTANT( COLOR_8BIT ); + BIND_CONSTANT( COLOR_FLOAT ); + } MultiMesh::MultiMesh() { multimesh = VisualServer::get_singleton()->multimesh_create(); + color_format=COLOR_NONE; + transform_format=TRANSFORM_NONE; } MultiMesh::~MultiMesh() { diff --git a/scene/resources/multimesh.h b/scene/resources/multimesh.h index 0cf9e92defd..be4ee38fcdb 100644 --- a/scene/resources/multimesh.h +++ b/scene/resources/multimesh.h @@ -30,16 +30,33 @@ #define MULTIMESH_H #include "scene/resources/mesh.h" - +#include "servers/visual_server.h" class MultiMesh : public Resource { OBJ_TYPE( MultiMesh, Resource ); RES_BASE_EXTENSION("mmsh"); +public: + enum TransformFormat { + TRANSFORM_NONE = VS::MULTIMESH_TRANSFORM_NONE, + TRANSFORM_2D = VS::MULTIMESH_TRANSFORM_2D, + TRANSFORM_3D = VS::MULTIMESH_TRANSFORM_3D + }; + + enum ColorFormat { + COLOR_NONE = VS::MULTIMESH_COLOR_NONE, + COLOR_8BIT = VS::MULTIMESH_COLOR_8BIT, + COLOR_FLOAT = VS::MULTIMESH_COLOR_FLOAT, + }; +private: AABB aabb; Ref mesh; RID multimesh; + TransformFormat transform_format; + ColorFormat color_format; + + protected: static void _bind_methods(); @@ -55,6 +72,12 @@ public: void set_mesh(const Ref& p_mesh); Ref get_mesh() const; + void set_color_format(ColorFormat p_color_format); + ColorFormat get_color_format() const; + + void set_transform_format(TransformFormat p_transform_format); + TransformFormat get_transform_format() const; + void set_instance_count(int p_count); int get_instance_count() const; @@ -76,4 +99,8 @@ public: }; + +VARIANT_ENUM_CAST( MultiMesh::TransformFormat ); +VARIANT_ENUM_CAST( MultiMesh::ColorFormat); + #endif // MULTI_MESH_H diff --git a/scene/resources/room.cpp b/scene/resources/room.cpp index f28220531b6..978a15ab5e6 100644 --- a/scene/resources/room.cpp +++ b/scene/resources/room.cpp @@ -36,16 +36,6 @@ RID RoomBounds::get_rid() const { return area; } -void RoomBounds::set_bounds( const BSP_Tree& p_bounds ) { - - VisualServer::get_singleton()->room_set_bounds(area,p_bounds); - emit_signal("changed"); -} - -BSP_Tree RoomBounds::get_bounds() const { - - return VisualServer::get_singleton()->room_get_bounds(area); -} void RoomBounds::set_geometry_hint(const DVector& p_geometry_hint) { @@ -57,41 +47,15 @@ DVector RoomBounds::get_geometry_hint() const { return geometry_hint; } -void RoomBounds::_regenerate_bsp_cubic() { - if (geometry_hint.size()) { - - float err=0; - geometry_hint= Geometry::wrap_geometry( geometry_hint, &err ); ///< create a "wrap" that encloses the given geometry - - BSP_Tree new_bounds(geometry_hint,err); - set_bounds(new_bounds); - } - -} - -void RoomBounds::_regenerate_bsp() { - - if (geometry_hint.size()) { - - BSP_Tree new_bounds(geometry_hint,0); - set_bounds(new_bounds); - } -} void RoomBounds::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_bounds","bsp_tree"),&RoomBounds::set_bounds); - ObjectTypeDB::bind_method(_MD("get_bounds"),&RoomBounds::get_bounds); ObjectTypeDB::bind_method(_MD("set_geometry_hint","triangles"),&RoomBounds::set_geometry_hint); ObjectTypeDB::bind_method(_MD("get_geometry_hint"),&RoomBounds::get_geometry_hint); - ObjectTypeDB::bind_method(_MD("regenerate_bsp"),&RoomBounds::_regenerate_bsp); - ObjectTypeDB::set_method_flags(get_type_static(),_SCS("regenerate_bsp"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR); - ObjectTypeDB::bind_method(_MD("regenerate_bsp_cubic"),&RoomBounds::_regenerate_bsp_cubic); - ObjectTypeDB::set_method_flags(get_type_static(),_SCS("regenerate_bsp_cubic"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR); - ADD_PROPERTY( PropertyInfo( Variant::DICTIONARY, "bounds"), _SCS("set_bounds"),_SCS("get_bounds") ); + //ADD_PROPERTY( PropertyInfo( Variant::DICTIONARY, "bounds"), _SCS("set_bounds"),_SCS("get_bounds") ); ADD_PROPERTY( PropertyInfo( Variant::VECTOR3_ARRAY, "geometry_hint"),_SCS("set_geometry_hint"),_SCS("get_geometry_hint") ); } diff --git a/scene/resources/room.h b/scene/resources/room.h index dc5e2848383..62005210448 100644 --- a/scene/resources/room.h +++ b/scene/resources/room.h @@ -34,6 +34,7 @@ /** @author Juan Linietsky */ + class RoomBounds : public Resource { OBJ_TYPE( RoomBounds, Resource ); @@ -42,8 +43,7 @@ class RoomBounds : public Resource { RID area; DVector geometry_hint; - void _regenerate_bsp(); - void _regenerate_bsp_cubic(); + protected: static void _bind_methods(); @@ -52,8 +52,6 @@ public: virtual RID get_rid() const; - void set_bounds( const BSP_Tree& p_bounds ); - BSP_Tree get_bounds() const; void set_geometry_hint(const DVector& geometry_hint); DVector get_geometry_hint() const; @@ -63,4 +61,6 @@ public: }; + + #endif // ROOM_H diff --git a/scene/resources/shader.cpp b/scene/resources/shader.cpp index ee9f23dd2ac..b6c8fcf7a17 100644 --- a/scene/resources/shader.cpp +++ b/scene/resources/shader.cpp @@ -39,37 +39,17 @@ Shader::Mode Shader::get_mode() const { return mode; } -void Shader::set_code( const String& p_vertex, const String& p_fragment, const String& p_light,int p_fragment_ofs,int p_light_ofs) { +void Shader::set_code(const String& p_code) { - VisualServer::get_singleton()->shader_set_code(shader,p_vertex,p_fragment,p_light,0,p_fragment_ofs,p_light_ofs); + VisualServer::get_singleton()->shader_set_code(shader,p_code); params_cache_dirty=true; emit_signal(SceneStringNames::get_singleton()->changed); } -String Shader::get_vertex_code() const { +String Shader::get_code() const { - return VisualServer::get_singleton()->shader_get_vertex_code(shader); -} - -String Shader::get_fragment_code() const { - - return VisualServer::get_singleton()->shader_get_fragment_code(shader); - -} - -String Shader::get_light_code() const { - - return VisualServer::get_singleton()->shader_get_light_code(shader); - -} - -bool Shader::has_param(const StringName& p_param) const { - - if (params_cache_dirty) - get_param_list(NULL); - - return (params_cache.has(p_param)); + return VisualServer::get_singleton()->shader_get_code(shader); } @@ -101,48 +81,6 @@ RID Shader::get_rid() const { return shader; } -Dictionary Shader::_get_code() { - - String fs = VisualServer::get_singleton()->shader_get_fragment_code(shader); - String vs = VisualServer::get_singleton()->shader_get_vertex_code(shader); - String ls = VisualServer::get_singleton()->shader_get_light_code(shader); - - Dictionary c; - c["fragment"]=fs; - c["fragment_ofs"]=0; - c["vertex"]=vs; - c["vertex_ofs"]=0; - c["light"]=ls; - c["light_ofs"]=0; - Array arr; - for(const Map >::Element *E=default_textures.front();E;E=E->next()) { - arr.push_back(E->key()); - arr.push_back(E->get()); - } - if (arr.size()) - c["default_tex"]=arr; - return c; -} - -void Shader::_set_code(const Dictionary& p_string) { - - ERR_FAIL_COND(!p_string.has("fragment")); - ERR_FAIL_COND(!p_string.has("vertex")); - String light; - if (p_string.has("light")) - light=p_string["light"]; - - set_code(p_string["vertex"],p_string["fragment"],light); - if (p_string.has("default_tex")) { - Array arr=p_string["default_tex"]; - if ((arr.size()&1)==0) { - for(int i=0;i& p_texture) { @@ -171,29 +109,27 @@ void Shader::get_default_texture_param_list(List* r_textures) const{ } } +bool Shader::has_param(const StringName& p_param) const { + return params_cache.has(p_param); +} void Shader::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_mode"),&Shader::get_mode); - ObjectTypeDB::bind_method(_MD("set_code","vcode","fcode","lcode","fofs","lofs"),&Shader::set_code,DEFVAL(0),DEFVAL(0)); - ObjectTypeDB::bind_method(_MD("get_vertex_code"),&Shader::get_vertex_code); - ObjectTypeDB::bind_method(_MD("get_fragment_code"),&Shader::get_fragment_code); - ObjectTypeDB::bind_method(_MD("get_light_code"),&Shader::get_light_code); + ObjectTypeDB::bind_method(_MD("set_code","code"),&Shader::set_code); + ObjectTypeDB::bind_method(_MD("get_code"),&Shader::get_code); ObjectTypeDB::bind_method(_MD("set_default_texture_param","param","texture:Texture"),&Shader::set_default_texture_param); ObjectTypeDB::bind_method(_MD("get_default_texture_param:Texture","param"),&Shader::get_default_texture_param); ObjectTypeDB::bind_method(_MD("has_param","name"),&Shader::has_param); - ObjectTypeDB::bind_method(_MD("_set_code","code"),&Shader::_set_code); - ObjectTypeDB::bind_method(_MD("_get_code"),&Shader::_get_code); - //ObjectTypeDB::bind_method(_MD("get_param_list"),&Shader::get_fragment_code); - ADD_PROPERTY( PropertyInfo(Variant::STRING, "_code",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_NOEDITOR), _SCS("_set_code"), _SCS("_get_code") ); + ADD_PROPERTY( PropertyInfo(Variant::STRING, "code",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_NOEDITOR), _SCS("set_code"), _SCS("get_code") ); BIND_CONSTANT( MODE_MATERIAL ); BIND_CONSTANT( MODE_CANVAS_ITEM ); @@ -214,253 +150,3 @@ Shader::~Shader() { } - -/************ Loader from text ***************/ - - - -RES ResourceFormatLoaderShader::load(const String &p_path, const String& p_original_path, Error *r_error) { - - if (r_error) - *r_error=ERR_FILE_CANT_OPEN; - - String fragment_code; - String vertex_code; - String light_code; - - int mode=-1; - - Error err; - FileAccess *f = FileAccess::open(p_path,FileAccess::READ,&err); - - - ERR_EXPLAIN("Unable to open shader file: "+p_path); - ERR_FAIL_COND_V(err,RES()); - String base_path = p_path.get_base_dir(); - - if (r_error) - *r_error=ERR_FILE_CORRUPT; - - Ref shader;//( memnew( Shader ) ); - - int line=0; - - while(!f->eof_reached()) { - - String l = f->get_line(); - line++; - - if (mode<=0) { - l = l.strip_edges(); - int comment = l.find(";"); - if (comment!=-1) - l=l.substr(0,comment); - } - - if (mode<1) - vertex_code+="\n"; - if (mode<2) - fragment_code+="\n"; - - if (mode < 1 && l=="") - continue; - - if (l.begins_with("[")) { - l=l.strip_edges(); - if (l=="[params]") { - if (mode>=0) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Misplaced [params] section."); - ERR_FAIL_V(RES()); - } - mode=0; - } else if (l=="[vertex]") { - if (mode>=1) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Misplaced [vertex] section."); - ERR_FAIL_V(RES()); - } - mode=1; - } else if (l=="[fragment]") { - if (mode>=2) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Misplaced [fragment] section."); - ERR_FAIL_V(RES()); - } - mode=1; - } else { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Unknown section type: '"+l+"'."); - ERR_FAIL_V(RES()); - } - continue; - } - - if (mode==0) { - - int eqpos = l.find("="); - if (eqpos==-1) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Expected '='."); - ERR_FAIL_V(RES()); - } - - - String right=l.substr(eqpos+1,l.length()).strip_edges(); - if (right=="") { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Expected value after '='."); - ERR_FAIL_V(RES()); - } - - Variant value; - - if (right=="true") { - value = true; - } else if (right=="false") { - value = false; - } else if (right.is_valid_float()) { - //is number - value = right.to_double(); - } else if (right.is_valid_html_color()) { - //is html color - value = Color::html(right); - } else { - //attempt to parse a constructor - int popenpos = right.find("("); - - if (popenpos==-1) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Invalid constructor syntax: "+right); - ERR_FAIL_V(RES()); - } - - int pclosepos = right.find_last(")"); - - if (pclosepos==-1) { - ERR_EXPLAIN(p_path+":"+itos(line)+": Invalid constructor parameter syntax: "+right); - ERR_FAIL_V(RES()); - - } - - String type = right.substr(0,popenpos); - String param = right.substr(popenpos+1,pclosepos-popenpos-1).strip_edges(); - - - if (type=="tex") { - - if (param=="") { - - value=RID(); - } else { - - String path; - - if (param.is_abs_path()) - path=param; - else - path=base_path+"/"+param; - - Ref texture = ResourceLoader::load(path); - if (!texture.is_valid()) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Couldn't find icon at path: "+path); - ERR_FAIL_V(RES()); - } - - value=texture; - } - - } else if (type=="vec3") { - - if (param=="") { - value=Vector3(); - } else { - Vector params = param.split(","); - if (params.size()!=3) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Invalid param count for vec3(): '"+right+"'."); - ERR_FAIL_V(RES()); - - } - - Vector3 v; - for(int i=0;i<3;i++) - v[i]=params[i].to_double(); - value=v; - } - - - } else if (type=="xform") { - - if (param=="") { - value=Transform(); - } else { - - Vector params = param.split(","); - if (params.size()!=12) { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Invalid param count for xform(): '"+right+"'."); - ERR_FAIL_V(RES()); - - } - - Transform t; - for(int i=0;i<9;i++) - t.basis[i%3][i/3]=params[i].to_double(); - for(int i=0;i<3;i++) - t.origin[i]=params[i-9].to_double(); - - value=t; - } - - } else { - memdelete(f); - ERR_EXPLAIN(p_path+":"+itos(line)+": Invalid constructor type: '"+type+"'."); - ERR_FAIL_V(RES()); - - } - - } - - String left= l.substr(0,eqpos); - -// shader->set_param(left,value); - } else if (mode==1) { - - vertex_code+=l; - - } else if (mode==2) { - - fragment_code+=l; - } - } - - shader->set_code(vertex_code,fragment_code,light_code); - - f->close(); - memdelete(f); - if (r_error) - *r_error=OK; - - return shader; -} - -void ResourceFormatLoaderShader::get_recognized_extensions(List *p_extensions) const { - - ObjectTypeDB::get_extensions_for_type("Shader", p_extensions); -} - -bool ResourceFormatLoaderShader::handles_type(const String& p_type) const { - - return ObjectTypeDB::is_type(p_type, "Shader"); -} - - -String ResourceFormatLoaderShader::get_resource_type(const String &p_path) const { - - if (p_path.extension().to_lower()=="shd") - return "Shader"; - return ""; -} - diff --git a/scene/resources/shader.h b/scene/resources/shader.h index 6ee8d4e793d..988305b6540 100644 --- a/scene/resources/shader.h +++ b/scene/resources/shader.h @@ -32,6 +32,7 @@ #include "resource.h" #include "io/resource_loader.h" #include "scene/resources/texture.h" + class Shader : public Resource { OBJ_TYPE(Shader,Resource); @@ -49,9 +50,6 @@ public: private: RID shader; Mode mode; - Dictionary _get_code(); - void _set_code(const Dictionary& p_string); - // hack the name of performance // shaders keep a list of ShaderMaterial -> VisualServer name translations, to make @@ -73,10 +71,8 @@ public: //void set_mode(Mode p_mode); Mode get_mode() const; - void set_code( const String& p_vertex, const String& p_fragment, const String& p_light,int p_fragment_ofs=0,int p_light_ofs=0); - String get_vertex_code() const; - String get_fragment_code() const; - String get_light_code() const; + void set_code( const String& p_code); + String get_code() const; void get_param_list(List *p_params) const; bool has_param(const StringName& p_param) const; @@ -124,14 +120,5 @@ public: -class ResourceFormatLoaderShader : public ResourceFormatLoader { -public: - virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); - virtual void get_recognized_extensions(List *p_extensions) const; - virtual bool handles_type(const String& p_type) const; - virtual String get_resource_type(const String &p_path) const; -}; - - #endif // SHADER_H diff --git a/scene/resources/shader_graph.cpp b/scene/resources/shader_graph.cpp index 02faa9425d8..c62479ca89a 100644 --- a/scene/resources/shader_graph.cpp +++ b/scene/resources/shader_graph.cpp @@ -29,6 +29,7 @@ #include "shader_graph.h" #include "scene/scene_string_names.h" +#if 0 Array ShaderGraph::_get_node_list(ShaderType p_type) const { List nodes; @@ -2489,7 +2490,7 @@ void ShaderGraph::_add_node_code(ShaderType p_type,Node *p_node,const Vector it = memnew( ImageTexture ); it->create_from_image(gradient,Texture::FLAG_FILTER|Texture::FLAG_MIPMAPS); @@ -2559,7 +2560,7 @@ void ShaderGraph::_add_node_code(ShaderType p_type,Node *p_node,const Vector it = memnew( ImageTexture ); it->create_from_image(gradient,Texture::FLAG_FILTER|Texture::FLAG_MIPMAPS); @@ -2660,3 +2661,5 @@ void ShaderGraph::_add_node_code(ShaderType p_type,Node *p_node,const Vectorcanvas_item_add_style_box( p_canvas_item,r,region_rect,texture->get_rid(),Vector2(margin[MARGIN_LEFT],margin[MARGIN_TOP]),Vector2(margin[MARGIN_RIGHT],margin[MARGIN_BOTTOM]),draw_center,modulate); + VisualServer::get_singleton()->canvas_item_add_nine_patch( p_canvas_item,r,region_rect,texture->get_rid(),Vector2(margin[MARGIN_LEFT],margin[MARGIN_TOP]),Vector2(margin[MARGIN_RIGHT],margin[MARGIN_BOTTOM]),VS::NINE_PATCH_STRETCH,VS::NINE_PATCH_STRETCH,draw_center,modulate); } void StyleBoxTexture::set_draw_center(bool p_draw) { @@ -423,127 +423,3 @@ StyleBoxFlat::~StyleBoxFlat() { } -//////////////// - - - -void StyleBoxImageMask::_bind_methods() { - - ObjectTypeDB::bind_method(_MD("set_image","image"),&StyleBoxImageMask::set_image); - ObjectTypeDB::bind_method(_MD("get_image"),&StyleBoxImageMask::get_image); - ObjectTypeDB::bind_method(_MD("set_expand","expand"),&StyleBoxImageMask::set_expand); - ObjectTypeDB::bind_method(_MD("get_expand"),&StyleBoxImageMask::get_expand); - ObjectTypeDB::bind_method(_MD("set_expand_margin_size","margin","size"),&StyleBoxImageMask::set_expand_margin_size); - ObjectTypeDB::bind_method(_MD("get_expand_margin_size","margin"),&StyleBoxImageMask::get_expand_margin_size); - - ADD_PROPERTY( PropertyInfo(Variant::IMAGE, "image"), _SCS("set_image"), _SCS("get_image")); - ADD_PROPERTY( PropertyInfo(Variant::BOOL, "expand"), _SCS("set_expand"), _SCS("get_expand")); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "expand_margin/left", PROPERTY_HINT_RANGE,"0,2048,1" ), _SCS("set_expand_margin_size"),_SCS("get_expand_margin_size"), MARGIN_LEFT ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "expand_margin/right", PROPERTY_HINT_RANGE,"0,2048,1" ), _SCS("set_expand_margin_size"),_SCS("get_expand_margin_size"), MARGIN_RIGHT ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "expand_margin/top", PROPERTY_HINT_RANGE,"0,2048,1" ), _SCS("set_expand_margin_size"),_SCS("get_expand_margin_size"), MARGIN_TOP ); - ADD_PROPERTYI( PropertyInfo( Variant::REAL, "expand_margin/bottom", PROPERTY_HINT_RANGE,"0,2048,1" ), _SCS("set_expand_margin_size"),_SCS("get_expand_margin_size"), MARGIN_BOTTOM ); - -} - - -bool StyleBoxImageMask::test_mask(const Point2& p_point, const Rect2& p_rect) const { - - if (image.empty()) - return false; - if (p_rect.size.x<1) - return false; - if (p_rect.size.y<1) - return false; - - Size2i imgsize(image.get_width(),image.get_height()); - if (imgsize.x<=0 || imgsize.y<=0) - return false; - - Point2i img_expand_size( imgsize.x - expand_margin[MARGIN_LEFT] - expand_margin[MARGIN_RIGHT], imgsize.y - expand_margin[MARGIN_TOP] - expand_margin[MARGIN_BOTTOM]); - Point2i rect_expand_size( p_rect.size.x - expand_margin[MARGIN_LEFT] - expand_margin[MARGIN_RIGHT], p_rect.size.y - expand_margin[MARGIN_TOP] - expand_margin[MARGIN_BOTTOM]); - if (rect_expand_size.x<1) - rect_expand_size.x=1; - if (rect_expand_size.y<1) - rect_expand_size.y=1; - - - Point2i click_pos; - - - //treat x - - if (p_point.x=p_rect.pos.x+p_rect.size.x) - click_pos.x=imgsize.x-1; - else if ((p_point.x-p_rect.pos.x) imgsize.x) - click_pos.x=imgsize.x; - - //treat y - - if (p_point.y=p_rect.pos.y+p_rect.size.y) - click_pos.y=imgsize.y-1; - else if ((p_point.y-p_rect.pos.y) imgsize.y) - click_pos.y=imgsize.y; - - return image.get_pixel(click_pos.x,click_pos.y).gray()>0.5; - -} - - -void StyleBoxImageMask::set_image(const Image& p_image) { - - image=p_image; -} -Image StyleBoxImageMask::get_image() const { - - return image; -} - - -void StyleBoxImageMask::set_expand(bool p_expand) { - - expand=p_expand; -} -bool StyleBoxImageMask::get_expand() const { - - return expand; -} -void StyleBoxImageMask::set_expand_margin_size(Margin p_expand_margin,float p_size) { - - ERR_FAIL_INDEX(p_expand_margin,4); - expand_margin[p_expand_margin]=p_size; -} - - -float StyleBoxImageMask::get_expand_margin_size(Margin p_expand_margin) const { - - ERR_FAIL_INDEX_V(p_expand_margin,4,0); - return expand_margin[p_expand_margin]; -} - -StyleBoxImageMask::StyleBoxImageMask() { - - for (int i=0;i<4;i++) { - expand_margin[i]=0; - } - expand=true; -} diff --git a/scene/resources/style_box.h b/scene/resources/style_box.h index f667318e247..6ac67148c54 100644 --- a/scene/resources/style_box.h +++ b/scene/resources/style_box.h @@ -167,35 +167,5 @@ public: }; -class StyleBoxImageMask : public StyleBox { - - OBJ_TYPE( StyleBoxImageMask, StyleBox ); - virtual float get_style_margin(Margin p_margin) const { return 0; } - - Image image; - float expand_margin[4]; - bool expand; - -protected: - - static void _bind_methods(); - -public: - - virtual void draw(RID p_canvas_item,const Rect2& p_rect) const {} - virtual bool test_mask(const Point2& p_point, const Rect2& p_rect) const; - - void set_image(const Image& p_image); - Image get_image() const; - - void set_expand(bool p_expand); - bool get_expand() const; - void set_expand_margin_size(Margin p_expand_margin,float p_size); - float get_expand_margin_size(Margin p_expand_margin) const; - - StyleBoxImageMask(); - -}; - #endif diff --git a/scene/resources/texture.cpp b/scene/resources/texture.cpp index 726b1938c46..c83c5e6b802 100644 --- a/scene/resources/texture.cpp +++ b/scene/resources/texture.cpp @@ -258,23 +258,13 @@ void ImageTexture::load(const String& p_path) { void ImageTexture::set_data(const Image& p_image) { VisualServer::get_singleton()->texture_set_data(texture,p_image); - VisualServer::get_singleton()->texture_set_reload_hook(texture,0,StringName()); //hook is erased if data is changed + _change_notify(); } void ImageTexture::_resource_path_changed() { String path=get_path(); - if (VS::get_singleton()->has_feature(VS::FEATURE_NEEDS_RELOAD_HOOK)) { - //this needs to be done much better, but probably will end up being deprecated as technology advances - if (path.is_resource_file() && ImageLoader::recognize(path.extension())) { - - //hook is set only if path is hookable - VisualServer::get_singleton()->texture_set_reload_hook(texture,get_instance_ID(),"_reload_hook"); - } else { - VisualServer::get_singleton()->texture_set_reload_hook(texture,0,StringName()); - } - } } Image ImageTexture::get_data() const { @@ -300,7 +290,7 @@ RID ImageTexture::get_rid() const { void ImageTexture::fix_alpha_edges() { - if (format==Image::FORMAT_RGBA /*&& !(flags&FLAG_CUBEMAP)*/) { + if (format==Image::FORMAT_RGBA8 /*&& !(flags&FLAG_CUBEMAP)*/) { Image img = get_data(); img.fix_alpha_edges(); @@ -310,7 +300,7 @@ void ImageTexture::fix_alpha_edges() { void ImageTexture::premultiply_alpha() { - if (format==Image::FORMAT_RGBA /*&& !(flags&FLAG_CUBEMAP)*/) { + if (format==Image::FORMAT_RGBA8 /*&& !(flags&FLAG_CUBEMAP)*/) { Image img = get_data(); img.premultiply_alpha(); @@ -337,7 +327,7 @@ void ImageTexture::shrink_x2_and_keep_size() { bool ImageTexture::has_alpha() const { - return ( format==Image::FORMAT_GRAYSCALE_ALPHA || format==Image::FORMAT_INDEXED_ALPHA || format==Image::FORMAT_RGBA ); + return ( format==Image::FORMAT_LA8 || format==Image::FORMAT_RGBA8 ); } diff --git a/scene/resources/texture.h b/scene/resources/texture.h index 05ea8339789..fc5f05479c7 100644 --- a/scene/resources/texture.h +++ b/scene/resources/texture.h @@ -54,7 +54,7 @@ public: FLAG_FILTER=VisualServer::TEXTURE_FLAG_FILTER, FLAG_ANISOTROPIC_FILTER=VisualServer::TEXTURE_FLAG_ANISOTROPIC_FILTER, FLAG_CONVERT_TO_LINEAR=VisualServer::TEXTURE_FLAG_CONVERT_TO_LINEAR, - FLAG_VIDEO_SURFACE=VisualServer::TEXTURE_FLAG_VIDEO_SURFACE, + FLAG_VIDEO_SURFACE=VisualServer::TEXTURE_FLAG_USED_FOR_STREAMING, FLAGS_DEFAULT=FLAG_MIPMAPS|FLAG_REPEAT|FLAG_FILTER, FLAG_MIRRORED_REPEAT=VisualServer::TEXTURE_FLAG_MIRRORED_REPEAT }; diff --git a/servers/audio/audio_server_sw.h b/servers/audio/audio_server_sw.h index fda952fa94c..2f56479404f 100644 --- a/servers/audio/audio_server_sw.h +++ b/servers/audio/audio_server_sw.h @@ -53,7 +53,7 @@ class AudioServerSW : public AudioServer { virtual AudioMixer *get_mixer(); virtual void audio_mixer_chunk_callback(int p_frames); - struct Voice { + struct Voice : public RID_Data { float volume; volatile bool active; @@ -67,7 +67,7 @@ class AudioServerSW : public AudioServer { mutable RID_Owner voice_owner; SelfList::List active_list; - struct Stream { + struct Stream : public RID_Data { bool active; List::Element *E; AudioStream *audio_stream; diff --git a/servers/audio/sample_manager_sw.h b/servers/audio/sample_manager_sw.h index bd7a11a3d20..6ca3a8e8fbb 100644 --- a/servers/audio/sample_manager_sw.h +++ b/servers/audio/sample_manager_sw.h @@ -74,7 +74,7 @@ public: class SampleManagerMallocSW : public SampleManagerSW { - struct Sample { + struct Sample : public RID_Data { void *data; int length; diff --git a/servers/physics/broad_phase_octree.cpp b/servers/physics/broad_phase_octree.cpp index bfe41f84239..f1c19a618f3 100644 --- a/servers/physics/broad_phase_octree.cpp +++ b/servers/physics/broad_phase_octree.cpp @@ -29,7 +29,7 @@ #include "broad_phase_octree.h" #include "collision_object_sw.h" -ID BroadPhaseOctree::create(CollisionObjectSW *p_object, int p_subindex) { +BroadPhaseSW::ID BroadPhaseOctree::create(CollisionObjectSW *p_object, int p_subindex) { ID oid = octree.create(p_object,AABB(),p_subindex,false,1<get_type(),0); return oid; diff --git a/servers/physics/constraint_sw.h b/servers/physics/constraint_sw.h index d61701ac074..5aa96bc0262 100644 --- a/servers/physics/constraint_sw.h +++ b/servers/physics/constraint_sw.h @@ -31,7 +31,7 @@ #include "body_sw.h" -class ConstraintSW { +class ConstraintSW : public RID_Data { BodySW **_body_ptr; int _body_count; diff --git a/servers/physics/shape_sw.h b/servers/physics/shape_sw.h index 39779bcda38..8901c706cc5 100644 --- a/servers/physics/shape_sw.h +++ b/servers/physics/shape_sw.h @@ -46,7 +46,7 @@ SHAPE_CUSTOM, ///< Server-Implementation based custom shape, calling shape_creat class ShapeSW; -class ShapeOwnerSW { +class ShapeOwnerSW : public RID_Data { public: virtual void _shape_changed()=0; @@ -56,7 +56,7 @@ public: }; -class ShapeSW { +class ShapeSW : public RID_Data { RID self; AABB aabb; diff --git a/servers/physics/space_sw.h b/servers/physics/space_sw.h index 3fdef7e62b3..30720b8be2e 100644 --- a/servers/physics/space_sw.h +++ b/servers/physics/space_sw.h @@ -57,7 +57,7 @@ public: -class SpaceSW { +class SpaceSW : public RID_Data { public: diff --git a/servers/physics_2d/broad_phase_2d_basic.cpp b/servers/physics_2d/broad_phase_2d_basic.cpp index 3a95bb24114..0f45735d560 100644 --- a/servers/physics_2d/broad_phase_2d_basic.cpp +++ b/servers/physics_2d/broad_phase_2d_basic.cpp @@ -28,7 +28,7 @@ /*************************************************************************/ #include "broad_phase_2d_basic.h" -ID BroadPhase2DBasic::create(CollisionObject2DSW *p_object_, int p_subindex) { +BroadPhase2DBasic::ID BroadPhase2DBasic::create(CollisionObject2DSW *p_object_, int p_subindex) { current++; diff --git a/servers/physics_2d/constraint_2d_sw.h b/servers/physics_2d/constraint_2d_sw.h index f776dbfe2c6..c2d1357f269 100644 --- a/servers/physics_2d/constraint_2d_sw.h +++ b/servers/physics_2d/constraint_2d_sw.h @@ -31,7 +31,7 @@ #include "body_2d_sw.h" -class Constraint2DSW { +class Constraint2DSW : public RID_Data { Body2DSW **_body_ptr; int _body_count; diff --git a/servers/physics_2d/shape_2d_sw.h b/servers/physics_2d/shape_2d_sw.h index b90c36bc04f..e069de883f8 100644 --- a/servers/physics_2d/shape_2d_sw.h +++ b/servers/physics_2d/shape_2d_sw.h @@ -46,7 +46,7 @@ SHAPE_CUSTOM, ///< Server-Implementation based custom shape, calling shape_creat class Shape2DSW; -class ShapeOwner2DSW { +class ShapeOwner2DSW : public RID_Data{ public: virtual void _shape_changed()=0; @@ -55,7 +55,7 @@ public: virtual ~ShapeOwner2DSW() {} }; -class Shape2DSW { +class Shape2DSW : public RID_Data { RID self; Rect2 aabb; diff --git a/servers/physics_2d/space_2d_sw.h b/servers/physics_2d/space_2d_sw.h index f58e8c3fe76..93dbc99c40f 100644 --- a/servers/physics_2d/space_2d_sw.h +++ b/servers/physics_2d/space_2d_sw.h @@ -58,7 +58,7 @@ public: -class Space2DSW { +class Space2DSW : public RID_Data { public: diff --git a/servers/spatial_sound/spatial_sound_server_sw.h b/servers/spatial_sound/spatial_sound_server_sw.h index b4295bf1459..68bcf607079 100644 --- a/servers/spatial_sound/spatial_sound_server_sw.h +++ b/servers/spatial_sound/spatial_sound_server_sw.h @@ -67,7 +67,7 @@ class SpatialSoundServerSW : public SpatialSoundServer { struct Room; - struct Space { + struct Space : public RID_Data { RID default_room; Set rooms; @@ -79,7 +79,7 @@ class SpatialSoundServerSW : public SpatialSoundServer { mutable RID_Owner space_owner; - struct Room { + struct Room : public RID_Data{ RID space; Transform transform; Transform inverse_transform; @@ -97,7 +97,7 @@ class SpatialSoundServerSW : public SpatialSoundServer { - struct Source { + struct Source : public RID_Data { struct Voice { @@ -161,7 +161,7 @@ class SpatialSoundServerSW : public SpatialSoundServer { mutable RID_Owner source_owner; - struct Listener { + struct Listener : public RID_Data { RID space; Transform transform; diff --git a/servers/spatial_sound_2d/spatial_sound_2d_server_sw.h b/servers/spatial_sound_2d/spatial_sound_2d_server_sw.h index 619b11f376c..e601de6eed1 100644 --- a/servers/spatial_sound_2d/spatial_sound_2d_server_sw.h +++ b/servers/spatial_sound_2d/spatial_sound_2d_server_sw.h @@ -66,7 +66,7 @@ class SpatialSound2DServerSW : public SpatialSound2DServer { struct Room; - struct Space { + struct Space : public RID_Data { RID default_room; Set rooms; @@ -78,7 +78,7 @@ class SpatialSound2DServerSW : public SpatialSound2DServer { mutable RID_Owner space_owner; - struct Room { + struct Room : public RID_Data { RID space; Matrix32 transform; Matrix32 inverse_transform; @@ -96,7 +96,7 @@ class SpatialSound2DServerSW : public SpatialSound2DServer { - struct Source { + struct Source : public RID_Data { struct Voice { @@ -160,7 +160,7 @@ class SpatialSound2DServerSW : public SpatialSound2DServer { mutable RID_Owner source_owner; - struct Listener { + struct Listener : public RID_Data { RID space; Matrix32 transform; diff --git a/servers/visual/particle_system_sw.cpp b/servers/visual/particle_system_sw.cpp deleted file mode 100644 index 07cc6d8a2af..00000000000 --- a/servers/visual/particle_system_sw.cpp +++ /dev/null @@ -1,412 +0,0 @@ -/*************************************************************************/ -/* particle_system_sw.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "particle_system_sw.h" -#include "sort.h" - - -ParticleSystemSW::ParticleSystemSW() { - - amount=8; - emitting=true; - - for (int i=0;iamount<=0) { - ERR_EXPLAIN("Invalid amount of particles: "+itos(p_system->amount)); - ERR_FAIL_COND(p_system->amount<=0); - } - if (p_system->attractor_count<0 || p_system->attractor_count>VS::MAX_PARTICLE_ATTRACTORS) { - ERR_EXPLAIN("Invalid amount of particle attractors."); - ERR_FAIL_COND(p_system->attractor_count<0 || p_system->attractor_count>VS::MAX_PARTICLE_ATTRACTORS); - } - float lifetime = p_system->particle_vars[VS::PARTICLE_LIFETIME]; - if (lifetimeamount,ParticleSystemSW::MAX_PARTICLES);; - - - int emission_point_count = p_system->emission_points.size(); - DVector::Read r; - if (emission_point_count) - r=p_system->emission_points.read(); - - if (particle_count!=particle_data.size()) { - - //clear the whole system if particle amount changed - particle_data.clear(); - particle_data.resize(p_system->amount); - particle_system_time=0; - } - - float next_time = particle_system_time+p_time; - - if (next_time > lifetime) - next_time=Math::fmod(next_time,lifetime); - - - ParticleData *pdata=&particle_data[0]; - Vector3 attractor_positions[VS::MAX_PARTICLE_ATTRACTORS]; - - for(int i=0;iattractor_count;i++) { - - attractor_positions[i]=p_transform.xform(p_system->attractors[i].pos); - } - - - for(int i=0;iamount); - - bool restart=false; - - if ( next_time < particle_system_time ) { - - if (restart_time > particle_system_time || restart_time < next_time ) - restart=true; - - } else if (restart_time > particle_system_time && restart_time < next_time ) { - restart=true; - } - - if (restart) { - - - if (p_system->emitting) { - if (emission_point_count==0) { //use AABB - if (p_system->local_coordinates) - p.pos = p_system->emission_half_extents * Vector3( _rand_from_seed(&rand_seed), _rand_from_seed(&rand_seed), _rand_from_seed(&rand_seed) ); - else - p.pos = p_transform.xform( p_system->emission_half_extents * Vector3( _rand_from_seed(&rand_seed), _rand_from_seed(&rand_seed), _rand_from_seed(&rand_seed) ) ); - } else { - //use preset positions - if (p_system->local_coordinates) - p.pos = r[_irand_from_seed(&rand_seed)%emission_point_count]; - else - p.pos = p_transform.xform( r[_irand_from_seed(&rand_seed)%emission_point_count] ); - } - - - float angle1 = _rand_from_seed(&rand_seed)*p_system->particle_vars[VS::PARTICLE_SPREAD]*Math_PI; - float angle2 = _rand_from_seed(&rand_seed)*20.0*Math_PI; // make it more random like - - Vector3 rot_xz=Vector3( Math::sin(angle1), 0.0, Math::cos(angle1) ); - Vector3 rot = Vector3( Math::cos(angle2)*rot_xz.x,Math::sin(angle2)*rot_xz.x, rot_xz.z); - - p.vel=(rot*p_system->particle_vars[VS::PARTICLE_LINEAR_VELOCITY]+rot*p_system->particle_randomness[VS::PARTICLE_LINEAR_VELOCITY]*_rand_from_seed(&rand_seed)); - if (!p_system->local_coordinates) - p.vel=p_transform.basis.xform( p.vel ); - - p.vel+=p_system->emission_base_velocity; - - p.rot=p_system->particle_vars[VS::PARTICLE_INITIAL_ANGLE]+p_system->particle_randomness[VS::PARTICLE_INITIAL_ANGLE]*_rand_from_seed(&rand_seed); - p.active=true; - for(int r=0;rgravity_normal * (p_system->particle_vars[VS::PARTICLE_GRAVITY]+(p_system->particle_randomness[VS::PARTICLE_GRAVITY]*p.random[0])); - //apply linear acceleration - force+=p.vel.normalized() * (p_system->particle_vars[VS::PARTICLE_LINEAR_ACCELERATION]+p_system->particle_randomness[VS::PARTICLE_LINEAR_ACCELERATION]*p.random[1]); - //apply radial acceleration - Vector3 org; - if (!p_system->local_coordinates) - org=p_transform.origin; - force+=(p.pos-org).normalized() * (p_system->particle_vars[VS::PARTICLE_RADIAL_ACCELERATION]+p_system->particle_randomness[VS::PARTICLE_RADIAL_ACCELERATION]*p.random[2]); - //apply tangential acceleration - force+=(p.pos-org).cross(p_system->gravity_normal).normalized() * (p_system->particle_vars[VS::PARTICLE_TANGENTIAL_ACCELERATION]+p_system->particle_randomness[VS::PARTICLE_TANGENTIAL_ACCELERATION]*p.random[3]); - //apply attractor forces - for(int a=0;aattractor_count;a++) { - - force+=(p.pos-attractor_positions[a]).normalized() * p_system->attractors[a].force; - } - - - p.vel+=force * p_time; - if (p_system->particle_vars[VS::PARTICLE_DAMPING]) { - - float v = p.vel.length(); - float damp = p_system->particle_vars[VS::PARTICLE_DAMPING] + p_system->particle_vars[VS::PARTICLE_DAMPING] * p_system->particle_randomness[VS::PARTICLE_DAMPING]; - v -= damp * p_time; - if (v<0) { - p.vel=Vector3(); - } else { - p.vel=p.vel.normalized() * v; - } - - } - p.rot+=(p_system->particle_vars[VS::PARTICLE_ANGULAR_VELOCITY]+p_system->particle_randomness[VS::PARTICLE_ANGULAR_VELOCITY]*p.random[4]) *p_time; - p.pos+=p.vel * p_time; - } - } - - particle_system_time=Math::fmod( particle_system_time+p_time, lifetime ); - - -} - -ParticleSystemProcessSW::ParticleSystemProcessSW() { - - particle_system_time=0; - rand_seed=1234567; - valid=false; -} - - -struct _ParticleSorterSW { - - - _FORCE_INLINE_ bool operator()(const ParticleSystemDrawInfoSW::ParticleDrawInfo *p_a,const ParticleSystemDrawInfoSW::ParticleDrawInfo *p_b) const { - - return p_a->d > p_b->d; // draw from further away to closest - } -}; - -void ParticleSystemDrawInfoSW::prepare(const ParticleSystemSW *p_system,const ParticleSystemProcessSW *p_process,const Transform& p_system_transform,const Transform& p_camera_transform) { - - ERR_FAIL_COND(p_process->particle_data.size() != p_system->amount); - ERR_FAIL_COND(p_system->amount<=0 || p_system->amount>=ParticleSystemSW::MAX_PARTICLES); - - const ParticleSystemProcessSW::ParticleData *pdata=&p_process->particle_data[0]; - float time_pos=p_process->particle_system_time/p_system->particle_vars[VS::PARTICLE_LIFETIME]; - - ParticleSystemSW::ColorPhase cphase[VS::MAX_PARTICLE_COLOR_PHASES]; - - float last=-1; - int col_count=0; - - for(int i=0;icolor_phase_count;i++) { - - if (p_system->color_phases[i].pos<=last) - break; - cphase[i]=p_system->color_phases[i]; - col_count++; - } - - - - - - Vector3 camera_z_axis = p_camera_transform.basis.get_axis(2); - - for(int i=0;iamount;i++) { - - ParticleDrawInfo &pdi=draw_info[i]; - pdi.data=&pdata[i]; - pdi.transform.origin=pdi.data->pos; - if (p_system->local_coordinates) - pdi.transform.origin=p_system_transform.xform(pdi.transform.origin); - - pdi.d=-camera_z_axis.dot(pdi.transform.origin); - - // adjust particle size, color and rotation - - float time = ((float)i / p_system->amount); - if (timeheight_from_velocity) { - - Vector3 veld = pdi.data->vel; - Vector3 cam_z = camera_z_axis.normalized(); - float vc = Math::abs(veld.normalized().dot(cam_z)); - - if (vc<(1.0-CMP_EPSILON)) { - up = Plane(cam_z,0).project(veld).normalized(); - float h = p_system->particle_vars[VS::PARTICLE_HEIGHT]+p_system->particle_randomness[VS::PARTICLE_HEIGHT]*pdi.data->random[7]; - float velh = veld.length(); - h+=velh*(p_system->particle_vars[VS::PARTICLE_HEIGHT_SPEED_SCALE]+p_system->particle_randomness[VS::PARTICLE_HEIGHT_SPEED_SCALE]*pdi.data->random[7]); - - - up_scale=Math::lerp(1.0,h,(1.0-vc)); - } - - } else if (pdi.data->rot) { - - up.rotate(camera_z_axis,pdi.data->rot); - } - - { - // matrix - Vector3 v_z = (p_camera_transform.origin-pdi.transform.origin).normalized(); -// Vector3 v_z = (p_camera_transform.origin-pdi.data->pos).normalized(); - Vector3 v_y = up; - Vector3 v_x = v_y.cross(v_z); - v_y = v_z.cross(v_x); - v_x.normalize(); - v_y.normalize(); - - - float initial_scale, final_scale; - initial_scale = p_system->particle_vars[VS::PARTICLE_INITIAL_SIZE]+p_system->particle_randomness[VS::PARTICLE_INITIAL_SIZE]*pdi.data->random[5]; - final_scale = p_system->particle_vars[VS::PARTICLE_FINAL_SIZE]+p_system->particle_randomness[VS::PARTICLE_FINAL_SIZE]*pdi.data->random[6]; - float scale = initial_scale + time * (final_scale - initial_scale); - - pdi.transform.basis.set_axis(0,v_x * scale); - pdi.transform.basis.set_axis(1,v_y * scale * up_scale); - pdi.transform.basis.set_axis(2,v_z * scale); - } - - - - int cpos=0; - - while(cpos time) - break; - cpos++; - } - - cpos--; - - - if (cpos==-1) - pdi.color=Color(1,1,1,1); - else { - if (cpos==col_count-1) - pdi.color=cphase[cpos].color; - else { - float diff = (cphase[cpos+1].pos-cphase[cpos].pos); - if (diff>0) - pdi.color=cphase[cpos].color.linear_interpolate(cphase[cpos+1].color, (time - cphase[cpos].pos) / diff ); - else - pdi.color=cphase[cpos+1].color; - } - } - - - draw_info_order[i]=&pdi; - - } - - - SortArray particle_sort; - particle_sort.sort(&draw_info_order[0],p_system->amount); - -} diff --git a/servers/visual/particle_system_sw.h b/servers/visual/particle_system_sw.h deleted file mode 100644 index 4edcecaaa93..00000000000 --- a/servers/visual/particle_system_sw.h +++ /dev/null @@ -1,131 +0,0 @@ -/*************************************************************************/ -/* particle_system_sw.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef PARTICLE_SYSTEM_SW_H -#define PARTICLE_SYSTEM_SW_H - -/** - @author Juan Linietsky -*/ - -#include "servers/visual_server.h" - -struct ParticleSystemSW { - enum { - - MAX_PARTICLES=1024 - }; - - float particle_vars[VS::PARTICLE_VAR_MAX]; - float particle_randomness[VS::PARTICLE_VAR_MAX]; - - Vector3 emission_half_extents; - DVector emission_points; - Vector3 gravity_normal; - Vector3 emission_base_velocity; - int amount; - bool emitting; - bool height_from_velocity; - AABB visibility_aabb; - bool sort; - bool local_coordinates; - - struct ColorPhase { - - float pos; - Color color; - ColorPhase() { pos=1.0; color=Color(0.0,0.0,1.0,1.0); } - }; - - int color_phase_count; - ColorPhase color_phases[VS::MAX_PARTICLE_COLOR_PHASES]; - - - struct Attractor { - - Vector3 pos; - float force; - }; - - int attractor_count; - Attractor attractors[VS::MAX_PARTICLE_ATTRACTORS]; - - - ParticleSystemSW(); - ~ParticleSystemSW(); - -}; - - -struct ParticleSystemProcessSW { - - enum { - PARTICLE_RANDOM_NUMBERS = 8, - }; - - struct ParticleData { - - Vector3 pos; - Vector3 vel; - float rot; - bool active; - float random[PARTICLE_RANDOM_NUMBERS]; - - ParticleData() { active=0; rot=0; } - }; - - - bool valid; - float particle_system_time; - uint32_t rand_seed; - Vector particle_data; - - void process(const ParticleSystemSW *p_system,const Transform& p_transform,float p_time); - - ParticleSystemProcessSW(); -}; - -struct ParticleSystemDrawInfoSW { - - struct ParticleDrawInfo { - - const ParticleSystemProcessSW::ParticleData *data; - float d; - Transform transform; - Color color; - - }; - - ParticleDrawInfo draw_info[ParticleSystemSW::MAX_PARTICLES]; - ParticleDrawInfo *draw_info_order[ParticleSystemSW::MAX_PARTICLES]; - - void prepare(const ParticleSystemSW *p_system,const ParticleSystemProcessSW *p_process,const Transform& p_system_transform,const Transform& p_camera_transform); - -}; - -#endif diff --git a/servers/visual/rasterizer.cpp b/servers/visual/rasterizer.cpp index a4b91e17fee..2952098a3c0 100644 --- a/servers/visual/rasterizer.cpp +++ b/servers/visual/rasterizer.cpp @@ -30,6 +30,23 @@ #include "print_string.h" #include "os/os.h" + +Rasterizer* (*Rasterizer::_create_func)()=NULL; + +Rasterizer *Rasterizer::create() { + + return _create_func(); +} + +RasterizerStorage*RasterizerStorage::base_signleton=NULL; + +RasterizerStorage::RasterizerStorage() { + + base_signleton=this; +} + +#if 0 + RID Rasterizer::create_default_material() { return material_create(); @@ -636,3 +653,5 @@ RID Rasterizer::create_overdraw_debug_material() { return mat; } + +#endif diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 8cc567072f4..df18dd1679e 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -29,6 +29,670 @@ #ifndef RASTERIZER_H #define RASTERIZER_H + +#include "servers/visual_server.h" +#include "camera_matrix.h" + + + +class RasterizerStorage { +public: + /* TEXTURE API */ + + virtual RID texture_create()=0; + virtual void texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags=VS::TEXTURE_FLAGS_DEFAULT)=0; + virtual void texture_set_data(RID p_texture,const Image& p_image,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT)=0; + virtual Image texture_get_data(RID p_texture,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT) const=0; + virtual void texture_set_flags(RID p_texture,uint32_t p_flags)=0; + virtual uint32_t texture_get_flags(RID p_texture) const=0; + virtual Image::Format texture_get_format(RID p_texture) const=0; + virtual uint32_t texture_get_width(RID p_texture) const=0; + virtual uint32_t texture_get_height(RID p_texture) const=0; + virtual void texture_set_size_override(RID p_texture,int p_width, int p_height)=0; + + virtual void texture_set_path(RID p_texture,const String& p_path)=0; + virtual String texture_get_path(RID p_texture) const=0; + + virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable)=0; + + virtual void texture_debug_usage(List *r_info)=0; + + + /* SHADER API */ + + + virtual RID shader_create(VS::ShaderMode p_mode=VS::SHADER_SPATIAL)=0; + + virtual void shader_set_mode(RID p_shader,VS::ShaderMode p_mode)=0; + virtual VS::ShaderMode shader_get_mode(RID p_shader) const=0; + + virtual void shader_set_code(RID p_shader, const String& p_code)=0; + virtual String shader_get_code(RID p_shader) const=0; + virtual void shader_get_param_list(RID p_shader, List *p_param_list) const=0; + + virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture)=0; + virtual RID shader_get_default_texture_param(RID p_shader, const StringName& p_name) const=0; + + + /* COMMON MATERIAL API */ + + virtual RID material_create()=0; + + virtual void material_set_shader(RID p_shader_material, RID p_shader)=0; + virtual RID material_get_shader(RID p_shader_material) const=0; + + virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value)=0; + virtual Variant material_get_param(RID p_material, const StringName& p_param) const=0; + + /* MESH API */ + + virtual RID mesh_create()=0; + + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >())=0; + + virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount)=0; + virtual int mesh_get_morph_target_count(RID p_mesh) const=0; + + + virtual void mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode)=0; + virtual VS::MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const=0; + + virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material)=0; + virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const=0; + + virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const=0; + virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const=0; + + virtual DVector mesh_surface_get_array(RID p_mesh, int p_surface) const=0; + virtual DVector mesh_surface_get_index_array(RID p_mesh, int p_surface) const=0; + + + virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const=0; + virtual VS::PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const=0; + + virtual void mesh_remove_surface(RID p_mesh,int p_index)=0; + virtual int mesh_get_surface_count(RID p_mesh) const=0; + + virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb)=0; + virtual AABB mesh_get_custom_aabb(RID p_mesh) const=0; + + virtual AABB mesh_get_aabb(RID p_mesh) const=0; + virtual void mesh_clear(RID p_mesh)=0; + + /* MULTIMESH API */ + + + virtual RID multimesh_create()=0; + + virtual void multimesh_allocate(RID p_multimesh,int p_instances,VS::MultimeshTransformFormat p_transform_format,VS::MultimeshColorFormat p_color_format,bool p_gen_aabb=true)=0; + virtual int multimesh_get_instance_count(RID p_multimesh) const=0; + + virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh)=0; + virtual void multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb)=0; + virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform)=0; + virtual void multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform)=0; + virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color)=0; + + virtual RID multimesh_get_mesh(RID p_multimesh) const=0; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const=0; + + virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const=0; + virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const=0; + virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const=0; + + virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible)=0; + virtual int multimesh_get_visible_instances(RID p_multimesh) const=0; + + virtual AABB multimesh_get_aabb(RID p_mesh) const=0; + + /* IMMEDIATE API */ + + virtual RID immediate_create()=0; + virtual void immediate_begin(RID p_immediate,VS::PrimitiveType p_rimitive,RID p_texture=RID())=0; + virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex)=0; + virtual void immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex)=0; + virtual void immediate_normal(RID p_immediate,const Vector3& p_normal)=0; + virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent)=0; + virtual void immediate_color(RID p_immediate,const Color& p_color)=0; + virtual void immediate_uv(RID p_immediate,const Vector2& tex_uv)=0; + virtual void immediate_uv2(RID p_immediate,const Vector2& tex_uv)=0; + virtual void immediate_end(RID p_immediate)=0; + virtual void immediate_clear(RID p_immediate)=0; + virtual void immediate_set_material(RID p_immediate,RID p_material)=0; + virtual RID immediate_get_material(RID p_immediate) const=0; + + /* SKELETON API */ + + virtual RID skeleton_create()=0; + virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false)=0; + virtual int skeleton_get_bone_count(RID p_skeleton) const=0; + virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform)=0; + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone)=0; + virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform)=0; + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone)=0; + + /* Light API */ + + virtual RID light_create(VS::LightType p_type)=0; + + virtual void light_set_color(RID p_light,const Color& p_color)=0; + virtual void light_set_param(RID p_light,VS::LightParam p_param,float p_value)=0; + virtual void light_set_shadow(RID p_light,bool p_enabled)=0; + virtual void light_set_projector(RID p_light,RID p_texture)=0; + virtual void light_set_attenuation_texure(RID p_light,RID p_texture)=0; + virtual void light_set_negative(RID p_light,bool p_enable)=0; + virtual void light_set_cull_mask(RID p_light,uint32_t p_mask)=0; + virtual void light_set_shader(RID p_light,RID p_shader)=0; + + + virtual void light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode)=0; + + /* PROBE API */ + + virtual RID reflection_probe_create()=0; + + virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity)=0; + virtual void reflection_probe_set_clip(RID p_probe, float p_near, float p_far)=0; + virtual void reflection_probe_set_min_blend_distance(RID p_probe, float p_distance)=0; + virtual void reflection_probe_set_extents(RID p_probe, const Vector3& p_extents)=0; + virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset)=0; + virtual void reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable)=0; + virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution)=0; + virtual void reflection_probe_set_hide_skybox(RID p_probe, bool p_hide)=0; + virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers)=0; + + + /* ROOM API */ + + virtual RID room_create()=0; + virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform)=0; + virtual void room_clear_bounds()=0; + + /* PORTAL API */ + + // portals are only (x/y) points, forming a convex shape, which its clockwise + // order points outside. (z is 0)=0; + + virtual RID portal_create()=0; + virtual void portal_set_shape(RID p_portal, const Vector& p_shape)=0; + virtual void portal_set_enabled(RID p_portal, bool p_enabled)=0; + virtual void portal_set_disable_distance(RID p_portal, float p_distance)=0; + virtual void portal_set_disabled_color(RID p_portal, const Color& p_color)=0; + + + /* RENDER TARGET */ + + enum RenderTargetFlags { + RENDER_TARGET_VFLIP, + RENDER_TARGET_TRANSPARENT, + RENDER_TARGET_NO_3D, + RENDER_TARGET_NO_SAMPLING, + RENDER_TARGET_FLAG_MAX + }; + + virtual RID render_target_create()=0; + virtual void render_target_set_size(RID p_render_target,int p_width, int p_height)=0; + virtual RID render_target_get_texture(RID p_render_target) const=0; + virtual Image render_target_get_image(RID p_render_target) const=0; + virtual void render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value)=0; + virtual bool render_target_renedered_in_frame(RID p_render_target)=0; + + + /* CANVAS SHADOW */ + + virtual RID canvas_light_shadow_buffer_create(int p_width)=0; + + /* LIGHT SHADOW MAPPING */ + + virtual RID canvas_light_occluder_create()=0; + virtual void canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines)=0; + + virtual bool free(RID p_rid)=0; + + + static RasterizerStorage*base_signleton; + RasterizerStorage(); + virtual ~RasterizerStorage() {} +}; + + + + +class RasterizerCanvas { +public: + + enum CanvasRectFlags { + + CANVAS_RECT_REGION=1, + CANVAS_RECT_TILE=2, + CANVAS_RECT_FLIP_H=4, + CANVAS_RECT_FLIP_V=8, + CANVAS_RECT_TRANSPOSE=16 + }; + + + struct Light : public RID_Data { + + + + bool enabled; + Color color; + Matrix32 xform; + float height; + float energy; + float scale; + int z_min; + int z_max; + int layer_min; + int layer_max; + int item_mask; + int item_shadow_mask; + VS::CanvasLightMode mode; + RID texture; + Vector2 texture_offset; + RID canvas; + RID shadow_buffer; + int shadow_buffer_size; + float shadow_gradient_length; + VS::CanvasLightShadowFilter shadow_filter; + Color shadow_color; + + + void *texture_cache; // implementation dependent + Rect2 rect_cache; + Matrix32 xform_cache; + float radius_cache; //used for shadow far plane + CameraMatrix shadow_matrix_cache; + + Matrix32 light_shader_xform; + Vector2 light_shader_pos; + + Light *shadows_next_ptr; + Light *filter_next_ptr; + Light *next_ptr; + Light *mask_next_ptr; + + RID light_internal; + + Light() { + enabled=true; + color=Color(1,1,1); + shadow_color=Color(0,0,0,0); + height=0; + z_min=-1024; + z_max=1024; + layer_min=0; + layer_max=0; + item_mask=1; + scale=1.0; + energy=1.0; + item_shadow_mask=-1; + mode=VS::CANVAS_LIGHT_MODE_ADD; + texture_cache=NULL; + next_ptr=NULL; + mask_next_ptr=NULL; + filter_next_ptr=NULL; + shadow_buffer_size=256; + shadow_gradient_length=0; + shadow_filter=VS::CANVAS_LIGHT_FILTER_NONE; + + } + }; + + virtual RID light_internal_create()=0; + virtual void light_internal_update(RID p_rid, Light* p_light)=0; + virtual void light_internal_free(RID p_rid)=0; + + struct Item : public RID_Data { + + struct Command { + + enum Type { + + TYPE_LINE, + TYPE_RECT, + TYPE_NINEPATCH, + TYPE_PRIMITIVE, + TYPE_POLYGON, + TYPE_MESH, + TYPE_MULTIMESH, + TYPE_CIRCLE, + TYPE_TRANSFORM, + TYPE_CLIP_IGNORE, + }; + + Type type; + virtual ~Command(){} + }; + + struct CommandLine : public Command { + + Point2 from,to; + Color color; + float width; + bool antialiased; + CommandLine() { type = TYPE_LINE; } + }; + + struct CommandRect : public Command { + + Rect2 rect; + RID texture; + Color modulate; + Rect2 source; + uint8_t flags; + + CommandRect() { flags=0; type = TYPE_RECT; } + }; + + struct CommandNinePatch : public Command { + + Rect2 rect; + Rect2 source; + RID texture; + float margin[4]; + bool draw_center; + Color color; + VS::NinePatchAxisMode axis_x; + VS::NinePatchAxisMode axis_y; + CommandNinePatch() { draw_center=true; type = TYPE_NINEPATCH; } + }; + + struct CommandPrimitive : public Command { + + Vector points; + Vector uvs; + Vector colors; + RID texture; + float width; + + CommandPrimitive() { type = TYPE_PRIMITIVE; width=1;} + }; + + struct CommandPolygon : public Command { + + Vector indices; + Vector points; + Vector uvs; + Vector colors; + RID texture; + int count; + + CommandPolygon() { type = TYPE_POLYGON; count = 0; } + }; + + + struct CommandMesh : public Command { + + RID mesh; + RID skeleton; + CommandMesh() { type = TYPE_MESH; } + }; + + struct CommandMultiMesh : public Command { + + RID multimesh; + RID skeleton; + CommandMultiMesh() { type = TYPE_MULTIMESH; } + }; + + struct CommandCircle : public Command { + + Point2 pos; + float radius; + Color color; + CommandCircle() { type = TYPE_CIRCLE; } + }; + + struct CommandTransform : public Command { + + Matrix32 xform; + CommandTransform() { type = TYPE_TRANSFORM; } + }; + + + struct CommandClipIgnore : public Command { + + bool ignore; + CommandClipIgnore() { type = TYPE_CLIP_IGNORE; ignore=false; } + }; + + + struct ViewportRender { + VisualServer*owner; + void* udata; + Rect2 rect; + }; + + Matrix32 xform; + bool clip; + bool visible; + bool behind; + //VS::MaterialBlendMode blend_mode; + int light_mask; + Vector commands; + mutable bool custom_rect; + mutable bool rect_dirty; + mutable Rect2 rect; + RID material; + Item*next; + + struct CopyBackBuffer { + Rect2 rect; + Rect2 screen_rect; + bool full; + }; + CopyBackBuffer *copy_back_buffer; + + + Color final_modulate; + Matrix32 final_transform; + Rect2 final_clip_rect; + Item* final_clip_owner; + Item* material_owner; + ViewportRender *vp_render; + bool distance_field; + bool light_masked; + + Rect2 global_rect_cache; + + const Rect2& get_rect() const { + if (custom_rect || !rect_dirty) + return rect; + + //must update rect + int s=commands.size(); + if (s==0) { + + rect=Rect2(); + rect_dirty=false; + return rect; + } + + Matrix32 xf; + bool found_xform=false; + bool first=true; + + const Item::Command * const *cmd = &commands[0]; + + + for (int i=0;itype) { + case Item::Command::TYPE_LINE: { + + const Item::CommandLine* line = static_cast< const Item::CommandLine*>(c); + r.pos=line->from; + r.expand_to(line->to); + } break; + case Item::Command::TYPE_RECT: { + + const Item::CommandRect* crect = static_cast< const Item::CommandRect*>(c); + r=crect->rect; + + } break; + case Item::Command::TYPE_NINEPATCH: { + + const Item::CommandNinePatch* style = static_cast< const Item::CommandNinePatch*>(c); + r=style->rect; + } break; + case Item::Command::TYPE_PRIMITIVE: { + + const Item::CommandPrimitive* primitive = static_cast< const Item::CommandPrimitive*>(c); + r.pos=primitive->points[0]; + for(int i=1;ipoints.size();i++) { + + r.expand_to(primitive->points[i]); + + } + } break; + case Item::Command::TYPE_POLYGON: { + + const Item::CommandPolygon* polygon = static_cast< const Item::CommandPolygon*>(c); + int l = polygon->points.size(); + const Point2*pp=&polygon->points[0]; + r.pos=pp[0]; + for(int i=1;i(c); + AABB aabb = RasterizerStorage::base_signleton->mesh_get_aabb(mesh->mesh); + + r=Rect2(aabb.pos.x,aabb.pos.y,aabb.size.x,aabb.size.y); + + } break; + case Item::Command::TYPE_MULTIMESH: { + + const Item::CommandMultiMesh* multimesh = static_cast< const Item::CommandMultiMesh*>(c); + AABB aabb = RasterizerStorage::base_signleton->multimesh_get_aabb(multimesh->multimesh); + + r=Rect2(aabb.pos.x,aabb.pos.y,aabb.size.x,aabb.size.y); + + } break; + case Item::Command::TYPE_CIRCLE: { + + const Item::CommandCircle* circle = static_cast< const Item::CommandCircle*>(c); + r.pos=Point2(-circle->radius,-circle->radius)+circle->pos; + r.size=Point2(circle->radius*2.0,circle->radius*2.0); + } break; + case Item::Command::TYPE_TRANSFORM: { + + const Item::CommandTransform* transform = static_cast(c); + xf=transform->xform; + found_xform=true; + continue; + } break; + + case Item::Command::TYPE_CLIP_IGNORE: { + + } break; + } + + if (found_xform) { + r = xf.xform(r); + found_xform=false; + } + + + if (first) { + rect=r; + first=false; + } else + rect=rect.merge(r); + } + + rect_dirty=false; + return rect; + } + + void clear() { for (int i=0;i */ @@ -41,7 +705,7 @@ class Rasterizer { protected: - typedef void (*CanvasItemDrawViewportFunc)(VisualServer*owner,void*ud,const Rect2& p_rect); + typedef void (*ItemDrawViewportFunc)(VisualServer*owner,void*ud,const Rect2& p_rect); RID create_default_material(); RID create_overdraw_debug_material(); @@ -581,7 +1245,7 @@ public: }; - struct CanvasLight { + struct Light { @@ -597,7 +1261,7 @@ public: int layer_max; int item_mask; int item_shadow_mask; - VS::CanvasLightMode mode; + VS::LightMode mode; RID texture; Vector2 texture_offset; RID canvas; @@ -616,12 +1280,12 @@ public: Matrix32 light_shader_xform; Vector2 light_shader_pos; - CanvasLight *shadows_next_ptr; - CanvasLight *filter_next_ptr; - CanvasLight *next_ptr; - CanvasLight *mask_next_ptr; + Light *shadows_next_ptr; + Light *filter_next_ptr; + Light *next_ptr; + Light *mask_next_ptr; - CanvasLight() { + Light() { enabled=true; color=Color(1,1,1); shadow_color=Color(0,0,0,0); @@ -645,20 +1309,20 @@ public: } }; - struct CanvasItem; + struct Item; - struct CanvasItemMaterial { + struct ItemMaterial { RID shader; Map shader_param; uint32_t shader_version; - Set owners; - VS::CanvasItemShadingMode shading_mode; + Set owners; + VS::ItemShadingMode shading_mode; - CanvasItemMaterial() {shading_mode=VS::CANVAS_ITEM_SHADING_NORMAL; shader_version=0; } + ItemMaterial() {shading_mode=VS::CANVAS_ITEM_SHADING_NORMAL; shader_version=0; } }; - struct CanvasItem { + struct Item { struct Command { @@ -788,8 +1452,8 @@ public: mutable bool custom_rect; mutable bool rect_dirty; mutable Rect2 rect; - CanvasItem*next; - CanvasItemMaterial* material; + Item*next; + ItemMaterial* material; struct CopyBackBuffer { Rect2 rect; Rect2 screen_rect; @@ -801,8 +1465,8 @@ public: float final_opacity; Matrix32 final_transform; Rect2 final_clip_rect; - CanvasItem* final_clip_owner; - CanvasItem* material_owner; + Item* final_clip_owner; + Item* material_owner; ViewportRender *vp_render; bool distance_field; bool light_masked; @@ -826,35 +1490,35 @@ public: bool found_xform=false; bool first=true; - const CanvasItem::Command * const *cmd = &commands[0]; + const Item::Command * const *cmd = &commands[0]; for (int i=0;itype) { - case CanvasItem::Command::TYPE_LINE: { + case Item::Command::TYPE_LINE: { - const CanvasItem::CommandLine* line = static_cast< const CanvasItem::CommandLine*>(c); + const Item::CommandLine* line = static_cast< const Item::CommandLine*>(c); r.pos=line->from; r.expand_to(line->to); } break; - case CanvasItem::Command::TYPE_RECT: { + case Item::Command::TYPE_RECT: { - const CanvasItem::CommandRect* crect = static_cast< const CanvasItem::CommandRect*>(c); + const Item::CommandRect* crect = static_cast< const Item::CommandRect*>(c); r=crect->rect; } break; - case CanvasItem::Command::TYPE_STYLE: { + case Item::Command::TYPE_STYLE: { - const CanvasItem::CommandStyle* style = static_cast< const CanvasItem::CommandStyle*>(c); + const Item::CommandStyle* style = static_cast< const Item::CommandStyle*>(c); r=style->rect; } break; - case CanvasItem::Command::TYPE_PRIMITIVE: { + case Item::Command::TYPE_PRIMITIVE: { - const CanvasItem::CommandPrimitive* primitive = static_cast< const CanvasItem::CommandPrimitive*>(c); + const Item::CommandPrimitive* primitive = static_cast< const Item::CommandPrimitive*>(c); r.pos=primitive->points[0]; for(int i=1;ipoints.size();i++) { @@ -862,9 +1526,9 @@ public: } } break; - case CanvasItem::Command::TYPE_POLYGON: { + case Item::Command::TYPE_POLYGON: { - const CanvasItem::CommandPolygon* polygon = static_cast< const CanvasItem::CommandPolygon*>(c); + const Item::CommandPolygon* polygon = static_cast< const Item::CommandPolygon*>(c); int l = polygon->points.size(); const Point2*pp=&polygon->points[0]; r.pos=pp[0]; @@ -875,9 +1539,9 @@ public: } } break; - case CanvasItem::Command::TYPE_POLYGON_PTR: { + case Item::Command::TYPE_POLYGON_PTR: { - const CanvasItem::CommandPolygonPtr* polygon = static_cast< const CanvasItem::CommandPolygonPtr*>(c); + const Item::CommandPolygonPtr* polygon = static_cast< const Item::CommandPolygonPtr*>(c); int l = polygon->count; if (polygon->indices != NULL) { @@ -894,23 +1558,23 @@ public: } } } break; - case CanvasItem::Command::TYPE_CIRCLE: { + case Item::Command::TYPE_CIRCLE: { - const CanvasItem::CommandCircle* circle = static_cast< const CanvasItem::CommandCircle*>(c); + const Item::CommandCircle* circle = static_cast< const Item::CommandCircle*>(c); r.pos=Point2(-circle->radius,-circle->radius)+circle->pos; r.size=Point2(circle->radius*2.0,circle->radius*2.0); } break; - case CanvasItem::Command::TYPE_TRANSFORM: { + case Item::Command::TYPE_TRANSFORM: { - const CanvasItem::CommandTransform* transform = static_cast(c); + const Item::CommandTransform* transform = static_cast(c); xf=transform->xform; found_xform=true; continue; } break; - case CanvasItem::Command::TYPE_BLEND_MODE: { + case Item::Command::TYPE_BLEND_MODE: { } break; - case CanvasItem::Command::TYPE_CLIP_IGNORE: { + case Item::Command::TYPE_CLIP_IGNORE: { } break; } @@ -933,12 +1597,12 @@ public: } void clear() { for (int i=0;iwidth=p_width; - texture->height=p_height; - texture->format=p_format; - texture->flags=p_flags; -} - -void RasterizerDummy::texture_set_data(RID p_texture,const Image& p_image,VS::CubeMapSide p_cube_side) { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND(!texture); - ERR_FAIL_COND(texture->format != p_image.get_format() ); - - texture->image[p_cube_side]=p_image; - -} - -Image RasterizerDummy::texture_get_data(RID p_texture,VS::CubeMapSide p_cube_side) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,Image()); - - return texture->image[p_cube_side]; -} - -void RasterizerDummy::texture_set_flags(RID p_texture,uint32_t p_flags) { - - Texture *texture = texture_owner.get( p_texture ); - ERR_FAIL_COND(!texture); - uint32_t cube = texture->flags & VS::TEXTURE_FLAG_CUBEMAP; - texture->flags=p_flags|cube; // can't remove a cube from being a cube - -} -uint32_t RasterizerDummy::texture_get_flags(RID p_texture) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,0); - - return texture->flags; - -} -Image::Format RasterizerDummy::texture_get_format(RID p_texture) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,Image::FORMAT_GRAYSCALE); - - return texture->format; -} -uint32_t RasterizerDummy::texture_get_width(RID p_texture) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,0); - - return texture->width; -} -uint32_t RasterizerDummy::texture_get_height(RID p_texture) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,0); - - return texture->height; -} - -bool RasterizerDummy::texture_has_alpha(RID p_texture) const { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND_V(!texture,0); - - return false; - -} - -void RasterizerDummy::texture_set_size_override(RID p_texture,int p_width, int p_height) { - - Texture * texture = texture_owner.get(p_texture); - - ERR_FAIL_COND(!texture); - - ERR_FAIL_COND(p_width<=0 || p_width>4096); - ERR_FAIL_COND(p_height<=0 || p_height>4096); - //real texture size is in alloc width and height -// texture->width=p_width; -// texture->height=p_height; - -} - -void RasterizerDummy::texture_set_reload_hook(RID p_texture,ObjectID p_owner,const StringName& p_function) const { - - -} - -/* SHADER API */ - -/* SHADER API */ - -RID RasterizerDummy::shader_create(VS::ShaderMode p_mode) { - - Shader *shader = memnew( Shader ); - shader->mode=p_mode; - shader->fragment_line=0; - shader->vertex_line=0; - shader->light_line=0; - RID rid = shader_owner.make_rid(shader); - - return rid; - -} - - - -void RasterizerDummy::shader_set_mode(RID p_shader,VS::ShaderMode p_mode) { - - ERR_FAIL_INDEX(p_mode,3); - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND(!shader); - shader->mode=p_mode; - -} -VS::ShaderMode RasterizerDummy::shader_get_mode(RID p_shader) const { - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND_V(!shader,VS::SHADER_MATERIAL); - return shader->mode; -} - -void RasterizerDummy::shader_set_code(RID p_shader, const String& p_vertex, const String& p_fragment,const String& p_light,int p_vertex_ofs,int p_fragment_ofs,int p_light_ofs) { - - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND(!shader); - shader->fragment_code=p_fragment; - shader->vertex_code=p_vertex; - shader->light_code=p_light; - shader->fragment_line=p_fragment_ofs; - shader->vertex_line=p_vertex_ofs; - shader->light_line=p_vertex_ofs; - -} - - -String RasterizerDummy::shader_get_vertex_code(RID p_shader) const { - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND_V(!shader,String()); - return shader->vertex_code; - -} - -String RasterizerDummy::shader_get_fragment_code(RID p_shader) const { - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND_V(!shader,String()); - return shader->fragment_code; - -} - -String RasterizerDummy::shader_get_light_code(RID p_shader) const { - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND_V(!shader,String()); - return shader->light_code; - -} - -void RasterizerDummy::shader_get_param_list(RID p_shader, List *p_param_list) const { - - Shader *shader=shader_owner.get(p_shader); - ERR_FAIL_COND(!shader); - -} - - -void RasterizerDummy::shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture) { - -} - -RID RasterizerDummy::shader_get_default_texture_param(RID p_shader, const StringName& p_name) const { - - return RID(); -} - -Variant RasterizerDummy::shader_get_default_param(RID p_shader, const StringName& p_name) { - - return Variant(); -} - -/* COMMON MATERIAL API */ - - -RID RasterizerDummy::material_create() { - - return material_owner.make_rid( memnew( Material ) ); -} - -void RasterizerDummy::material_set_shader(RID p_material, RID p_shader) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - material->shader=p_shader; - -} - -RID RasterizerDummy::material_get_shader(RID p_material) const { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,RID()); - return material->shader; -} - -void RasterizerDummy::material_set_param(RID p_material, const StringName& p_param, const Variant& p_value) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - - if (p_value.get_type()==Variant::NIL) - material->shader_params.erase(p_param); - else - material->shader_params[p_param]=p_value; -} -Variant RasterizerDummy::material_get_param(RID p_material, const StringName& p_param) const { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,Variant()); - - if (material->shader_params.has(p_param)) - return material->shader_params[p_param]; - else - return Variant(); -} - - -void RasterizerDummy::material_set_flag(RID p_material, VS::MaterialFlag p_flag,bool p_enabled) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - ERR_FAIL_INDEX(p_flag,VS::MATERIAL_FLAG_MAX); - material->flags[p_flag]=p_enabled; - -} -bool RasterizerDummy::material_get_flag(RID p_material,VS::MaterialFlag p_flag) const { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,false); - ERR_FAIL_INDEX_V(p_flag,VS::MATERIAL_FLAG_MAX,false); - return material->flags[p_flag]; - - -} - -void RasterizerDummy::material_set_depth_draw_mode(RID p_material, VS::MaterialDepthDrawMode p_mode) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - material->depth_draw_mode=p_mode; -} - -VS::MaterialDepthDrawMode RasterizerDummy::material_get_depth_draw_mode(RID p_material) const{ - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,VS::MATERIAL_DEPTH_DRAW_ALWAYS); - return material->depth_draw_mode; - -} - - -void RasterizerDummy::material_set_blend_mode(RID p_material,VS::MaterialBlendMode p_mode) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - material->blend_mode=p_mode; - -} -VS::MaterialBlendMode RasterizerDummy::material_get_blend_mode(RID p_material) const { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,VS::MATERIAL_BLEND_MODE_ADD); - return material->blend_mode; -} - -void RasterizerDummy::material_set_line_width(RID p_material,float p_line_width) { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND(!material); - material->line_width=p_line_width; - -} -float RasterizerDummy::material_get_line_width(RID p_material) const { - - Material *material = material_owner.get(p_material); - ERR_FAIL_COND_V(!material,0); - - return material->line_width; -} - -/* MESH API */ - - -RID RasterizerDummy::mesh_create() { - - - return mesh_owner.make_rid( memnew( Mesh ) ); -} - - -void RasterizerDummy::mesh_add_surface(RID p_mesh,VS::PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes,bool p_alpha_sort) { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - - ERR_FAIL_INDEX( p_primitive, VS::PRIMITIVE_MAX ); - ERR_FAIL_COND(p_arrays.size()!=VS::ARRAY_MAX); - - Surface s; - - - s.format=0; - - for(int i=0;imorph_target_count; - s.morph_format=s.format; - - - Surface *surface = memnew( Surface ); - *surface=s; - - mesh->surfaces.push_back(surface); - - -} - - - -void RasterizerDummy::mesh_add_custom_surface(RID p_mesh,const Variant& p_dat) { - - ERR_EXPLAIN("Dummy Rasterizer does not support custom surfaces. Running on wrong platform?"); - ERR_FAIL_V(); -} - -Array RasterizerDummy::mesh_get_surface_arrays(RID p_mesh,int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,Array()); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), Array() ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, Array() ); - - return surface->data; - - -} -Array RasterizerDummy::mesh_get_surface_morph_arrays(RID p_mesh,int p_surface) const{ - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,Array()); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), Array() ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, Array() ); - - return surface->morph_data; - -} - - -void RasterizerDummy::mesh_set_morph_target_count(RID p_mesh,int p_amount) { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - ERR_FAIL_COND( mesh->surfaces.size()!=0 ); - - mesh->morph_target_count=p_amount; - -} - -int RasterizerDummy::mesh_get_morph_target_count(RID p_mesh) const{ - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,-1); - - return mesh->morph_target_count; - -} - -void RasterizerDummy::mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode) { - - ERR_FAIL_INDEX(p_mode,2); - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - - mesh->morph_target_mode=p_mode; - -} - -VS::MorphTargetMode RasterizerDummy::mesh_get_morph_target_mode(RID p_mesh) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,VS::MORPH_MODE_NORMALIZED); - - return mesh->morph_target_mode; - -} - - - -void RasterizerDummy::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material,bool p_owned) { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - ERR_FAIL_INDEX(p_surface, mesh->surfaces.size() ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND( !surface); - - if (surface->material_owned && surface->material.is_valid()) - free(surface->material); - - surface->material_owned=p_owned; - surface->material=p_material; -} - -RID RasterizerDummy::mesh_surface_get_material(RID p_mesh, int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,RID()); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), RID() ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, RID() ); - - return surface->material; -} - -int RasterizerDummy::mesh_surface_get_array_len(RID p_mesh, int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,-1); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), -1 ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, -1 ); - - Vector3Array arr = surface->data[VS::ARRAY_VERTEX]; - return arr.size(); - -} - -int RasterizerDummy::mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,-1); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), -1 ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, -1 ); - - IntArray arr = surface->data[VS::ARRAY_INDEX]; - return arr.size(); - -} -uint32_t RasterizerDummy::mesh_surface_get_format(RID p_mesh, int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,0); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), 0 ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, 0 ); - - return surface->format; -} -VS::PrimitiveType RasterizerDummy::mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,VS::PRIMITIVE_POINTS); - ERR_FAIL_INDEX_V(p_surface, mesh->surfaces.size(), VS::PRIMITIVE_POINTS ); - Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND_V( !surface, VS::PRIMITIVE_POINTS ); - - return surface->primitive; -} - -void RasterizerDummy::mesh_remove_surface(RID p_mesh,int p_index) { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - ERR_FAIL_INDEX(p_index, mesh->surfaces.size() ); - Surface *surface = mesh->surfaces[p_index]; - ERR_FAIL_COND( !surface); - - memdelete( mesh->surfaces[p_index] ); - mesh->surfaces.remove(p_index); - -} -int RasterizerDummy::mesh_get_surface_count(RID p_mesh) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,-1); - - return mesh->surfaces.size(); -} - -AABB RasterizerDummy::mesh_get_aabb(RID p_mesh,RID p_skeleton) const { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,AABB()); - - AABB aabb; - - for (int i=0;isurfaces.size();i++) { - - if (i==0) - aabb=mesh->surfaces[i]->aabb; - else - aabb.merge_with(mesh->surfaces[i]->aabb); - } - - return aabb; -} - -void RasterizerDummy::mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb) { - - Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND(!mesh); - - mesh->custom_aabb=p_aabb; -} - -AABB RasterizerDummy::mesh_get_custom_aabb(RID p_mesh) const { - - const Mesh *mesh = mesh_owner.get( p_mesh ); - ERR_FAIL_COND_V(!mesh,AABB()); - - return mesh->custom_aabb; - -} - -/* MULTIMESH API */ - -RID RasterizerDummy::multimesh_create() { - - return multimesh_owner.make_rid( memnew( MultiMesh )); -} - -void RasterizerDummy::multimesh_set_instance_count(RID p_multimesh,int p_count) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh); - - multimesh->elements.clear(); // make sure to delete everything, so it "fails" in all implementations - multimesh->elements.resize(p_count); - -} -int RasterizerDummy::multimesh_get_instance_count(RID p_multimesh) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,-1); - - return multimesh->elements.size(); -} - -void RasterizerDummy::multimesh_set_mesh(RID p_multimesh,RID p_mesh) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh); - - multimesh->mesh=p_mesh; - -} -void RasterizerDummy::multimesh_set_aabb(RID p_multimesh,const AABB& p_aabb) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh); - multimesh->aabb=p_aabb; -} -void RasterizerDummy::multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh); - ERR_FAIL_INDEX(p_index,multimesh->elements.size()); - multimesh->elements[p_index].xform=p_transform; - -} -void RasterizerDummy::multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh) - ERR_FAIL_INDEX(p_index,multimesh->elements.size()); - multimesh->elements[p_index].color=p_color; - -} - -RID RasterizerDummy::multimesh_get_mesh(RID p_multimesh) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,RID()); - - return multimesh->mesh; -} -AABB RasterizerDummy::multimesh_get_aabb(RID p_multimesh) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,AABB()); - - return multimesh->aabb; -} - -Transform RasterizerDummy::multimesh_instance_get_transform(RID p_multimesh,int p_index) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,Transform()); - - ERR_FAIL_INDEX_V(p_index,multimesh->elements.size(),Transform()); - - return multimesh->elements[p_index].xform; - -} -Color RasterizerDummy::multimesh_instance_get_color(RID p_multimesh,int p_index) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,Color()); - ERR_FAIL_INDEX_V(p_index,multimesh->elements.size(),Color()); - - return multimesh->elements[p_index].color; -} - -void RasterizerDummy::multimesh_set_visible_instances(RID p_multimesh,int p_visible) { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND(!multimesh); - multimesh->visible=p_visible; - -} - -int RasterizerDummy::multimesh_get_visible_instances(RID p_multimesh) const { - - MultiMesh *multimesh = multimesh_owner.get(p_multimesh); - ERR_FAIL_COND_V(!multimesh,-1); - return multimesh->visible; - -} - -/* IMMEDIATE API */ - - -RID RasterizerDummy::immediate_create() { - - Immediate *im = memnew( Immediate ); - return immediate_owner.make_rid(im); - -} - -void RasterizerDummy::immediate_begin(RID p_immediate,VS::PrimitiveType p_rimitive,RID p_texture){ - - -} -void RasterizerDummy::immediate_vertex(RID p_immediate,const Vector3& p_vertex){ - - -} -void RasterizerDummy::immediate_normal(RID p_immediate,const Vector3& p_normal){ - - -} -void RasterizerDummy::immediate_tangent(RID p_immediate,const Plane& p_tangent){ - - -} -void RasterizerDummy::immediate_color(RID p_immediate,const Color& p_color){ - - -} -void RasterizerDummy::immediate_uv(RID p_immediate,const Vector2& tex_uv){ - - -} -void RasterizerDummy::immediate_uv2(RID p_immediate,const Vector2& tex_uv){ - - -} - -void RasterizerDummy::immediate_end(RID p_immediate){ - - -} -void RasterizerDummy::immediate_clear(RID p_immediate) { - - -} - -AABB RasterizerDummy::immediate_get_aabb(RID p_immediate) const { - - return AABB(Vector3(-1,-1,-1),Vector3(2,2,2)); -} - -void RasterizerDummy::immediate_set_material(RID p_immediate,RID p_material) { - - Immediate *im = immediate_owner.get(p_immediate); - ERR_FAIL_COND(!im); - im->material=p_material; - -} - -RID RasterizerDummy::immediate_get_material(RID p_immediate) const { - - const Immediate *im = immediate_owner.get(p_immediate); - ERR_FAIL_COND_V(!im,RID()); - return im->material; - -} - -/* PARTICLES API */ - -RID RasterizerDummy::particles_create() { - - Particles *particles = memnew( Particles ); - ERR_FAIL_COND_V(!particles,RID()); - return particles_owner.make_rid(particles); -} - -void RasterizerDummy::particles_set_amount(RID p_particles, int p_amount) { - - ERR_FAIL_COND(p_amount<1); - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.amount=p_amount; - -} - -int RasterizerDummy::particles_get_amount(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.amount; - -} - -void RasterizerDummy::particles_set_emitting(RID p_particles, bool p_emitting) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.emitting=p_emitting;; - -} -bool RasterizerDummy::particles_is_emitting(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,false); - return particles->data.emitting; - -} - -void RasterizerDummy::particles_set_visibility_aabb(RID p_particles, const AABB& p_visibility) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.visibility_aabb=p_visibility; - -} - -void RasterizerDummy::particles_set_emission_half_extents(RID p_particles, const Vector3& p_half_extents) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - - particles->data.emission_half_extents=p_half_extents; -} -Vector3 RasterizerDummy::particles_get_emission_half_extents(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,Vector3()); - - return particles->data.emission_half_extents; -} - -void RasterizerDummy::particles_set_emission_base_velocity(RID p_particles, const Vector3& p_base_velocity) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - - particles->data.emission_base_velocity=p_base_velocity; -} - -Vector3 RasterizerDummy::particles_get_emission_base_velocity(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,Vector3()); - - return particles->data.emission_base_velocity; -} - - -void RasterizerDummy::particles_set_emission_points(RID p_particles, const DVector& p_points) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - - particles->data.emission_points=p_points; -} - -DVector RasterizerDummy::particles_get_emission_points(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,DVector()); - - return particles->data.emission_points; - -} - -void RasterizerDummy::particles_set_gravity_normal(RID p_particles, const Vector3& p_normal) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - - particles->data.gravity_normal=p_normal; - -} -Vector3 RasterizerDummy::particles_get_gravity_normal(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,Vector3()); - - return particles->data.gravity_normal; -} - - -AABB RasterizerDummy::particles_get_visibility_aabb(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,AABB()); - return particles->data.visibility_aabb; - -} - -void RasterizerDummy::particles_set_variable(RID p_particles, VS::ParticleVariable p_variable,float p_value) { - - ERR_FAIL_INDEX(p_variable,VS::PARTICLE_VAR_MAX); - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.particle_vars[p_variable]=p_value; - -} -float RasterizerDummy::particles_get_variable(RID p_particles, VS::ParticleVariable p_variable) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.particle_vars[p_variable]; -} - -void RasterizerDummy::particles_set_randomness(RID p_particles, VS::ParticleVariable p_variable,float p_randomness) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.particle_randomness[p_variable]=p_randomness; - -} -float RasterizerDummy::particles_get_randomness(RID p_particles, VS::ParticleVariable p_variable) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.particle_randomness[p_variable]; - -} - -void RasterizerDummy::particles_set_color_phases(RID p_particles, int p_phases) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - ERR_FAIL_COND( p_phases<0 || p_phases>VS::MAX_PARTICLE_COLOR_PHASES ); - particles->data.color_phase_count=p_phases; - -} -int RasterizerDummy::particles_get_color_phases(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.color_phase_count; -} - - -void RasterizerDummy::particles_set_color_phase_pos(RID p_particles, int p_phase, float p_pos) { - - ERR_FAIL_INDEX(p_phase, VS::MAX_PARTICLE_COLOR_PHASES); - if (p_pos<0.0) - p_pos=0.0; - if (p_pos>1.0) - p_pos=1.0; - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.color_phases[p_phase].pos=p_pos; - -} -float RasterizerDummy::particles_get_color_phase_pos(RID p_particles, int p_phase) const { - - ERR_FAIL_INDEX_V(p_phase, VS::MAX_PARTICLE_COLOR_PHASES, -1.0); - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.color_phases[p_phase].pos; - -} - -void RasterizerDummy::particles_set_color_phase_color(RID p_particles, int p_phase, const Color& p_color) { - - ERR_FAIL_INDEX(p_phase, VS::MAX_PARTICLE_COLOR_PHASES); - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.color_phases[p_phase].color=p_color; - - //update alpha - particles->has_alpha=false; - for(int i=0;idata.color_phases[i].color.a<0.99) - particles->has_alpha=true; - } - -} - -Color RasterizerDummy::particles_get_color_phase_color(RID p_particles, int p_phase) const { - - ERR_FAIL_INDEX_V(p_phase, VS::MAX_PARTICLE_COLOR_PHASES, Color()); - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,Color()); - return particles->data.color_phases[p_phase].color; - -} - -void RasterizerDummy::particles_set_attractors(RID p_particles, int p_attractors) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - ERR_FAIL_COND( p_attractors<0 || p_attractors>VisualServer::MAX_PARTICLE_ATTRACTORS ); - particles->data.attractor_count=p_attractors; - -} -int RasterizerDummy::particles_get_attractors(RID p_particles) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,-1); - return particles->data.attractor_count; -} - -void RasterizerDummy::particles_set_attractor_pos(RID p_particles, int p_attractor, const Vector3& p_pos) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - ERR_FAIL_INDEX(p_attractor,particles->data.attractor_count); - particles->data.attractors[p_attractor].pos=p_pos;; -} -Vector3 RasterizerDummy::particles_get_attractor_pos(RID p_particles,int p_attractor) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,Vector3()); - ERR_FAIL_INDEX_V(p_attractor,particles->data.attractor_count,Vector3()); - return particles->data.attractors[p_attractor].pos; -} - -void RasterizerDummy::particles_set_attractor_strength(RID p_particles, int p_attractor, float p_force) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - ERR_FAIL_INDEX(p_attractor,particles->data.attractor_count); - particles->data.attractors[p_attractor].force=p_force; -} - -float RasterizerDummy::particles_get_attractor_strength(RID p_particles,int p_attractor) const { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,0); - ERR_FAIL_INDEX_V(p_attractor,particles->data.attractor_count,0); - return particles->data.attractors[p_attractor].force; -} - -void RasterizerDummy::particles_set_material(RID p_particles, RID p_material,bool p_owned) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - if (particles->material_owned && particles->material.is_valid()) - free(particles->material); - - particles->material_owned=p_owned; - - particles->material=p_material; - -} -RID RasterizerDummy::particles_get_material(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,RID()); - return particles->material; - -} - -void RasterizerDummy::particles_set_use_local_coordinates(RID p_particles, bool p_enable) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.local_coordinates=p_enable; - -} - -bool RasterizerDummy::particles_is_using_local_coordinates(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,false); - return particles->data.local_coordinates; -} -bool RasterizerDummy::particles_has_height_from_velocity(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,false); - return particles->data.height_from_velocity; -} - -void RasterizerDummy::particles_set_height_from_velocity(RID p_particles, bool p_enable) { - - Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND(!particles); - particles->data.height_from_velocity=p_enable; - -} - -AABB RasterizerDummy::particles_get_aabb(RID p_particles) const { - - const Particles* particles = particles_owner.get( p_particles ); - ERR_FAIL_COND_V(!particles,AABB()); - return particles->data.visibility_aabb; -} - -/* SKELETON API */ - -RID RasterizerDummy::skeleton_create() { - - Skeleton *skeleton = memnew( Skeleton ); - ERR_FAIL_COND_V(!skeleton,RID()); - return skeleton_owner.make_rid( skeleton ); -} -void RasterizerDummy::skeleton_resize(RID p_skeleton,int p_bones) { - - Skeleton *skeleton = skeleton_owner.get( p_skeleton ); - ERR_FAIL_COND(!skeleton); - if (p_bones == skeleton->bones.size()) { - return; - }; - - skeleton->bones.resize(p_bones); - -} -int RasterizerDummy::skeleton_get_bone_count(RID p_skeleton) const { - - Skeleton *skeleton = skeleton_owner.get( p_skeleton ); - ERR_FAIL_COND_V(!skeleton, -1); - return skeleton->bones.size(); -} -void RasterizerDummy::skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform) { - - Skeleton *skeleton = skeleton_owner.get( p_skeleton ); - ERR_FAIL_COND(!skeleton); - ERR_FAIL_INDEX( p_bone, skeleton->bones.size() ); - - skeleton->bones[p_bone] = p_transform; -} - -Transform RasterizerDummy::skeleton_bone_get_transform(RID p_skeleton,int p_bone) { - - Skeleton *skeleton = skeleton_owner.get( p_skeleton ); - ERR_FAIL_COND_V(!skeleton, Transform()); - ERR_FAIL_INDEX_V( p_bone, skeleton->bones.size(), Transform() ); - - // something - return skeleton->bones[p_bone]; -} - - -/* LIGHT API */ - -RID RasterizerDummy::light_create(VS::LightType p_type) { - - Light *light = memnew( Light ); - light->type=p_type; - return light_owner.make_rid(light); -} - -VS::LightType RasterizerDummy::light_get_type(RID p_light) const { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND_V(!light,VS::LIGHT_OMNI); - return light->type; -} - -void RasterizerDummy::light_set_color(RID p_light,VS::LightColor p_type, const Color& p_color) { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND(!light); - ERR_FAIL_INDEX( p_type, 3 ); - light->colors[p_type]=p_color; -} -Color RasterizerDummy::light_get_color(RID p_light,VS::LightColor p_type) const { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND_V(!light, Color()); - ERR_FAIL_INDEX_V( p_type, 3, Color() ); - return light->colors[p_type]; -} - -void RasterizerDummy::light_set_shadow(RID p_light,bool p_enabled) { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND(!light); - light->shadow_enabled=p_enabled; -} - -bool RasterizerDummy::light_has_shadow(RID p_light) const { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND_V(!light,false); - return light->shadow_enabled; -} - -void RasterizerDummy::light_set_volumetric(RID p_light,bool p_enabled) { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND(!light); - light->volumetric_enabled=p_enabled; - -} -bool RasterizerDummy::light_is_volumetric(RID p_light) const { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND_V(!light,false); - return light->volumetric_enabled; -} - -void RasterizerDummy::light_set_projector(RID p_light,RID p_texture) { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND(!light); - light->projector=p_texture; -} -RID RasterizerDummy::light_get_projector(RID p_light) const { - - Light *light = light_owner.get(p_light); - ERR_FAIL_COND_V(!light,RID()); - return light->projector; -} - -void RasterizerDummy::light_set_var(RID p_light, VS::LightParam p_var, float p_value) { - - Light * light = light_owner.get( p_light ); - ERR_FAIL_COND(!light); - ERR_FAIL_INDEX( p_var, VS::LIGHT_PARAM_MAX ); - - light->vars[p_var]=p_value; -} -float RasterizerDummy::light_get_var(RID p_light, VS::LightParam p_var) const { - - Light * light = light_owner.get( p_light ); - ERR_FAIL_COND_V(!light,0); - - ERR_FAIL_INDEX_V( p_var, VS::LIGHT_PARAM_MAX,0 ); - - return light->vars[p_var]; -} - -void RasterizerDummy::light_set_operator(RID p_light,VS::LightOp p_op) { - - Light * light = light_owner.get( p_light ); - ERR_FAIL_COND(!light); - - -}; - -VS::LightOp RasterizerDummy::light_get_operator(RID p_light) const { - - return VS::LightOp(0); -}; - -void RasterizerDummy::light_omni_set_shadow_mode(RID p_light,VS::LightOmniShadowMode p_mode) { - - -} - -VS::LightOmniShadowMode RasterizerDummy::light_omni_get_shadow_mode(RID p_light) const{ - - return VS::LightOmniShadowMode(0); -} - -void RasterizerDummy::light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode) { - - -} - -VS::LightDirectionalShadowMode RasterizerDummy::light_directional_get_shadow_mode(RID p_light) const { - - return VS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL; -} - -void RasterizerDummy::light_directional_set_shadow_param(RID p_light,VS::LightDirectionalShadowParam p_param, float p_value) { - - -} - -float RasterizerDummy::light_directional_get_shadow_param(RID p_light,VS::LightDirectionalShadowParam p_param) const { - - return 0; -} - - -AABB RasterizerDummy::light_get_aabb(RID p_light) const { - - Light *light = light_owner.get( p_light ); - ERR_FAIL_COND_V(!light,AABB()); - - switch( light->type ) { - - case VS::LIGHT_SPOT: { - - float len=light->vars[VS::LIGHT_PARAM_RADIUS]; - float size=Math::tan(Math::deg2rad(light->vars[VS::LIGHT_PARAM_SPOT_ANGLE]))*len; - return AABB( Vector3( -size,-size,-len ), Vector3( size*2, size*2, len ) ); - } break; - case VS::LIGHT_OMNI: { - - float r = light->vars[VS::LIGHT_PARAM_RADIUS]; - return AABB( -Vector3(r,r,r), Vector3(r,r,r)*2 ); - } break; - case VS::LIGHT_DIRECTIONAL: { - - return AABB(); - } break; - default: {} - } - - ERR_FAIL_V( AABB() ); -} - - -RID RasterizerDummy::light_instance_create(RID p_light) { - - Light *light = light_owner.get( p_light ); - ERR_FAIL_COND_V(!light, RID()); - - LightInstance *light_instance = memnew( LightInstance ); - - light_instance->light=p_light; - light_instance->base=light; - - - return light_instance_owner.make_rid( light_instance ); -} -void RasterizerDummy::light_instance_set_transform(RID p_light_instance,const Transform& p_transform) { - - LightInstance *lighti = light_instance_owner.get( p_light_instance ); - ERR_FAIL_COND(!lighti); - lighti->transform=p_transform; - -} - -bool RasterizerDummy::light_instance_has_shadow(RID p_light_instance) const { - - return false; - -} - - -bool RasterizerDummy::light_instance_assign_shadow(RID p_light_instance) { - - return false; - -} - - -Rasterizer::ShadowType RasterizerDummy::light_instance_get_shadow_type(RID p_light_instance) const { - - LightInstance *lighti = light_instance_owner.get( p_light_instance ); - ERR_FAIL_COND_V(!lighti,Rasterizer::SHADOW_NONE); - - switch(lighti->base->type) { - - case VS::LIGHT_DIRECTIONAL: return SHADOW_PSM; break; - case VS::LIGHT_OMNI: return SHADOW_DUAL_PARABOLOID; break; - case VS::LIGHT_SPOT: return SHADOW_SIMPLE; break; - } - - return Rasterizer::SHADOW_NONE; -} - -Rasterizer::ShadowType RasterizerDummy::light_instance_get_shadow_type(RID p_light_instance,bool p_far) const { - - return SHADOW_NONE; -} -void RasterizerDummy::light_instance_set_shadow_transform(RID p_light_instance, int p_index, const CameraMatrix& p_camera, const Transform& p_transform, float p_split_near,float p_split_far) { - - -} - -int RasterizerDummy::light_instance_get_shadow_passes(RID p_light_instance) const { - - return 0; -} - -bool RasterizerDummy::light_instance_get_pssm_shadow_overlap(RID p_light_instance) const { - - return false; -} - - -void RasterizerDummy::light_instance_set_custom_transform(RID p_light_instance, int p_index, const CameraMatrix& p_camera, const Transform& p_transform, float p_split_near,float p_split_far) { - - LightInstance *lighti = light_instance_owner.get( p_light_instance ); - ERR_FAIL_COND(!lighti); - - ERR_FAIL_COND(lighti->base->type!=VS::LIGHT_DIRECTIONAL); - ERR_FAIL_INDEX(p_index,1); - - lighti->custom_projection=p_camera; - lighti->custom_transform=p_transform; - -} -void RasterizerDummy::shadow_clear_near() { - - -} - -bool RasterizerDummy::shadow_allocate_near(RID p_light) { - - return false; -} - -bool RasterizerDummy::shadow_allocate_far(RID p_light) { - - return false; -} - -/* PARTICLES INSTANCE */ - -RID RasterizerDummy::particles_instance_create(RID p_particles) { - - ERR_FAIL_COND_V(!particles_owner.owns(p_particles),RID()); - ParticlesInstance *particles_instance = memnew( ParticlesInstance ); - ERR_FAIL_COND_V(!particles_instance, RID() ); - particles_instance->particles=p_particles; - return particles_instance_owner.make_rid(particles_instance); -} - -void RasterizerDummy::particles_instance_set_transform(RID p_particles_instance,const Transform& p_transform) { - - ParticlesInstance *particles_instance=particles_instance_owner.get(p_particles_instance); - ERR_FAIL_COND(!particles_instance); - particles_instance->transform=p_transform; -} - - -/* RENDER API */ -/* all calls (inside begin/end shadow) are always warranted to be in the following order: */ - - -RID RasterizerDummy::viewport_data_create() { - - return RID(); -} - -RID RasterizerDummy::render_target_create(){ - - return RID(); - -} -void RasterizerDummy::render_target_set_size(RID p_render_target, int p_width, int p_height){ - - -} -RID RasterizerDummy::render_target_get_texture(RID p_render_target) const{ - - return RID(); - -} -bool RasterizerDummy::render_target_renedered_in_frame(RID p_render_target){ - - return false; -} - - -void RasterizerDummy::begin_frame() { - - - -} - -void RasterizerDummy::capture_viewport(Image* r_capture) { - - -} - - -void RasterizerDummy::clear_viewport(const Color& p_color) { - - -}; - -void RasterizerDummy::set_viewport(const VS::ViewportRect& p_viewport) { - - - -} - -void RasterizerDummy::set_render_target(RID p_render_target, bool p_transparent_bg, bool p_vflip) { - - -} - - -void RasterizerDummy::begin_scene(RID p_viewport_data,RID p_env,VS::ScenarioDebugMode p_debug) { - - -}; - -void RasterizerDummy::begin_shadow_map( RID p_light_instance, int p_shadow_pass ) { - -} - -void RasterizerDummy::set_camera(const Transform& p_world, const CameraMatrix& p_projection, bool p_ortho_hint) { - - -} - -void RasterizerDummy::add_light( RID p_light_instance ) { - - - -} - - - - -void RasterizerDummy::add_mesh( const RID& p_mesh, const InstanceData *p_data) { - - -} - -void RasterizerDummy::add_multimesh( const RID& p_multimesh, const InstanceData *p_data){ - - - - -} - -void RasterizerDummy::add_particles( const RID& p_particle_instance, const InstanceData *p_data){ - - - -} - - - -void RasterizerDummy::end_scene() { - - -} -void RasterizerDummy::end_shadow_map() { - -} - - -void RasterizerDummy::end_frame() { - - -} - -RID RasterizerDummy::canvas_light_occluder_create() { - return RID(); -} - -void RasterizerDummy::canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines) { - - -} - -RID RasterizerDummy::canvas_light_shadow_buffer_create(int p_width) { - - return RID(); -} - -void RasterizerDummy::canvas_light_shadow_buffer_update(RID p_buffer, const Matrix32& p_light_xform, int p_light_mask,float p_near, float p_far, CanvasLightOccluderInstance* p_occluders, CameraMatrix *p_xform_cache) { - - -} - -void RasterizerDummy::canvas_debug_viewport_shadows(CanvasLight* p_lights_with_shadow) { - - -} - -/* CANVAS API */ - - -void RasterizerDummy::begin_canvas_bg() { - -} -void RasterizerDummy::canvas_begin() { - - - -} -void RasterizerDummy::canvas_disable_blending() { - - - -} - -void RasterizerDummy::canvas_set_opacity(float p_opacity) { - - -} - -void RasterizerDummy::canvas_set_blend_mode(VS::MaterialBlendMode p_mode) { - - -} - - -void RasterizerDummy::canvas_begin_rect(const Matrix32& p_transform) { - - - -} - -void RasterizerDummy::canvas_set_clip(bool p_clip, const Rect2& p_rect) { - - - - -} - -void RasterizerDummy::canvas_end_rect() { - - -} - -void RasterizerDummy::canvas_draw_line(const Point2& p_from, const Point2& p_to, const Color& p_color, float p_width, bool p_antialiased) { - - - -} - -void RasterizerDummy::canvas_draw_rect(const Rect2& p_rect, int p_flags, const Rect2& p_source,RID p_texture,const Color& p_modulate) { - - - - -} -void RasterizerDummy::canvas_draw_style_box(const Rect2& p_rect, const Rect2& p_src_region, RID p_texture,const float *p_margin, bool p_draw_center,const Color& p_modulate) { - - -} -void RasterizerDummy::canvas_draw_primitive(const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width) { - - - -} - - -void RasterizerDummy::canvas_draw_polygon(int p_vertex_count, const int* p_indices, const Vector2* p_vertices, const Vector2* p_uvs, const Color* p_colors,const RID& p_texture,bool p_singlecolor) { - - - -} - -void RasterizerDummy::canvas_set_transform(const Matrix32& p_transform) { - - -} - -void RasterizerDummy::canvas_render_items(CanvasItem *p_item_list,int p_z,const Color& p_modulate,CanvasLight *p_light) { - - -} - -/* ENVIRONMENT */ - -RID RasterizerDummy::environment_create() { - - Environment * env = memnew( Environment ); - return environment_owner.make_rid(env); -} - -void RasterizerDummy::environment_set_background(RID p_env,VS::EnvironmentBG p_bg) { - - ERR_FAIL_INDEX(p_bg,VS::ENV_BG_MAX); - Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND(!env); - env->bg_mode=p_bg; -} - -VS::EnvironmentBG RasterizerDummy::environment_get_background(RID p_env) const{ - - const Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND_V(!env,VS::ENV_BG_MAX); - return env->bg_mode; -} - -void RasterizerDummy::environment_set_background_param(RID p_env,VS::EnvironmentBGParam p_param, const Variant& p_value){ - - ERR_FAIL_INDEX(p_param,VS::ENV_BG_PARAM_MAX); - Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND(!env); - env->bg_param[p_param]=p_value; - -} -Variant RasterizerDummy::environment_get_background_param(RID p_env,VS::EnvironmentBGParam p_param) const{ - - ERR_FAIL_INDEX_V(p_param,VS::ENV_BG_PARAM_MAX,Variant()); - const Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND_V(!env,Variant()); - return env->bg_param[p_param]; - -} - -void RasterizerDummy::environment_set_enable_fx(RID p_env,VS::EnvironmentFx p_effect,bool p_enabled){ - - ERR_FAIL_INDEX(p_effect,VS::ENV_FX_MAX); - Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND(!env); - env->fx_enabled[p_effect]=p_enabled; -} -bool RasterizerDummy::environment_is_fx_enabled(RID p_env,VS::EnvironmentFx p_effect) const{ - - ERR_FAIL_INDEX_V(p_effect,VS::ENV_FX_MAX,false); - const Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND_V(!env,false); - return env->fx_enabled[p_effect]; - -} - -void RasterizerDummy::environment_fx_set_param(RID p_env,VS::EnvironmentFxParam p_param,const Variant& p_value){ - - ERR_FAIL_INDEX(p_param,VS::ENV_FX_PARAM_MAX); - Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND(!env); - env->fx_param[p_param]=p_value; -} -Variant RasterizerDummy::environment_fx_get_param(RID p_env,VS::EnvironmentFxParam p_param) const{ - - ERR_FAIL_INDEX_V(p_param,VS::ENV_FX_PARAM_MAX,Variant()); - const Environment * env = environment_owner.get(p_env); - ERR_FAIL_COND_V(!env,Variant()); - return env->fx_param[p_param]; - -} - - -RID RasterizerDummy::sampled_light_dp_create(int p_width,int p_height) { - - return sampled_light_owner.make_rid(memnew(SampledLight)); -} - -void RasterizerDummy::sampled_light_dp_update(RID p_sampled_light, const Color *p_data, float p_multiplier) { - - -} - - -/*MISC*/ - -bool RasterizerDummy::is_texture(const RID& p_rid) const { - - return texture_owner.owns(p_rid); -} -bool RasterizerDummy::is_material(const RID& p_rid) const { - - return material_owner.owns(p_rid); -} -bool RasterizerDummy::is_mesh(const RID& p_rid) const { - - return mesh_owner.owns(p_rid); -} - -bool RasterizerDummy::is_immediate(const RID& p_rid) const { - - return immediate_owner.owns(p_rid); -} - -bool RasterizerDummy::is_multimesh(const RID& p_rid) const { - - return multimesh_owner.owns(p_rid); -} -bool RasterizerDummy::is_particles(const RID &p_beam) const { - - return particles_owner.owns(p_beam); -} - -bool RasterizerDummy::is_light(const RID& p_rid) const { - - return light_owner.owns(p_rid); -} -bool RasterizerDummy::is_light_instance(const RID& p_rid) const { - - return light_instance_owner.owns(p_rid); -} -bool RasterizerDummy::is_particles_instance(const RID& p_rid) const { - - return particles_instance_owner.owns(p_rid); -} -bool RasterizerDummy::is_skeleton(const RID& p_rid) const { - - return skeleton_owner.owns(p_rid); -} -bool RasterizerDummy::is_environment(const RID& p_rid) const { - - return environment_owner.owns(p_rid); -} - -bool RasterizerDummy::is_canvas_light_occluder(const RID& p_rid) const { - - return false; -} - -bool RasterizerDummy::is_shader(const RID& p_rid) const { - - return false; -} - -void RasterizerDummy::free(const RID& p_rid) { - - if (texture_owner.owns(p_rid)) { - - // delete the texture - Texture *texture = texture_owner.get(p_rid); - texture_owner.free(p_rid); - memdelete(texture); - - } else if (shader_owner.owns(p_rid)) { - - // delete the texture - Shader *shader = shader_owner.get(p_rid); - shader_owner.free(p_rid); - memdelete(shader); - - } else if (material_owner.owns(p_rid)) { - - Material *material = material_owner.get( p_rid ); - material_owner.free(p_rid); - memdelete(material); - - } else if (mesh_owner.owns(p_rid)) { - - Mesh *mesh = mesh_owner.get(p_rid); - - for (int i=0;isurfaces.size();i++) { - - memdelete( mesh->surfaces[i] ); - }; - - mesh->surfaces.clear(); - mesh_owner.free(p_rid); - memdelete(mesh); - - } else if (multimesh_owner.owns(p_rid)) { - - MultiMesh *multimesh = multimesh_owner.get(p_rid); - multimesh_owner.free(p_rid); - memdelete(multimesh); - - } else if (immediate_owner.owns(p_rid)) { - - Immediate *immediate = immediate_owner.get(p_rid); - immediate_owner.free(p_rid); - memdelete(immediate); - - } else if (particles_owner.owns(p_rid)) { - - Particles *particles = particles_owner.get(p_rid); - particles_owner.free(p_rid); - memdelete(particles); - } else if (particles_instance_owner.owns(p_rid)) { - - ParticlesInstance *particles_isntance = particles_instance_owner.get(p_rid); - particles_instance_owner.free(p_rid); - memdelete(particles_isntance); - - } else if (skeleton_owner.owns(p_rid)) { - - Skeleton *skeleton = skeleton_owner.get( p_rid ); - skeleton_owner.free(p_rid); - memdelete(skeleton); - - } else if (light_owner.owns(p_rid)) { - - Light *light = light_owner.get( p_rid ); - light_owner.free(p_rid); - memdelete(light); - - } else if (light_instance_owner.owns(p_rid)) { - - LightInstance *light_instance = light_instance_owner.get( p_rid ); - light_instance_owner.free(p_rid); - memdelete( light_instance ); - - - } else if (environment_owner.owns(p_rid)) { - - Environment *env = environment_owner.get( p_rid ); - environment_owner.free(p_rid); - memdelete( env ); - } else if (sampled_light_owner.owns(p_rid)) { - - SampledLight *sampled_light = sampled_light_owner.get( p_rid ); - ERR_FAIL_COND(!sampled_light); - - sampled_light_owner.free(p_rid); - memdelete( sampled_light ); - - }; -} - - -void RasterizerDummy::custom_shade_model_set_shader(int p_model, RID p_shader) { - - -}; - -RID RasterizerDummy::custom_shade_model_get_shader(int p_model) const { - - return RID(); -}; - -void RasterizerDummy::custom_shade_model_set_name(int p_model, const String& p_name) { - -}; - -String RasterizerDummy::custom_shade_model_get_name(int p_model) const { - - return String(); -}; - -void RasterizerDummy::custom_shade_model_set_param_info(int p_model, const List& p_info) { - -}; - -void RasterizerDummy::custom_shade_model_get_param_info(int p_model, List* p_info) const { - -}; - - - -void RasterizerDummy::init() { - - -} - -void RasterizerDummy::finish() { - - -} - -int RasterizerDummy::get_render_info(VS::RenderInfo p_info) { - - return 0; -} - -bool RasterizerDummy::needs_to_draw_next_frame() const { - - return false; -} - - -bool RasterizerDummy::has_feature(VS::Features p_feature) const { - - return false; - -} - -void RasterizerDummy::restore_framebuffer() { - -} - -RasterizerDummy::RasterizerDummy() { - -}; - -RasterizerDummy::~RasterizerDummy() { - -}; diff --git a/servers/visual/rasterizer_dummy.h b/servers/visual/rasterizer_dummy.h deleted file mode 100644 index cac36eb6fc3..00000000000 --- a/servers/visual/rasterizer_dummy.h +++ /dev/null @@ -1,791 +0,0 @@ -/*************************************************************************/ -/* rasterizer_dummy.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef RASTERIZER_DUMMY_H -#define RASTERIZER_DUMMY_H - -#include "servers/visual/rasterizer.h" - - -#include "image.h" -#include "rid.h" -#include "servers/visual_server.h" -#include "list.h" -#include "map.h" -#include "camera_matrix.h" -#include "sort.h" - - -#include "servers/visual/particle_system_sw.h" - -/** - @author Juan Linietsky -*/ -class RasterizerDummy : public Rasterizer { - - struct Texture { - - uint32_t flags; - int width,height; - Image::Format format; - Image image[6]; - Texture() { - - flags=width=height=0; - format=Image::FORMAT_GRAYSCALE; - } - - ~Texture() { - - } - }; - - mutable RID_Owner texture_owner; - - struct Shader { - - String vertex_code; - String fragment_code; - String light_code; - VS::ShaderMode mode; - Map params; - int fragment_line; - int vertex_line; - int light_line; - bool valid; - bool has_alpha; - bool use_world_transform; - - }; - - mutable RID_Owner shader_owner; - - - struct Material { - - bool flags[VS::MATERIAL_FLAG_MAX]; - - VS::MaterialDepthDrawMode depth_draw_mode; - - VS::MaterialBlendMode blend_mode; - - float line_width; - float point_size; - - RID shader; // shader material - - Map shader_params; - - - Material() { - - - for(int i=0;i material_owner; - - void _material_check_alpha(Material *p_material); - - - struct Geometry { - - enum Type { - GEOMETRY_INVALID, - GEOMETRY_SURFACE, - GEOMETRY_POLY, - GEOMETRY_PARTICLES, - GEOMETRY_MULTISURFACE, - }; - - Type type; - RID material; - bool has_alpha; - bool material_owned; - - Geometry() { has_alpha=false; material_owned = false; } - virtual ~Geometry() {}; - }; - - struct GeometryOwner { - - virtual ~GeometryOwner() {} - }; - - class Mesh; - - struct Surface : public Geometry { - - Array data; - Array morph_data; - - bool packed; - bool alpha_sort; - int morph_target_count; - AABB aabb; - - VS::PrimitiveType primitive; - - uint32_t format; - uint32_t morph_format; - - Surface() { - - packed=false; - morph_target_count=0; - material_owned=false; - format=0; - morph_format=0; - - primitive=VS::PRIMITIVE_POINTS; - } - - ~Surface() { - - } - }; - - - struct Mesh { - - bool active; - Vector surfaces; - int morph_target_count; - VS::MorphTargetMode morph_target_mode; - AABB custom_aabb; - - mutable uint64_t last_pass; - Mesh() { - morph_target_mode=VS::MORPH_MODE_NORMALIZED; - morph_target_count=0; - last_pass=0; - active=false; - } - }; - mutable RID_Owner mesh_owner; - - struct MultiMesh; - - struct MultiMeshSurface : public Geometry { - - Surface *surface; - MultiMeshSurface() { type=GEOMETRY_MULTISURFACE; } - }; - - struct MultiMesh : public GeometryOwner { - - struct Element { - - Transform xform; - Color color; - }; - - AABB aabb; - RID mesh; - int visible; - - //IDirect3DVertexBuffer9* instance_buffer; - Vector elements; - - MultiMesh() { - visible=-1; - } - - - }; - - - mutable RID_Owner multimesh_owner; - - struct Immediate { - - - RID material; - int empty; - }; - - mutable RID_Owner immediate_owner; - - struct Particles : public Geometry { - - ParticleSystemSW data; // software particle system - - Particles() { - type=GEOMETRY_PARTICLES; - - } - }; - - mutable RID_Owner particles_owner; - - struct ParticlesInstance : public GeometryOwner { - - RID particles; - - ParticleSystemProcessSW particles_process; - Transform transform; - - ParticlesInstance() { } - }; - - mutable RID_Owner particles_instance_owner; - ParticleSystemDrawInfoSW particle_draw_info; - - struct Skeleton { - - Vector bones; - - }; - - mutable RID_Owner skeleton_owner; - - - struct Light { - - VS::LightType type; - float vars[VS::LIGHT_PARAM_MAX]; - Color colors[3]; - bool shadow_enabled; - RID projector; - bool volumetric_enabled; - Color volumetric_color; - - - Light() { - - vars[VS::LIGHT_PARAM_SPOT_ATTENUATION]=1; - vars[VS::LIGHT_PARAM_SPOT_ANGLE]=45; - vars[VS::LIGHT_PARAM_ATTENUATION]=1.0; - vars[VS::LIGHT_PARAM_ENERGY]=1.0; - vars[VS::LIGHT_PARAM_RADIUS]=1.0; - vars[VS::LIGHT_PARAM_SHADOW_Z_OFFSET]=0.05; - - colors[VS::LIGHT_COLOR_DIFFUSE]=Color(1,1,1); - colors[VS::LIGHT_COLOR_SPECULAR]=Color(1,1,1); - shadow_enabled=false; - volumetric_enabled=false; - } - }; - - - struct Environment { - - - VS::EnvironmentBG bg_mode; - Variant bg_param[VS::ENV_BG_PARAM_MAX]; - bool fx_enabled[VS::ENV_FX_MAX]; - Variant fx_param[VS::ENV_FX_PARAM_MAX]; - - Environment() { - - bg_mode=VS::ENV_BG_DEFAULT_COLOR; - bg_param[VS::ENV_BG_PARAM_COLOR]=Color(0,0,0); - bg_param[VS::ENV_BG_PARAM_TEXTURE]=RID(); - bg_param[VS::ENV_BG_PARAM_CUBEMAP]=RID(); - bg_param[VS::ENV_BG_PARAM_ENERGY]=1.0; - - for(int i=0;i environment_owner; - - struct SampledLight { - - int w,h; - }; - - mutable RID_Owner sampled_light_owner; - - struct ShadowBuffer; - - struct LightInstance { - - struct SplitInfo { - - CameraMatrix camera; - Transform transform; - float near; - float far; - }; - - RID light; - Light *base; - Transform transform; - CameraMatrix projection; - - Transform custom_transform; - CameraMatrix custom_projection; - - Vector3 light_vector; - Vector3 spot_vector; - float linear_att; - - - LightInstance() { linear_att=1.0; } - - }; - - mutable RID_Owner light_owner; - mutable RID_Owner light_instance_owner; - - - RID default_material; - - - - -public: - - /* TEXTURE API */ - - virtual RID texture_create(); - virtual void texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags=VS::TEXTURE_FLAGS_DEFAULT); - virtual void texture_set_data(RID p_texture,const Image& p_image,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT); - virtual Image texture_get_data(RID p_texture,VS::CubeMapSide p_cube_side=VS::CUBEMAP_LEFT) const; - virtual void texture_set_flags(RID p_texture,uint32_t p_flags); - virtual uint32_t texture_get_flags(RID p_texture) const; - virtual Image::Format texture_get_format(RID p_texture) const; - virtual uint32_t texture_get_width(RID p_texture) const; - virtual uint32_t texture_get_height(RID p_texture) const; - virtual bool texture_has_alpha(RID p_texture) const; - virtual void texture_set_size_override(RID p_texture,int p_width, int p_height); - virtual void texture_set_reload_hook(RID p_texture,ObjectID p_owner,const StringName& p_function) const; - - virtual void texture_set_path(RID p_texture,const String& p_path) {} - virtual String texture_get_path(RID p_texture) const { return String(); } - virtual void texture_debug_usage(List *r_info) {} - - virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable) {} - - /* SHADER API */ - - virtual RID shader_create(VS::ShaderMode p_mode=VS::SHADER_MATERIAL); - - virtual void shader_set_mode(RID p_shader,VS::ShaderMode p_mode); - virtual VS::ShaderMode shader_get_mode(RID p_shader) const; - - virtual void shader_set_code(RID p_shader, const String& p_vertex, const String& p_fragment,const String& p_light,int p_vertex_ofs=0,int p_fragment_ofs=0,int p_light_ofs=0); - virtual String shader_get_fragment_code(RID p_shader) const; - virtual String shader_get_vertex_code(RID p_shader) const; - virtual String shader_get_light_code(RID p_shader) const; - - virtual void shader_get_param_list(RID p_shader, List *p_param_list) const; - - - virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture); - virtual RID shader_get_default_texture_param(RID p_shader, const StringName& p_name) const; - - virtual Variant shader_get_default_param(RID p_shader, const StringName& p_name); - - /* COMMON MATERIAL API */ - - virtual RID material_create(); - - virtual void material_set_shader(RID p_shader_material, RID p_shader); - virtual RID material_get_shader(RID p_shader_material) const; - - virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); - virtual Variant material_get_param(RID p_material, const StringName& p_param) const; - - virtual void material_set_flag(RID p_material, VS::MaterialFlag p_flag,bool p_enabled); - virtual bool material_get_flag(RID p_material,VS::MaterialFlag p_flag) const; - - virtual void material_set_depth_draw_mode(RID p_material, VS::MaterialDepthDrawMode p_mode); - virtual VS::MaterialDepthDrawMode material_get_depth_draw_mode(RID p_material) const; - - virtual void material_set_blend_mode(RID p_material,VS::MaterialBlendMode p_mode); - virtual VS::MaterialBlendMode material_get_blend_mode(RID p_material) const; - - virtual void material_set_line_width(RID p_material,float p_line_width); - virtual float material_get_line_width(RID p_material) const; - - /* MESH API */ - - - virtual RID mesh_create(); - - virtual void mesh_add_surface(RID p_mesh,VS::PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes=Array(),bool p_alpha_sort=false); - virtual Array mesh_get_surface_arrays(RID p_mesh,int p_surface) const; - virtual Array mesh_get_surface_morph_arrays(RID p_mesh,int p_surface) const; - virtual void mesh_add_custom_surface(RID p_mesh,const Variant& p_dat); - - virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount); - virtual int mesh_get_morph_target_count(RID p_mesh) const; - - virtual void mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode); - virtual VS::MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const; - - virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material,bool p_owned=false); - virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const; - - virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const; - virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const; - virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const; - virtual VS::PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const; - - virtual void mesh_remove_surface(RID p_mesh,int p_index); - virtual int mesh_get_surface_count(RID p_mesh) const; - - virtual AABB mesh_get_aabb(RID p_mesh,RID p_skeleton=RID()) const; - - virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb); - virtual AABB mesh_get_custom_aabb(RID p_mesh) const; - - - /* MULTIMESH API */ - - virtual RID multimesh_create(); - - virtual void multimesh_set_instance_count(RID p_multimesh,int p_count); - virtual int multimesh_get_instance_count(RID p_multimesh) const; - - virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh); - virtual void multimesh_set_aabb(RID p_multimesh,const AABB& p_aabb); - virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform); - virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color); - - virtual RID multimesh_get_mesh(RID p_multimesh) const; - virtual AABB multimesh_get_aabb(RID p_multimesh) const;; - - virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const; - virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const; - - virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible); - virtual int multimesh_get_visible_instances(RID p_multimesh) const; - - /* IMMEDIATE API */ - - virtual RID immediate_create(); - virtual void immediate_begin(RID p_immediate,VS::PrimitiveType p_rimitive,RID p_texture=RID()); - virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex); - virtual void immediate_normal(RID p_immediate,const Vector3& p_normal); - virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent); - virtual void immediate_color(RID p_immediate,const Color& p_color); - virtual void immediate_uv(RID p_immediate,const Vector2& tex_uv); - virtual void immediate_uv2(RID p_immediate,const Vector2& tex_uv); - virtual void immediate_end(RID p_immediate); - virtual void immediate_clear(RID p_immediate); - virtual void immediate_set_material(RID p_immediate,RID p_material); - virtual RID immediate_get_material(RID p_immediate) const; - - virtual AABB immediate_get_aabb(RID p_mesh) const; - - /* PARTICLES API */ - - virtual RID particles_create(); - - virtual void particles_set_amount(RID p_particles, int p_amount); - virtual int particles_get_amount(RID p_particles) const; - - virtual void particles_set_emitting(RID p_particles, bool p_emitting); - virtual bool particles_is_emitting(RID p_particles) const; - - virtual void particles_set_visibility_aabb(RID p_particles, const AABB& p_visibility); - virtual AABB particles_get_visibility_aabb(RID p_particles) const; - - virtual void particles_set_emission_half_extents(RID p_particles, const Vector3& p_half_extents); - virtual Vector3 particles_get_emission_half_extents(RID p_particles) const; - - virtual void particles_set_emission_base_velocity(RID p_particles, const Vector3& p_base_velocity); - virtual Vector3 particles_get_emission_base_velocity(RID p_particles) const; - - virtual void particles_set_emission_points(RID p_particles, const DVector& p_points); - virtual DVector particles_get_emission_points(RID p_particles) const; - - virtual void particles_set_gravity_normal(RID p_particles, const Vector3& p_normal); - virtual Vector3 particles_get_gravity_normal(RID p_particles) const; - - virtual void particles_set_variable(RID p_particles, VS::ParticleVariable p_variable,float p_value); - virtual float particles_get_variable(RID p_particles, VS::ParticleVariable p_variable) const; - - virtual void particles_set_randomness(RID p_particles, VS::ParticleVariable p_variable,float p_randomness); - virtual float particles_get_randomness(RID p_particles, VS::ParticleVariable p_variable) const; - - virtual void particles_set_color_phase_pos(RID p_particles, int p_phase, float p_pos); - virtual float particles_get_color_phase_pos(RID p_particles, int p_phase) const; - - virtual void particles_set_color_phases(RID p_particles, int p_phases); - virtual int particles_get_color_phases(RID p_particles) const; - - virtual void particles_set_color_phase_color(RID p_particles, int p_phase, const Color& p_color); - virtual Color particles_get_color_phase_color(RID p_particles, int p_phase) const; - - virtual void particles_set_attractors(RID p_particles, int p_attractors); - virtual int particles_get_attractors(RID p_particles) const; - - virtual void particles_set_attractor_pos(RID p_particles, int p_attractor, const Vector3& p_pos); - virtual Vector3 particles_get_attractor_pos(RID p_particles,int p_attractor) const; - - virtual void particles_set_attractor_strength(RID p_particles, int p_attractor, float p_force); - virtual float particles_get_attractor_strength(RID p_particles,int p_attractor) const; - - virtual void particles_set_material(RID p_particles, RID p_material,bool p_owned=false); - virtual RID particles_get_material(RID p_particles) const; - - virtual AABB particles_get_aabb(RID p_particles) const; - - virtual void particles_set_height_from_velocity(RID p_particles, bool p_enable); - virtual bool particles_has_height_from_velocity(RID p_particles) const; - - virtual void particles_set_use_local_coordinates(RID p_particles, bool p_enable); - virtual bool particles_is_using_local_coordinates(RID p_particles) const; - - /* SKELETON API */ - - virtual RID skeleton_create(); - virtual void skeleton_resize(RID p_skeleton,int p_bones); - virtual int skeleton_get_bone_count(RID p_skeleton) const; - virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); - - - /* LIGHT API */ - - virtual RID light_create(VS::LightType p_type); - virtual VS::LightType light_get_type(RID p_light) const; - - virtual void light_set_color(RID p_light,VS::LightColor p_type, const Color& p_color); - virtual Color light_get_color(RID p_light,VS::LightColor p_type) const; - - virtual void light_set_shadow(RID p_light,bool p_enabled); - virtual bool light_has_shadow(RID p_light) const; - - virtual void light_set_volumetric(RID p_light,bool p_enabled); - virtual bool light_is_volumetric(RID p_light) const; - - virtual void light_set_projector(RID p_light,RID p_texture); - virtual RID light_get_projector(RID p_light) const; - - virtual void light_set_var(RID p_light, VS::LightParam p_var, float p_value); - virtual float light_get_var(RID p_light, VS::LightParam p_var) const; - - virtual void light_set_operator(RID p_light,VS::LightOp p_op); - virtual VS::LightOp light_get_operator(RID p_light) const; - - virtual void light_omni_set_shadow_mode(RID p_light,VS::LightOmniShadowMode p_mode); - virtual VS::LightOmniShadowMode light_omni_get_shadow_mode(RID p_light) const; - - - virtual void light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode); - virtual VS::LightDirectionalShadowMode light_directional_get_shadow_mode(RID p_light) const; - virtual void light_directional_set_shadow_param(RID p_light,VS::LightDirectionalShadowParam p_param, float p_value); - virtual float light_directional_get_shadow_param(RID p_light,VS::LightDirectionalShadowParam p_param) const; - - virtual AABB light_get_aabb(RID p_poly) const; - - - virtual RID light_instance_create(RID p_light); - virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform); - - virtual bool light_instance_has_shadow(RID p_light_instance) const; - virtual bool light_instance_assign_shadow(RID p_light_instance); - virtual ShadowType light_instance_get_shadow_type(RID p_light_instance) const; - virtual int light_instance_get_shadow_passes(RID p_light_instance) const; - virtual bool light_instance_get_pssm_shadow_overlap(RID p_light_instance) const; - virtual void light_instance_set_custom_transform(RID p_light_instance, int p_index, const CameraMatrix& p_camera, const Transform& p_transform, float p_split_near=0,float p_split_far=0); - virtual int light_instance_get_shadow_size(RID p_light_instance, int p_index=0) const { return 1; } - - virtual ShadowType light_instance_get_shadow_type(RID p_light_instance,bool p_far=false) const; - virtual void light_instance_set_shadow_transform(RID p_light_instance, int p_index, const CameraMatrix& p_camera, const Transform& p_transform, float p_split_near=0,float p_split_far=0); - - virtual void shadow_clear_near(); - virtual bool shadow_allocate_near(RID p_light); - virtual bool shadow_allocate_far(RID p_light); - - - /* PARTICLES INSTANCE */ - - virtual RID particles_instance_create(RID p_particles); - virtual void particles_instance_set_transform(RID p_particles_instance,const Transform& p_transform); - - /* VIEWPORT */ - - virtual RID viewport_data_create(); - - virtual RID render_target_create(); - virtual void render_target_set_size(RID p_render_target, int p_width, int p_height); - virtual RID render_target_get_texture(RID p_render_target) const; - virtual bool render_target_renedered_in_frame(RID p_render_target); - - /* RENDER API */ - /* all calls (inside begin/end shadow) are always warranted to be in the following order: */ - - virtual void begin_frame(); - - virtual void set_viewport(const VS::ViewportRect& p_viewport); - virtual void set_render_target(RID p_render_target,bool p_transparent_bg=false,bool p_vflip=false); - virtual void clear_viewport(const Color& p_color); - virtual void capture_viewport(Image* r_capture); - - - virtual void begin_scene(RID p_viewport_data,RID p_env,VS::ScenarioDebugMode p_debug); - virtual void begin_shadow_map( RID p_light_instance, int p_shadow_pass ); - - virtual void set_camera(const Transform& p_world,const CameraMatrix& p_projection,bool p_ortho_hint); - - virtual void add_light( RID p_light_instance ); ///< all "add_light" calls happen before add_geometry calls - - - virtual void add_mesh( const RID& p_mesh, const InstanceData *p_data); - virtual void add_multimesh( const RID& p_multimesh, const InstanceData *p_data); - virtual void add_immediate( const RID& p_immediate, const InstanceData *p_data) {} - virtual void add_particles( const RID& p_particle_instance, const InstanceData *p_data); - - virtual void end_scene(); - virtual void end_shadow_map(); - - virtual void end_frame(); - - /* CANVAS API */ - - virtual void begin_canvas_bg(); - virtual void canvas_begin(); - virtual void canvas_disable_blending(); - virtual void canvas_set_opacity(float p_opacity); - virtual void canvas_set_blend_mode(VS::MaterialBlendMode p_mode); - virtual void canvas_begin_rect(const Matrix32& p_transform); - virtual void canvas_set_clip(bool p_clip, const Rect2& p_rect); - virtual void canvas_end_rect(); - virtual void canvas_draw_line(const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width,bool p_antialiased); - virtual void canvas_draw_rect(const Rect2& p_rect, int p_flags, const Rect2& p_source,RID p_texture,const Color& p_modulate); - virtual void canvas_draw_style_box(const Rect2& p_rect, const Rect2& p_src_region, RID p_texture,const float *p_margins, bool p_draw_center=true,const Color& p_modulate=Color(1,1,1)); - virtual void canvas_draw_primitive(const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width); - virtual void canvas_draw_polygon(int p_vertex_count, const int* p_indices, const Vector2* p_vertices, const Vector2* p_uvs, const Color* p_colors,const RID& p_texture,bool p_singlecolor); - virtual void canvas_set_transform(const Matrix32& p_transform); - - virtual void canvas_render_items(CanvasItem *p_item_list,int p_z,const Color& p_modulate,CanvasLight *p_light); - - virtual RID canvas_light_occluder_create(); - virtual void canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines); - - virtual RID canvas_light_shadow_buffer_create(int p_width); - virtual void canvas_light_shadow_buffer_update(RID p_buffer, const Matrix32& p_light_xform, int p_light_mask,float p_near, float p_far, CanvasLightOccluderInstance* p_occluders, CameraMatrix *p_xform_cache); - - virtual void canvas_debug_viewport_shadows(CanvasLight* p_lights_with_shadow); - - /* ENVIRONMENT */ - - virtual RID environment_create(); - - virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg); - virtual VS::EnvironmentBG environment_get_background(RID p_env) const; - - virtual void environment_set_background_param(RID p_env,VS::EnvironmentBGParam p_param, const Variant& p_value); - virtual Variant environment_get_background_param(RID p_env,VS::EnvironmentBGParam p_param) const; - - virtual void environment_set_enable_fx(RID p_env,VS::EnvironmentFx p_effect,bool p_enabled); - virtual bool environment_is_fx_enabled(RID p_env,VS::EnvironmentFx p_effect) const; - - virtual void environment_fx_set_param(RID p_env,VS::EnvironmentFxParam p_param,const Variant& p_value); - virtual Variant environment_fx_get_param(RID p_env,VS::EnvironmentFxParam p_param) const; - - /* SAMPLED LIGHT */ - virtual RID sampled_light_dp_create(int p_width,int p_height); - virtual void sampled_light_dp_update(RID p_sampled_light,const Color *p_data,float p_multiplier); - - - /*MISC*/ - - virtual bool is_texture(const RID& p_rid) const; - virtual bool is_material(const RID& p_rid) const; - virtual bool is_mesh(const RID& p_rid) const; - virtual bool is_immediate(const RID& p_rid) const; - virtual bool is_multimesh(const RID& p_rid) const; - virtual bool is_particles(const RID &p_beam) const; - - virtual bool is_light(const RID& p_rid) const; - virtual bool is_light_instance(const RID& p_rid) const; - virtual bool is_particles_instance(const RID& p_rid) const; - virtual bool is_skeleton(const RID& p_rid) const; - virtual bool is_environment(const RID& p_rid) const; - virtual bool is_canvas_light_occluder(const RID& p_rid) const; - - virtual bool is_shader(const RID& p_rid) const; - - virtual void free(const RID& p_rid); - - virtual void custom_shade_model_set_shader(int p_model, RID p_shader); - virtual RID custom_shade_model_get_shader(int p_model) const; - virtual void custom_shade_model_set_name(int p_model, const String& p_name); - virtual String custom_shade_model_get_name(int p_model) const; - virtual void custom_shade_model_set_param_info(int p_model, const List& p_info); - virtual void custom_shade_model_get_param_info(int p_model, List* p_info) const; - - - virtual void init(); - virtual void finish(); - - virtual int get_render_info(VS::RenderInfo p_info); - - virtual bool needs_to_draw_next_frame() const; - - virtual bool has_feature(VS::Features p_feature) const; - - virtual void restore_framebuffer(); - - RasterizerDummy(); - virtual ~RasterizerDummy(); -}; - - -#endif // RASTERIZER_DUMMY_H diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp index fdf3cb622d2..287293a4379 100644 --- a/servers/visual/shader_language.cpp +++ b/servers/visual/shader_language.cpp @@ -46,7 +46,7 @@ static bool _is_hex(CharType c) { const char * ShaderLanguage::token_names[TK_MAX]={ "EMPTY", - "INDENTIFIER", + "IDENTIFIER", "TRUE", "FALSE", "REAL_CONSTANT", @@ -97,401 +97,402 @@ const char * ShaderLanguage::token_names[TK_MAX]={ "ERROR", }; -ShaderLanguage::Token ShaderLanguage::read_token(const CharType* p_text,int p_len,int &r_line,int &r_chars) { +String ShaderLanguage::get_token_text(Token p_token) { -#define GETCHAR(m_idx) ((m_idx': { - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_GREATER_EQUAL); - }/* else if (GETCHAR(1)=='<') { - r_chars++; - if (GETCHAR(2)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_SHIFT_RIGHT); - } - - return Token(TK_OP_SHIFT_RIGHT); - }*/ - - return Token(TK_OP_GREATER); - - } break; - case '!': { - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_NOT_EQUAL); - } - - return Token(TK_OP_NOT); - - } break; - //case '"' //string - no strings in shader - //case '\'' //string - no strings in shader - case '{': - return Token(TK_CURLY_BRACKET_OPEN); - case '}': - return Token(TK_CURLY_BRACKET_CLOSE); - //case '[': - // return Token(TK_BRACKET_OPEN); - //case ']': - // return Token(TK_BRACKET_CLOSE); - case '(': - return Token(TK_PARENTHESIS_OPEN); - case ')': - return Token(TK_PARENTHESIS_CLOSE); - case ',': - return Token(TK_COMMA); - case ';': - return Token(TK_SEMICOLON); - //case '?': - // return Token(TK_QUESTION_MARK); - //case ':': - // return Token(TK_COLON); //for methods maybe but now useless. - //case '^': - // return Token(TK_OP_BIT_XOR); - //case '~': - // return Token(TK_OP_BIT_INVERT); - case '&': { - - if (GETCHAR(1)=='&') { - - r_chars++; - return Token(TK_OP_AND); - } - - return Token(TK_ERROR,"Unknown character"); - -/* - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_BIT_AND); - } else if (GETCHAR(1)=='&') { - r_chars++; - return Token(TK_OP_AND); - } - return TK_OP_BIT_AND;*/ - } break; - case '|': { - - if (GETCHAR(1)=='|') { - - r_chars++; - return Token(TK_OP_OR); - } - - return Token(TK_ERROR,"Unknown character"); - - /* - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_BIT_OR); - } else if (GETCHAR(1)=='|') { - r_chars++; - return Token(TK_OP_OR); - } - return TK_OP_BIT_OR; - */ - } break; - case '*': { - - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_MUL); - } - return TK_OP_MUL; - } break; - case '+': { - - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_ADD); - } /*else if (GETCHAR(1)=='+') { - - r_chars++; - return Token(TK_OP_PLUS_PLUS); - }*/ - - return TK_OP_ADD; - } break; - case '-': { - - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_SUB); - }/* else if (GETCHAR(1)=='-') { - - r_chars++; - return Token(TK_OP_MINUS_MINUS); - }*/ - - return TK_OP_SUB; - } break; - /*case '%': { - - if (GETCHAR(1)=='=') { - r_chars++; - return Token(TK_OP_ASSIGN_MOD); - } - - return TK_OP_MOD; - } break;*/ - default: { - - if (_is_number(GETCHAR(0)) || (GETCHAR(0)=='.' && _is_number(GETCHAR(1)))) { - // parse number - bool period_found=false; - bool exponent_found=false; - bool hexa_found=false; - bool sign_found=false; - - String str; - int i=0; - - while(true) { - if (GETCHAR(i)=='.') { - if (period_found || exponent_found) - return Token(TK_ERROR,"Invalid numeric constant"); - period_found=true; - } else if (GETCHAR(i)=='x') { - if (hexa_found || str.length()!=1 || str[0]!='0') - return Token(TK_ERROR,"Invalid numeric constant"); - hexa_found=true; - } else if (GETCHAR(i)=='e') { - if (hexa_found || exponent_found) - return Token(TK_ERROR,"Invalid numeric constant"); - exponent_found=true; - } else if (_is_number(GETCHAR(i))) { - //all ok - } else if (hexa_found && _is_hex(GETCHAR(i))) { - - } else if ((GETCHAR(i)=='-' || GETCHAR(i)=='+') && exponent_found) { - if (sign_found) - return Token(TK_ERROR,"Invalid numeric constant"); - sign_found=true; - } else - break; - - str+=CharType(GETCHAR(i)); - i++; - } - - if (!_is_number(str[str.length()-1])) - return Token(TK_ERROR,"Invalid numeric constant"); - - r_chars+=str.length()-1; - return Token(TK_REAL_CONSTANT,str); - /* - if (period_found) - return Token(TK_NUMBER_REAL,str); - else - return Token(TK_NUMBER_INTEGER,str);*/ - - } - - if (GETCHAR(0)=='.') { - //parse period - return Token(TK_PERIOD); - } - - if (_is_text_char(GETCHAR(0))) { - // parse identifier - String str; - str+=CharType(GETCHAR(0)); - - while(_is_text_char(GETCHAR(r_chars))) { - - str+=CharType(GETCHAR(r_chars)); - r_chars++; - } - - //see if keyword - struct _kws { TokenType token; const char *text;}; - static const _kws keyword_list[]={ - {TK_TRUE,"true"}, - {TK_FALSE,"false"}, - {TK_TYPE_VOID,"void"}, - {TK_TYPE_BOOL,"bool"}, - /*{TK_TYPE_INT,"int"}, - {TK_TYPE_INT2,"int2"}, - {TK_TYPE_INT3,"int3"}, - {TK_TYPE_INT4,"int4"},*/ - {TK_TYPE_FLOAT,"float"}, - /*{TK_TYPE_FLOAT2,"float2"}, - {TK_TYPE_FLOAT3,"float3"}, - {TK_TYPE_FLOAT4,"float4"},*/ - {TK_TYPE_VEC2,"vec2"}, - {TK_TYPE_VEC3,"vec3"}, - {TK_TYPE_VEC4,"vec4"}, - {TK_TYPE_TEXTURE,"texture"}, - {TK_TYPE_CUBEMAP,"cubemap"}, - {TK_TYPE_COLOR,"color"}, - - {TK_TYPE_MAT2,"mat2"}, - /*{TK_TYPE_MAT3,"mat3"}, - {TK_TYPE_MAT4,"mat3"},*/ - {TK_TYPE_MAT3,"mat3"}, - {TK_TYPE_MAT4,"mat4"}, - {TK_CF_IF,"if"}, - {TK_CF_ELSE,"else"}, - /* - {TK_CF_FOR,"for"}, - {TK_CF_WHILE,"while"}, - {TK_CF_DO,"do"}, - {TK_CF_SWITCH,"switch"}, - {TK_CF_BREAK,"break"}, - {TK_CF_CONTINUE,"continue"},*/ - {TK_CF_RETURN,"return"}, - {TK_UNIFORM,"uniform"}, - {TK_ERROR,NULL} - }; - - int idx=0; - - while(keyword_list[idx].text) { - - if (str==keyword_list[idx].text) - return Token(keyword_list[idx].token); - idx++; - } - - - return Token(TK_INDENTIFIER,str); - } - - if (GETCHAR(0)>32) - return Token(TK_ERROR,"Tokenizer: Unknown character #"+itos(GETCHAR(0))+": '"+String::chr(GETCHAR(0))+"'"); - else - return Token(TK_ERROR,"Tokenizer: Unknown character #"+itos(GETCHAR(0))); - - } break; + String name=token_names[p_token.type]; + if (p_token.type==TK_INT_CONSTANT || p_token.type==TK_REAL_CONSTANT) { + name+="("+rtos(p_token.constant)+")"; + } else if (p_token.type==TK_IDENTIFIER) { + name+="("+String(p_token.text)+")"; } + return name; +} + +ShaderLanguage::Token ShaderLanguage::_make_token(TokenType p_type,const StringName& p_text) { + + Token tk; + tk.type=p_type; + tk.text=p_text; + tk.line=tk_line; + return tk; +} + + +ShaderLanguage::Token ShaderLanguage::_get_token() { + +#define GETCHAR(m_idx) ((char_idx': { + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_GREATER_EQUAL); + } else if (GETCHAR(0)=='<') { + char_idx++;; + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_SHIFT_RIGHT); + } + + return _make_token(TK_OP_SHIFT_RIGHT); + } + + return _make_token(TK_OP_GREATER); + + } break; + case '!': { + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_NOT_EQUAL); + } + + return _make_token(TK_OP_NOT); + + } break; + //case '"' //string - no strings in shader + //case '\'' //string - no strings in shader + case '{': + return _make_token(TK_CURLY_BRACKET_OPEN); + case '}': + return _make_token(TK_CURLY_BRACKET_CLOSE); + case '[': + return _make_token(TK_BRACKET_OPEN); + case ']': + return _make_token(TK_BRACKET_CLOSE); + case '(': + return _make_token(TK_PARENTHESIS_OPEN); + case ')': + return _make_token(TK_PARENTHESIS_CLOSE); + case ',': + return _make_token(TK_COMMA); + case ';': + return _make_token(TK_SEMICOLON); + case '?': + return _make_token(TK_QUESTION); + case ':': + return _make_token(TK_COLON); + case '^': + return _make_token(TK_OP_BIT_XOR); + case '~': + return _make_token(TK_OP_BIT_INVERT); + case '&': { + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_BIT_AND); + } else if (GETCHAR(0)=='&') { + char_idx++; + return _make_token(TK_OP_AND); + } + return _make_token(TK_OP_BIT_AND); + } break; + case '|': { + + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_BIT_OR); + } else if (GETCHAR(0)=='|') { + char_idx++; + return _make_token(TK_OP_OR); + } + return _make_token(TK_OP_BIT_OR); + + } break; + case '*': { + + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_MUL); + } + return _make_token(TK_OP_MUL); + } break; + case '+': { + + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_ADD); + } else if (GETCHAR(0)=='+') { + + char_idx++; + return _make_token(TK_OP_INCREMENT); + } + + return _make_token(TK_OP_ADD); + } break; + case '-': { + + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_SUB); + }else if (GETCHAR(0)=='-') { + + char_idx++; + return _make_token(TK_OP_DECREMENT); + } + + return _make_token(TK_OP_SUB); + } break; + case '%': { + + if (GETCHAR(0)=='=') { + char_idx++; + return _make_token(TK_OP_ASSIGN_MOD); + } + + return _make_token(TK_OP_MOD); + } break; + default: { + + char_idx--; //go back one, since we have no idea what this is + + if (_is_number(GETCHAR(0)) || (GETCHAR(0)=='.' && _is_number(GETCHAR(1)))) { + // parse number + bool period_found=false; + bool exponent_found=false; + bool hexa_found=false; + bool sign_found=false; + bool minus_exponent_found=false; + + String str; + int i=0; + + while(true) { + if (GETCHAR(i)=='.') { + if (period_found || exponent_found) + return _make_token(TK_ERROR,"Invalid numeric constant"); + period_found=true; + } else if (GETCHAR(i)=='x') { + if (hexa_found || str.length()!=1 || str[0]!='0') + return _make_token(TK_ERROR,"Invalid numeric constant"); + hexa_found=true; + } else if (GETCHAR(i)=='e') { + if (hexa_found || exponent_found) + return _make_token(TK_ERROR,"Invalid numeric constant"); + exponent_found=true; + } else if (_is_number(GETCHAR(i))) { + //all ok + } else if (hexa_found && _is_hex(GETCHAR(i))) { + + } else if ((GETCHAR(i)=='-' || GETCHAR(i)=='+') && exponent_found) { + if (sign_found) + return _make_token(TK_ERROR,"Invalid numeric constant"); + sign_found=true; + if (GETCHAR(i)=='-') + minus_exponent_found=true; + } else + break; + + str+=CharType(GETCHAR(i)); + i++; + } + + if (!_is_number(str[str.length()-1])) + return _make_token(TK_ERROR,"Invalid numeric constant"); + + char_idx+=str.length()-1; + Token tk; + if (period_found || minus_exponent_found) + tk.type=TK_REAL_CONSTANT; + else + tk.type=TK_INT_CONSTANT; + + if (!str.is_valid_float()) { + return _make_token(TK_ERROR,"Invalid numeric constant"); + } + + tk.constant=str.to_double(); + tk.line=tk_line; + + return tk; + + } + + if (GETCHAR(0)=='.') { + //parse period + return _make_token(TK_PERIOD); + } + + if (_is_text_char(GETCHAR(0))) { + // parse identifier + String str; + str+=CharType(GETCHAR(0)); + + + while(_is_text_char(GETCHAR(1))) { + + str+=CharType(GETCHAR(1)); + char_idx++; + } + + //see if keyword + //should be converted to a static map + struct _kws { TokenType token; const char *text;}; + static const _kws keyword_list[]={ + {TK_TRUE,"true"}, + {TK_FALSE,"false"}, + {TK_TYPE_VOID,"void"}, + {TK_TYPE_BOOL,"bool"}, + {TK_TYPE_BVEC2,"bvec2"}, + {TK_TYPE_BVEC3,"bvec3"}, + {TK_TYPE_BVEC4,"bvec4"}, + {TK_TYPE_INT,"int"}, + {TK_TYPE_IVEC2,"ivec2"}, + {TK_TYPE_IVEC3,"ivec3"}, + {TK_TYPE_IVEC4,"ivec4"}, + {TK_TYPE_UINT,"uint"}, + {TK_TYPE_UVEC2,"uvec2"}, + {TK_TYPE_UVEC3,"uvec3"}, + {TK_TYPE_UVEC4,"uvec4"}, + {TK_TYPE_FLOAT,"float"}, + {TK_TYPE_VEC2,"vec2"}, + {TK_TYPE_VEC3,"vec3"}, + {TK_TYPE_VEC4,"vec4"}, + {TK_TYPE_MAT2,"mat2"}, + {TK_TYPE_MAT3,"mat3"}, + {TK_TYPE_MAT4,"mat4"}, + {TK_TYPE_SAMPLER2D,"sampler2D"}, + {TK_TYPE_ISAMPLER2D,"isampler2D"}, + {TK_TYPE_USAMPLER2D,"usampler2D"}, + {TK_TYPE_SAMPLERCUBE,"samplerCube"}, + {TK_PRECISION_LOW,"lowp"}, + {TK_PRECISION_MID,"mediump"}, + {TK_PRECISION_HIGH,"highp"}, + {TK_CF_IF,"if"}, + {TK_CF_ELSE,"else"}, + {TK_CF_FOR,"for"}, + {TK_CF_WHILE,"while"}, + {TK_CF_DO,"do"}, + {TK_CF_SWITCH,"switch"}, + {TK_CF_CASE,"case"}, + {TK_CF_BREAK,"break"}, + {TK_CF_CONTINUE,"continue"}, + {TK_CF_RETURN,"return"}, + {TK_UNIFORM,"uniform"}, + {TK_VARYING,"varying"}, + {TK_RENDER_MODE,"render_mode"}, + {TK_HINT_WHITE_TEXTURE,"hint_white"}, + {TK_HINT_BLACK_TEXTURE,"hint_black"}, + {TK_HINT_NORMAL_TEXTURE,"hint_normal"}, + {TK_HINT_RANGE,"hint_range"}, + + {TK_ERROR,NULL} + }; + + int idx=0; + + while(keyword_list[idx].text) { + + if (str==keyword_list[idx].text) { + + _make_token(keyword_list[idx].token); + } + idx++; + } + + + return _make_token(TK_IDENTIFIER,str); + } + + if (GETCHAR(0)>32) + return _make_token(TK_ERROR,"Tokenizer: Unknown character #"+itos(GETCHAR(0))+": '"+String::chr(GETCHAR(0))+"'"); + else + return _make_token(TK_ERROR,"Tokenizer: Unknown character #"+itos(GETCHAR(0))); + + } break; + } + } ERR_PRINT("BUG"); return Token(); } -Error ShaderLanguage::tokenize(const String& p_text,Vector *p_tokens,String *r_error,int *r_err_line,int *r_err_column) { - - - int len =p_text.length(); - int pos=0; - - int line=0; - - while(pospush_back(t); - //if (prev_line!=line) - // p_tokens->push_back(Token(TK_LINE,itos(line))); - - pos+=advance; - - } - - return OK; - -} String ShaderLanguage::lex_debug(const String& p_code) { +#if 0 Vector tokens; String error; int errline,errcol; @@ -503,47 +504,64 @@ String ShaderLanguage::lex_debug(const String& p_code) { } return ret; +#endif + return String(); } bool ShaderLanguage::is_token_datatype(TokenType p_type) { - return - (p_type==TK_TYPE_VOID) || - (p_type==TK_TYPE_BOOL) || - (p_type==TK_TYPE_FLOAT) || - (p_type==TK_TYPE_VEC2) || - (p_type==TK_TYPE_VEC3) || - (p_type==TK_TYPE_VEC4) || - (p_type==TK_TYPE_COLOR) || - (p_type==TK_TYPE_MAT2) || - (p_type==TK_TYPE_MAT3) || - (p_type==TK_TYPE_MAT4) || - (p_type==TK_TYPE_CUBEMAP) || - (p_type==TK_TYPE_TEXTURE); + return ( + p_type==TK_TYPE_VOID || + p_type==TK_TYPE_BOOL || + p_type==TK_TYPE_BVEC2 || + p_type==TK_TYPE_BVEC3 || + p_type==TK_TYPE_BVEC4 || + p_type==TK_TYPE_INT || + p_type==TK_TYPE_IVEC2 || + p_type==TK_TYPE_IVEC3 || + p_type==TK_TYPE_IVEC4 || + p_type==TK_TYPE_UINT || + p_type==TK_TYPE_UVEC2 || + p_type==TK_TYPE_UVEC3 || + p_type==TK_TYPE_UVEC4 || + p_type==TK_TYPE_FLOAT || + p_type==TK_TYPE_VEC2 || + p_type==TK_TYPE_VEC3 || + p_type==TK_TYPE_VEC4 || + p_type==TK_TYPE_MAT2 || + p_type==TK_TYPE_MAT3 || + p_type==TK_TYPE_MAT4 || + p_type==TK_TYPE_SAMPLER2D || + p_type==TK_TYPE_ISAMPLER2D || + p_type==TK_TYPE_USAMPLER2D || + p_type==TK_TYPE_SAMPLERCUBE ); } ShaderLanguage::DataType ShaderLanguage::get_token_datatype(TokenType p_type) { - switch(p_type) { + return DataType(p_type-TK_TYPE_VOID); +} - case TK_TYPE_VOID: return TYPE_VOID; - case TK_TYPE_BOOL: return TYPE_BOOL; - case TK_TYPE_FLOAT: return TYPE_FLOAT; - case TK_TYPE_VEC2: return TYPE_VEC2; - case TK_TYPE_VEC3: return TYPE_VEC3; - case TK_TYPE_VEC4: return TYPE_VEC4; - case TK_TYPE_COLOR: return TYPE_VEC4; - case TK_TYPE_MAT2: return TYPE_MAT2; - case TK_TYPE_MAT3: return TYPE_MAT3; - case TK_TYPE_MAT4: return TYPE_MAT4; - case TK_TYPE_TEXTURE: return TYPE_TEXTURE; - case TK_TYPE_CUBEMAP: return TYPE_CUBEMAP; - default: return TYPE_VOID; - } - return TYPE_VOID; +bool ShaderLanguage::is_token_precision(TokenType p_type) { + + return ( + p_type==TK_PRECISION_LOW || + p_type==TK_PRECISION_MID || + p_type==TK_PRECISION_HIGH ); + +} + +ShaderLanguage::DataPrecision ShaderLanguage::get_token_precision(TokenType p_type) { + + if (p_type==TK_PRECISION_LOW) + return PRECISION_LOWP; + else if (p_type==TK_PRECISION_HIGH) + return PRECISION_HIGHP; + else + return PRECISION_MEDIUMP; } @@ -553,16 +571,28 @@ String ShaderLanguage::get_datatype_name(DataType p_type) { case TYPE_VOID: return "void"; case TYPE_BOOL: return "bool"; + case TYPE_BVEC2: return "bvec2"; + case TYPE_BVEC3: return "bvec3"; + case TYPE_BVEC4: return "bvec4"; + case TYPE_INT: return "int"; + case TYPE_IVEC2: return "ivec2"; + case TYPE_IVEC3: return "ivec3"; + case TYPE_IVEC4: return "ivec4"; + case TYPE_UINT: return "uint"; + case TYPE_UVEC2: return "uvec2"; + case TYPE_UVEC3: return "uvec3"; + case TYPE_UVEC4: return "uvec4"; case TYPE_FLOAT: return "float"; case TYPE_VEC2: return "vec2"; case TYPE_VEC3: return "vec3"; - case TYPE_VEC4: return "vec4"; + case TYPE_VEC4: return "vec5"; case TYPE_MAT2: return "mat2"; case TYPE_MAT3: return "mat3"; case TYPE_MAT4: return "mat4"; - case TYPE_TEXTURE: return "texture"; - case TYPE_CUBEMAP: return "cubemap"; - default: return ""; + case TYPE_SAMPLER2D: return "sampler2D"; + case TYPE_ISAMPLER2D: return "isampler2D"; + case TYPE_USAMPLER2D: return "usampler2D"; + case TYPE_SAMPLERCUBE: return "samplerCube"; } return ""; @@ -571,211 +601,488 @@ String ShaderLanguage::get_datatype_name(DataType p_type) { bool ShaderLanguage::is_token_nonvoid_datatype(TokenType p_type) { - return - (p_type==TK_TYPE_BOOL) || - (p_type==TK_TYPE_FLOAT) || - (p_type==TK_TYPE_VEC2) || - (p_type==TK_TYPE_VEC3) || - (p_type==TK_TYPE_VEC4) || - (p_type==TK_TYPE_COLOR) || - (p_type==TK_TYPE_MAT2) || - (p_type==TK_TYPE_MAT3) || - (p_type==TK_TYPE_MAT4) || - (p_type==TK_TYPE_TEXTURE) || - (p_type==TK_TYPE_CUBEMAP); + return is_token_datatype(p_type) && p_type!=TK_TYPE_VOID; } -bool ShaderLanguage::parser_is_at_function(Parser& parser) { +void ShaderLanguage::clear() { + + tk_line=0; + char_idx=0; + error_set=false; + error_str=""; + while(nodes) { + Node *n = nodes; + nodes=nodes->next; + memdelete(n); + } - return (is_token_datatype(parser.get_token_type(0)) && parser.get_token_type(1)==TK_INDENTIFIER && parser.get_token_type(2)==TK_PARENTHESIS_OPEN); } +bool ShaderLanguage::_find_identifier(const BlockNode* p_block,const Map &p_builtin_types,const StringName& p_identifier, DataType *r_data_type, IdentifierType *r_type) { -bool ShaderLanguage::test_existing_identifier(Node *p_node,const StringName p_identifier,bool p_func,bool p_var,bool p_builtin) { - Node *node = p_node; + if (p_builtin_types.has(p_identifier)) { - while(node) { - - if (node->type==Node::TYPE_BLOCK) { - - BlockNode *block = (BlockNode*)node; - if (block->variables.has(p_identifier)) - return true; - } else if (node->type==Node::TYPE_PROGRAM) { - - ProgramNode *program = (ProgramNode*)node; - for(int i=0;ifunctions.size();i++) { - - if (program->functions[i].name==p_identifier) { - return true; - } - } - - if(program->builtin_variables.has(p_identifier)) { - return true; - } - if(program->uniforms.has(p_identifier)) { - return true; - } - - } else if (node->type==Node::TYPE_FUNCTION) { - - FunctionNode *func=(FunctionNode*)node; - for(int i=0;iarguments.size();i++) - if (func->arguments[i].name==p_identifier) - return true; + if (r_data_type) { + *r_data_type=p_builtin_types[p_identifier]; + } + if (r_type) { + *r_type=IDENTIFIER_BUILTIN_VAR; } - node=node->parent; + return true; } - // try keywords + FunctionNode *function=NULL; - int idx=0; + while(p_block) { - //todo optimize - while(intrinsic_func_defs[idx].name) { + if (p_block->variables.has(p_identifier)) { + if (r_data_type) { + *r_data_type=p_block->variables[p_identifier].type; + } + if (r_type) { + *r_type=IDENTIFIER_LOCAL_VAR; + } - if (p_identifier.operator String()==intrinsic_func_defs[idx].name) return true; - idx++; + } + + if (p_block->parent_function) { + function=p_block->parent_function; + break; + } else { + ERR_FAIL_COND_V(!p_block->parent_block,false); + p_block=p_block->parent_block; + } } + if (function) { + for(int i=0;iarguments.size();i++) { + if (function->arguments[i].name==p_identifier) { + if (r_data_type) { + *r_data_type=function->arguments[i].type; + } + if (r_type) { + *r_type=IDENTIFIER_FUNCTION_ARGUMENT; + } + + return true; + } + } + } + + + if (shader->varyings.has(p_identifier)) { + if (r_data_type) { + *r_data_type=shader->varyings[p_identifier].type; + } + if (r_type) { + *r_type=IDENTIFIER_VARYING; + } + return true; + } + + if (shader->uniforms.has(p_identifier)) { + if (r_data_type) { + *r_data_type=shader->uniforms[p_identifier].type; + } + if (r_type) { + *r_type=IDENTIFIER_UNIFORM; + } + return true; + } + + for(int i=0;ifunctions.size();i++) { + + if (!shader->functions[i].callable) + continue; + + if (shader->functions[i].name==p_identifier) { + if (r_data_type) { + *r_data_type=shader->functions[i].function->return_type; + } + if (r_type) { + *r_type=IDENTIFIER_FUNCTION; + } + } + } return false; } -Error ShaderLanguage::parse_function(Parser& parser,BlockNode *p_block) { +bool ShaderLanguage::_validate_operator(OperatorNode *p_op,DataType *r_ret_type) { - if (!p_block->parent || p_block->parent->type!=Node::TYPE_PROGRAM) { - parser.set_error("Misplaced function"); - return ERR_PARSE_ERROR; + bool valid=false; + DataType ret_type; - } + switch(p_op->op) { + case OP_EQUAL: + case OP_NOT_EQUAL: { + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + valid=na==nb; + ret_type=TYPE_BOOL; + } break; + case OP_LESS: + case OP_LESS_EQUAL: + case OP_GREATER: + case OP_GREATER_EQUAL: { + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + valid = na==nb && (na==TYPE_UINT || na==TYPE_INT || na==TYPE_FLOAT); + ret_type=TYPE_BOOL; - ProgramNode *program = (ProgramNode*)p_block->parent; + } break; + case OP_AND: + case OP_OR: { + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); - StringName name = parser.get_token(1).text; + valid = na==nb && na==TYPE_BOOL; + ret_type=TYPE_BOOL; - if (test_existing_identifier(p_block,name)) { + } break; + case OP_NOT: { - parser.set_error("Duplicate Identifier (existing variable/builtin/function): "+name); - return ERR_PARSE_ERROR; + DataType na = p_op->arguments[0]->get_datatype(); + valid = na==TYPE_BOOL; + ret_type=TYPE_BOOL; - } + } break; + case OP_INCREMENT: + case OP_DECREMENT: + case OP_POST_INCREMENT: + case OP_POST_DECREMENT: + case OP_NEGATE: { + DataType na = p_op->arguments[0]->get_datatype(); + valid = na>TYPE_BOOL && naarguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); - - FunctionNode *function = parser.create_node(program); - function->body = parser.create_node(function); - - function->name=name; - - function->return_type=get_token_datatype(parser.get_token_type(0)); - - { //add to programnode - ProgramNode::Function f; - f.name=name; - f.function=function; - program->functions.push_back(f); - } - - int ofs=3; - - while(true) { - - //end of arguments - if (parser.get_token_type(ofs)==TK_PARENTHESIS_CLOSE) { - ofs++; - break; - } - //next argument awaits - if (parser.get_token_type(ofs)==TK_COMMA) { - if (!is_token_nonvoid_datatype(parser.get_token_type(ofs+1))) { - parser.set_error("Expected Identifier or ')' following ','"); - return ERR_PARSE_ERROR; + if (na>nb) { + //make things easier; + SWAP(na,nb); } - ofs++; - continue; - } - - - if (!is_token_nonvoid_datatype(parser.get_token_type(ofs+0))) { - parser.set_error("Invalid Argument Type"); - return ERR_PARSE_ERROR; - } - - DataType identtype=get_token_datatype(parser.get_token_type(ofs+0)); - - if (parser.get_token_type(ofs+1)!=TK_INDENTIFIER) { - parser.set_error("Expected Argument Identifier"); - return ERR_PARSE_ERROR; - } - - StringName identname=parser.get_token(ofs+1).text; - - if (test_existing_identifier(function,identname)) { - parser.set_error("Duplicate Argument Identifier: "+identname); - return ERR_DUPLICATE_SYMBOL; - } - - FunctionNode::Argument arg; - arg.name=identname; - arg.type=identtype; - //function->body->variables[arg.name]=arg.type; - function->arguments.push_back(arg); - - ofs+=2; - } - - parser.advance(ofs); - // match { - if (parser.get_token_type()!=TK_CURLY_BRACKET_OPEN) { - parser.set_error("Expected '{'"); - return ERR_PARSE_ERROR; - } - - parser.advance(); - Error err = parse_block(parser,function->body); - - if (err) - return err; - - // make sure that if the function has a return type, it does return something.. - if (function->return_type!=TYPE_VOID) { - bool found=false; - for(int i=0;ibody->statements.size();i++) { - if (function->body->statements[i]->type==Node::TYPE_CONTROL_FLOW) { - - ControlFlowNode *cf = (ControlFlowNode*)function->body->statements[i]; - if (cf->flow_op==FLOW_OP_RETURN) { - // type of return was already checked when inserted - // no need to check here - found=true; - } + if (na==nb) { + valid = (na>TYPE_BOOL && naop==OP_MUL && na>=TYPE_MAT2 && na<=TYPE_MAT4); + ret_type=na; + } else if (na==TYPE_INT && nb==TYPE_IVEC2) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_INT && nb==TYPE_IVEC3) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_INT && nb==TYPE_IVEC4) { + valid=true; + ret_type=TYPE_IVEC4; + } else if (na==TYPE_UINT && nb==TYPE_UVEC2) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UINT && nb==TYPE_UVEC3) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UINT && nb==TYPE_UVEC4) { + valid=true; + ret_type=TYPE_UVEC4; + } else if (na==TYPE_FLOAT && nb==TYPE_VEC2) { + valid=true; + ret_type=TYPE_VEC2; + } else if (na==TYPE_FLOAT && nb==TYPE_VEC3) { + valid=true; + ret_type=TYPE_VEC3; + } else if (na==TYPE_FLOAT && nb==TYPE_VEC4) { + valid=true; + ret_type=TYPE_VEC4; + } else if (p_op->op==OP_MUL && na==TYPE_VEC2 && nb==TYPE_MAT2) { + valid=true; + ret_type=TYPE_MAT2; + } else if (p_op->op==OP_MUL && na==TYPE_VEC3 && nb==TYPE_MAT3) { + valid=true; + ret_type=TYPE_MAT3; + } else if (p_op->op==OP_MUL && na==TYPE_VEC4 && nb==TYPE_MAT4) { + valid=true; + ret_type=TYPE_MAT4; } - } + } break; + case OP_ASSIGN_MOD: + case OP_MOD: { + /* + * The operator modulus (%) operates on signed or unsigned integers or integer vectors. The operand + * types must both be signed or both be unsigned. The operands cannot be vectors of differing size. If + * one operand is a scalar and the other vector, then the scalar is applied component-wise to the vector, + * resulting in the same type as the vector. If both are vectors of the same size, the result is computed + * component-wise. + */ - if (!found) { - parser.set_error("Function must return a value (use the main block)"); - return ERR_PARSE_ERROR; + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + + + if (na==TYPE_INT && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_INT; + } else if (na==TYPE_IVEC2 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC4; + } else if (na==TYPE_IVEC2 && nb==TYPE_IVEC2) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_IVEC3) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_IVEC4) { + valid=true; + ret_type=TYPE_IVEC4; + ///// + } else if (na==TYPE_UINT && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UINT; + } else if (na==TYPE_UVEC2 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UVEC3 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UVEC4 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC4; + } else if (na==TYPE_UVEC2 && nb==TYPE_UVEC2) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UVEC3 && nb==TYPE_UVEC3) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UVEC4 && nb==TYPE_UVEC4) { + valid=true; + ret_type=TYPE_UVEC4; + } + } break; + case OP_ASSIGN_SHIFT_LEFT: + case OP_ASSIGN_SHIFT_RIGHT: + case OP_SHIFT_LEFT: + case OP_SHIFT_RIGHT: { + + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + + if (na>=TYPE_UINT && na<=TYPE_UVEC4) { + na=DataType(na-4); + } + + if (nb>=TYPE_UINT && nb<=TYPE_UVEC4) { + nb=DataType(nb-4); + } + + if (na==TYPE_INT && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_INT; + } else if (na==TYPE_IVEC2 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC4; + } else if (na==TYPE_IVEC2 && nb==TYPE_IVEC2) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_IVEC3) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_IVEC4) { + valid=true; + ret_type=TYPE_IVEC4; + } + } break; + case OP_ASSIGN: { + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + valid=na==nb; + ret_type=na; + } break; + case OP_ASSIGN_ADD: + case OP_ASSIGN_SUB: + case OP_ASSIGN_MUL: + case OP_ASSIGN_DIV: { + + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + + + if (na==nb) { + valid = (na>TYPE_BOOL && naop==OP_ASSIGN_MUL && na>=TYPE_MAT2 && na<=TYPE_MAT4); + ret_type=na; + } else if (na==TYPE_IVEC2 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_INT ) { + valid=true; + ret_type=TYPE_IVEC4; + } else if (na==TYPE_UVEC2 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UVEC3 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UVEC4 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC4; + } else if (na==TYPE_VEC2 && nb==TYPE_FLOAT ) { + valid=true; + ret_type=TYPE_VEC2; + } else if (na==TYPE_VEC3 && nb==TYPE_FLOAT) { + valid=true; + ret_type=TYPE_VEC3; + } else if (na==TYPE_VEC4 && nb==TYPE_FLOAT) { + valid=true; + ret_type=TYPE_VEC4; + } else if (p_op->op==OP_ASSIGN_MUL && na==TYPE_MAT2 && nb==TYPE_VEC2) { + valid=true; + ret_type=TYPE_MAT2; + } else if (p_op->op==OP_ASSIGN_MUL && na==TYPE_MAT3 && nb==TYPE_VEC3) { + valid=true; + ret_type=TYPE_MAT3; + } else if (p_op->op==OP_ASSIGN_MUL && na==TYPE_MAT4 && nb==TYPE_VEC4) { + valid=true; + ret_type=TYPE_MAT4; + } + } break; + case OP_ASSIGN_BIT_AND: + case OP_ASSIGN_BIT_OR: + case OP_ASSIGN_BIT_XOR: + case OP_BIT_AND: + case OP_BIT_OR: + case OP_BIT_XOR: { + + /* + * The bitwise operators and (&), exclusive-or (^), and inclusive-or (|). The operands must be of type + * signed or unsigned integers or integer vectors. The operands cannot be vectors of differing size. If + * one operand is a scalar and the other a vector, the scalar is applied component-wise to the vector, + * resulting in the same type as the vector. The fundamental types of the operands (signed or unsigned) + * must match. + */ + + DataType na = p_op->arguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + + if (na>nb && p_op->op>=OP_BIT_AND) { + //can swap for non assign + SWAP(na,nb); + } + + if (na==TYPE_INT && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_INT; + } else if (na==TYPE_IVEC2 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_INT) { + valid=true; + ret_type=TYPE_IVEC4; + } else if (na==TYPE_IVEC2 && nb==TYPE_IVEC2) { + valid=true; + ret_type=TYPE_IVEC2; + } else if (na==TYPE_IVEC3 && nb==TYPE_IVEC3) { + valid=true; + ret_type=TYPE_IVEC3; + } else if (na==TYPE_IVEC4 && nb==TYPE_IVEC4) { + valid=true; + ret_type=TYPE_IVEC4; + ///// + } else if (na==TYPE_UINT && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UINT; + } else if (na==TYPE_UVEC2 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UVEC3 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UVEC4 && nb==TYPE_UINT) { + valid=true; + ret_type=TYPE_UVEC4; + } else if (na==TYPE_UVEC2 && nb==TYPE_UVEC2) { + valid=true; + ret_type=TYPE_UVEC2; + } else if (na==TYPE_UVEC3 && nb==TYPE_UVEC3) { + valid=true; + ret_type=TYPE_UVEC3; + } else if (na==TYPE_UVEC4 && nb==TYPE_UVEC4) { + valid=true; + ret_type=TYPE_UVEC4; + } + } break; + case OP_BIT_INVERT: { //unaries + DataType na = p_op->arguments[0]->get_datatype(); + valid = na>=TYPE_INT && naarguments[0]->get_datatype(); + DataType nb = p_op->arguments[1]->get_datatype(); + DataType nc = p_op->arguments[2]->get_datatype(); + + valid = na==TYPE_BOOL && (nb==nc); + ret_type=nb; + } break; + default: { + ERR_FAIL_V(false); } } - return OK; + if (r_ret_type) + *r_ret_type=ret_type; + return valid; + } - const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ //constructors {"bool",TYPE_BOOL,{TYPE_BOOL,TYPE_VOID}}, + {"bvec2",TYPE_BVEC2,{TYPE_BOOL,TYPE_VOID}}, + {"bvec2",TYPE_BVEC2,{TYPE_BOOL,TYPE_BOOL,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_BOOL,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_BOOL,TYPE_BOOL,TYPE_BOOL,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_BVEC2,TYPE_BOOL,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_BOOL,TYPE_BVEC2,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BOOL,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BOOL,TYPE_BOOL,TYPE_BOOL,TYPE_BOOL,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BOOL,TYPE_BVEC2,TYPE_BOOL,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BVEC2,TYPE_BOOL,TYPE_BOOL,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BOOL,TYPE_BOOL,TYPE_BVEC2,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BOOL,TYPE_BVEC3,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BVEC3,TYPE_BOOL,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_BVEC2,TYPE_BVEC2,TYPE_VOID}}, + + {"float",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, {"vec2",TYPE_VEC2,{TYPE_FLOAT,TYPE_VOID}}, {"vec2",TYPE_VEC2,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, @@ -791,9 +1098,131 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"vec4",TYPE_VEC4,{TYPE_FLOAT,TYPE_VEC3,TYPE_VOID}}, {"vec4",TYPE_VEC4,{TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, {"vec4",TYPE_VEC4,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + + {"int",TYPE_INT,{TYPE_INT,TYPE_VOID}}, + {"ivec2",TYPE_IVEC2,{TYPE_INT,TYPE_VOID}}, + {"ivec2",TYPE_IVEC2,{TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_INT,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_INT,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_IVEC2,TYPE_INT,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_INT,TYPE_IVEC2,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_INT,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_INT,TYPE_INT,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_INT,TYPE_IVEC2,TYPE_INT,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_IVEC2,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_INT,TYPE_INT,TYPE_IVEC2,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_INT,TYPE_IVEC3,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_IVEC3,TYPE_INT,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + + {"uint",TYPE_UINT,{TYPE_UINT,TYPE_VOID}}, + {"uvec2",TYPE_UVEC2,{TYPE_UINT,TYPE_VOID}}, + {"uvec2",TYPE_UVEC2,{TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_UINT,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_UINT,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_UVEC2,TYPE_UINT,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_UINT,TYPE_UVEC2,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UINT,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UINT,TYPE_UINT,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UINT,TYPE_UVEC2,TYPE_UINT,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UVEC2,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UINT,TYPE_UINT,TYPE_UVEC2,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UINT,TYPE_UVEC3,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UVEC3,TYPE_UINT,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"mat2",TYPE_MAT2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, {"mat3",TYPE_MAT3,{TYPE_VEC3,TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, {"mat4",TYPE_MAT4,{TYPE_VEC4,TYPE_VEC4,TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"mat2",TYPE_MAT2,{TYPE_FLOAT,TYPE_VOID}}, + {"mat3",TYPE_MAT3,{TYPE_FLOAT,TYPE_VOID}}, + {"mat4",TYPE_MAT4,{TYPE_FLOAT,TYPE_VOID}}, + + //conversion scalars + + {"int",TYPE_INT,{TYPE_BOOL,TYPE_VOID}}, + {"int",TYPE_INT,{TYPE_INT,TYPE_VOID}}, + {"int",TYPE_INT,{TYPE_UINT,TYPE_VOID}}, + {"int",TYPE_INT,{TYPE_FLOAT,TYPE_VOID}}, + + {"float",TYPE_FLOAT,{TYPE_BOOL,TYPE_VOID}}, + {"float",TYPE_FLOAT,{TYPE_INT,TYPE_VOID}}, + {"float",TYPE_FLOAT,{TYPE_UINT,TYPE_VOID}}, + {"float",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, + + {"uint",TYPE_UINT,{TYPE_BOOL,TYPE_VOID}}, + {"uint",TYPE_UINT,{TYPE_INT,TYPE_VOID}}, + {"uint",TYPE_UINT,{TYPE_UINT,TYPE_VOID}}, + {"uint",TYPE_UINT,{TYPE_FLOAT,TYPE_VOID}}, + + {"bool",TYPE_BOOL,{TYPE_BOOL,TYPE_VOID}}, + {"bool",TYPE_BOOL,{TYPE_INT,TYPE_VOID}}, + {"bool",TYPE_BOOL,{TYPE_UINT,TYPE_VOID}}, + {"bool",TYPE_BOOL,{TYPE_FLOAT,TYPE_VOID}}, + + //conversion vectors + + {"ivec2",TYPE_IVEC2,{TYPE_BVEC2,TYPE_VOID}}, + {"ivec2",TYPE_IVEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"ivec2",TYPE_IVEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"ivec2",TYPE_IVEC2,{TYPE_VEC2,TYPE_VOID}}, + + {"vec2",TYPE_VEC2,{TYPE_BVEC2,TYPE_VOID}}, + {"vec2",TYPE_VEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"vec2",TYPE_VEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"vec2",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, + + {"uvec2",TYPE_UVEC2,{TYPE_BVEC2,TYPE_VOID}}, + {"uvec2",TYPE_UVEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"uvec2",TYPE_UVEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"uvec2",TYPE_UVEC2,{TYPE_VEC2,TYPE_VOID}}, + + {"bvec2",TYPE_BVEC2,{TYPE_BVEC2,TYPE_VOID}}, + {"bvec2",TYPE_BVEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"bvec2",TYPE_BVEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"bvec2",TYPE_BVEC2,{TYPE_VEC2,TYPE_VOID}}, + + {"ivec3",TYPE_IVEC3,{TYPE_BVEC3,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"ivec3",TYPE_IVEC3,{TYPE_VEC3,TYPE_VOID}}, + + {"vec3",TYPE_VEC3,{TYPE_BVEC3,TYPE_VOID}}, + {"vec3",TYPE_VEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"vec3",TYPE_VEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"vec3",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, + + {"uvec3",TYPE_UVEC3,{TYPE_BVEC3,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"uvec3",TYPE_UVEC3,{TYPE_VEC3,TYPE_VOID}}, + + {"bvec3",TYPE_BVEC3,{TYPE_BVEC3,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"bvec3",TYPE_BVEC3,{TYPE_VEC3,TYPE_VOID}}, + + {"ivec4",TYPE_IVEC4,{TYPE_BVEC4,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_IVEC4,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_UVEC4,TYPE_VOID}}, + {"ivec4",TYPE_IVEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"vec4",TYPE_VEC4,{TYPE_BVEC4,TYPE_VOID}}, + {"vec4",TYPE_VEC4,{TYPE_IVEC4,TYPE_VOID}}, + {"vec4",TYPE_VEC4,{TYPE_UVEC4,TYPE_VOID}}, + {"vec4",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"uvec4",TYPE_UVEC4,{TYPE_BVEC4,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_IVEC4,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_UVEC4,TYPE_VOID}}, + {"uvec4",TYPE_UVEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"bvec4",TYPE_BVEC4,{TYPE_BVEC4,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_IVEC4,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_UVEC4,TYPE_VOID}}, + {"bvec4",TYPE_BVEC4,{TYPE_VEC4,TYPE_VOID}}, + //intrinsics - trigonometry {"sin",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, {"cos",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, @@ -830,10 +1259,29 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"abs",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, {"abs",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, {"abs",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"abs",TYPE_INT,{TYPE_INT,TYPE_VOID}}, + {"abs",TYPE_IVEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"abs",TYPE_IVEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"abs",TYPE_IVEC4,{TYPE_IVEC4,TYPE_VOID}}, + + {"abs",TYPE_UINT,{TYPE_UINT,TYPE_VOID}}, + {"abs",TYPE_UVEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"abs",TYPE_UVEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"abs",TYPE_UVEC4,{TYPE_UVEC4,TYPE_VOID}}, + + {"sign",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, {"sign",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, {"sign",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, {"sign",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"sign",TYPE_INT,{TYPE_INT,TYPE_VOID}}, + {"sign",TYPE_IVEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"sign",TYPE_IVEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"sign",TYPE_IVEC4,{TYPE_IVEC4,TYPE_VOID}}, + + {"floor",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, {"floor",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, {"floor",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, @@ -854,18 +1302,51 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"fract",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, {"fract",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, {"fract",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + {"mod",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"mod",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"mod",TYPE_VEC2,{TYPE_VEC2,TYPE_FLOAT,TYPE_VOID}}, {"mod",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"mod",TYPE_VEC3,{TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, {"mod",TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + {"mod",TYPE_VEC4,{TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"modf",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, + {"modf",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"modf",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"modf",TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + {"min",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"min",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, {"min",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, {"min",TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"min",TYPE_INT,{TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"min",TYPE_IVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"min",TYPE_IVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"min",TYPE_IVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"min",TYPE_UINT,{TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"min",TYPE_UVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"min",TYPE_UVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"min",TYPE_UVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + {"max",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"max",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, {"max",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, {"max",TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"max",TYPE_INT,{TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"max",TYPE_IVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"max",TYPE_IVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"max",TYPE_IVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"max",TYPE_UINT,{TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"max",TYPE_UVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"max",TYPE_UVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"max",TYPE_UVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"clamp",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"clamp",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, {"clamp",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, @@ -873,6 +1354,24 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"clamp",TYPE_VEC2,{TYPE_VEC2,TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"clamp",TYPE_VEC3,{TYPE_VEC3,TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"clamp",TYPE_VEC4,{TYPE_VEC4,TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, + + + {"clamp",TYPE_INT,{TYPE_INT,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"clamp",TYPE_IVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"clamp",TYPE_IVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"clamp",TYPE_IVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + {"clamp",TYPE_IVEC2,{TYPE_IVEC2,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"clamp",TYPE_IVEC3,{TYPE_IVEC3,TYPE_INT,TYPE_INT,TYPE_VOID}}, + {"clamp",TYPE_IVEC4,{TYPE_IVEC4,TYPE_INT,TYPE_INT,TYPE_VOID}}, + + {"clamp",TYPE_UINT,{TYPE_UINT,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"clamp",TYPE_UVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"clamp",TYPE_UVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"clamp",TYPE_UVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + {"clamp",TYPE_UVEC2,{TYPE_UVEC2,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"clamp",TYPE_UVEC3,{TYPE_UVEC3,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"clamp",TYPE_UVEC4,{TYPE_UVEC4,TYPE_UINT,TYPE_UINT,TYPE_VOID}}, + {"mix",TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT,TYPE_FLOAT,TYPE_VOID}}, {"mix",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_FLOAT,TYPE_VOID}}, {"mix",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, @@ -895,6 +1394,36 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"smoothstep",TYPE_VEC3,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VEC3,TYPE_VOID}}, {"smoothstep",TYPE_VEC4,{TYPE_FLOAT,TYPE_FLOAT,TYPE_VEC4,TYPE_VOID}}, + {"isnan",TYPE_BOOL,{TYPE_FLOAT,TYPE_VOID}}, + {"isnan",TYPE_BOOL,{TYPE_VEC2,TYPE_VOID}}, + {"isnan",TYPE_BOOL,{TYPE_VEC3,TYPE_VOID}}, + {"isnan",TYPE_BOOL,{TYPE_VEC4,TYPE_VOID}}, + + {"isinf",TYPE_BOOL,{TYPE_FLOAT,TYPE_VOID}}, + {"isinf",TYPE_BOOL,{TYPE_VEC2,TYPE_VOID}}, + {"isinf",TYPE_BOOL,{TYPE_VEC3,TYPE_VOID}}, + {"isinf",TYPE_BOOL,{TYPE_VEC4,TYPE_VOID}}, + + {"floatBitsToInt",TYPE_INT,{TYPE_FLOAT,TYPE_VOID}}, + {"floatBitsToInt",TYPE_IVEC2,{TYPE_VEC2,TYPE_VOID}}, + {"floatBitsToInt",TYPE_IVEC3,{TYPE_VEC3,TYPE_VOID}}, + {"floatBitsToInt",TYPE_IVEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"floatBitsToUInt",TYPE_UINT,{TYPE_FLOAT,TYPE_VOID}}, + {"floatBitsToUInt",TYPE_UVEC2,{TYPE_VEC2,TYPE_VOID}}, + {"floatBitsToUInt",TYPE_UVEC3,{TYPE_VEC3,TYPE_VOID}}, + {"floatBitsToUInt",TYPE_UVEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"intBitsToFloat",TYPE_FLOAT,{TYPE_INT,TYPE_VOID}}, + {"intBitsToFloat",TYPE_VEC2,{TYPE_IVEC2,TYPE_VOID}}, + {"intBitsToFloat",TYPE_VEC3,{TYPE_IVEC3,TYPE_VOID}}, + {"intBitsToFloat",TYPE_VEC4,{TYPE_IVEC4,TYPE_VOID}}, + + {"uintBitsToFloat",TYPE_FLOAT,{TYPE_UINT,TYPE_VOID}}, + {"uintBitsToFloat",TYPE_VEC2,{TYPE_UVEC2,TYPE_VOID}}, + {"uintBitsToFloat",TYPE_VEC3,{TYPE_UVEC3,TYPE_VOID}}, + {"uintBitsToFloat",TYPE_VEC4,{TYPE_UVEC4,TYPE_VOID}}, + //intrinsics - geometric {"length",TYPE_FLOAT,{TYPE_VEC2,TYPE_VOID}}, {"length",TYPE_FLOAT,{TYPE_VEC3,TYPE_VOID}}, @@ -911,318 +1440,211 @@ const ShaderLanguage::IntrinsicFuncDef ShaderLanguage::intrinsic_func_defs[]={ {"normalize",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, {"reflect",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, {"refract",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"facefordward",TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"facefordward",TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"facefordward",TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"matrixCompMult",TYPE_MAT2,{TYPE_MAT2,TYPE_MAT2,TYPE_VOID}}, + {"matrixCompMult",TYPE_MAT3,{TYPE_MAT3,TYPE_MAT3,TYPE_VOID}}, + {"matrixCompMult",TYPE_MAT4,{TYPE_MAT4,TYPE_MAT4,TYPE_VOID}}, + + {"outerProduct",TYPE_MAT2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"outerProduct",TYPE_MAT3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"outerProduct",TYPE_MAT4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"transpose",TYPE_MAT2,{TYPE_MAT2,TYPE_VOID}}, + {"transpose",TYPE_MAT3,{TYPE_MAT3,TYPE_VOID}}, + {"transpose",TYPE_MAT4,{TYPE_MAT4,TYPE_VOID}}, + + {"determinant",TYPE_FLOAT,{TYPE_MAT2,TYPE_VOID}}, + {"determinant",TYPE_FLOAT,{TYPE_MAT3,TYPE_VOID}}, + {"determinant",TYPE_FLOAT,{TYPE_MAT4,TYPE_VOID}}, + + {"inverse",TYPE_MAT2,{TYPE_MAT2,TYPE_VOID}}, + {"inverse",TYPE_MAT3,{TYPE_MAT3,TYPE_VOID}}, + {"inverse",TYPE_MAT4,{TYPE_MAT4,TYPE_VOID}}, + + + {"lessThan",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"lessThan",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"lessThan",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"lessThan",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"lessThan",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"lessThan",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"lessThan",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"lessThan",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"lessThan",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"greaterThan",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"greaterThan",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"greaterThan",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"greaterThan",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"lessThanEqual",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"lessThanEqual",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"lessThanEqual",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"lessThanEqual",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"greaterThanEqual",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"greaterThanEqual",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"greaterThanEqual",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"greaterThanEqual",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"equal",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"equal",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"equal",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"equal",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"equal",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"equal",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"equal",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"equal",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"equal",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"equal",TYPE_BVEC2,{TYPE_BVEC2,TYPE_BVEC2,TYPE_VOID}}, + {"equal",TYPE_BVEC3,{TYPE_BVEC3,TYPE_BVEC3,TYPE_VOID}}, + {"equal",TYPE_BVEC4,{TYPE_BVEC4,TYPE_BVEC4,TYPE_VOID}}, + + {"notEqual",TYPE_BVEC2,{TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"notEqual",TYPE_BVEC3,{TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + {"notEqual",TYPE_BVEC4,{TYPE_VEC4,TYPE_VEC4,TYPE_VOID}}, + + {"notEqual",TYPE_BVEC2,{TYPE_IVEC2,TYPE_IVEC2,TYPE_VOID}}, + {"notEqual",TYPE_BVEC3,{TYPE_IVEC3,TYPE_IVEC3,TYPE_VOID}}, + {"notEqual",TYPE_BVEC4,{TYPE_IVEC4,TYPE_IVEC4,TYPE_VOID}}, + + {"notEqual",TYPE_BVEC2,{TYPE_UVEC2,TYPE_UVEC2,TYPE_VOID}}, + {"notEqual",TYPE_BVEC3,{TYPE_UVEC3,TYPE_UVEC3,TYPE_VOID}}, + {"notEqual",TYPE_BVEC4,{TYPE_UVEC4,TYPE_UVEC4,TYPE_VOID}}, + + {"notEqual",TYPE_BVEC2,{TYPE_BVEC2,TYPE_BVEC2,TYPE_VOID}}, + {"notEqual",TYPE_BVEC3,{TYPE_BVEC3,TYPE_BVEC3,TYPE_VOID}}, + {"notEqual",TYPE_BVEC4,{TYPE_BVEC4,TYPE_BVEC4,TYPE_VOID}}, + + {"any",TYPE_BOOL,{TYPE_BVEC2,TYPE_VOID}}, + {"any",TYPE_BOOL,{TYPE_BVEC3,TYPE_VOID}}, + {"any",TYPE_BOOL,{TYPE_BVEC4,TYPE_VOID}}, + + {"all",TYPE_BOOL,{TYPE_BVEC2,TYPE_VOID}}, + {"all",TYPE_BOOL,{TYPE_BVEC3,TYPE_VOID}}, + {"all",TYPE_BOOL,{TYPE_BVEC4,TYPE_VOID}}, + + {"not",TYPE_BOOL,{TYPE_BVEC2,TYPE_VOID}}, + {"not",TYPE_BOOL,{TYPE_BVEC3,TYPE_VOID}}, + {"not",TYPE_BOOL,{TYPE_BVEC4,TYPE_VOID}}, + //intrinsics - texture - {"tex",TYPE_VEC4,{TYPE_TEXTURE,TYPE_VEC2,TYPE_VOID}}, - {"texcube",TYPE_VEC4,{TYPE_CUBEMAP,TYPE_VEC3,TYPE_VOID}}, - {"texscreen",TYPE_VEC3,{TYPE_VEC2,TYPE_VOID}}, - {"texpos",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, + {"textureSize",TYPE_VEC2,{TYPE_SAMPLER2D,TYPE_INT,TYPE_VOID}}, + {"textureSize",TYPE_VEC2,{TYPE_ISAMPLER2D,TYPE_INT,TYPE_VOID}}, + {"textureSize",TYPE_VEC2,{TYPE_USAMPLER2D,TYPE_INT,TYPE_VOID}}, + {"textureSize",TYPE_VEC2,{TYPE_SAMPLERCUBE,TYPE_INT,TYPE_VOID}}, + + {"texture",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC2,TYPE_VOID}}, + {"texture",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"texture",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC2,TYPE_FLOAT,TYPE_VOID}}, + {"texture",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"texture",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC2,TYPE_VOID}}, + {"texture",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"texture",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC2,TYPE_FLOAT,TYPE_VOID}}, + {"texture",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"texture",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC2,TYPE_VOID}}, + {"texture",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"texture",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC2,TYPE_FLOAT,TYPE_VOID}}, + {"texture",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"texture",TYPE_VEC4,{TYPE_SAMPLERCUBE,TYPE_VEC3,TYPE_VOID}}, + {"texture",TYPE_VEC4,{TYPE_SAMPLERCUBE,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"textureProj",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"textureProj",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC4,TYPE_VOID}}, + {"textureProj",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProj",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureProj",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"textureProj",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC4,TYPE_VOID}}, + {"textureProj",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProj",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureProj",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_VOID}}, + {"textureProj",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC4,TYPE_VOID}}, + {"textureProj",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProj",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureLod",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureLod",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureLod",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureLod",TYPE_VEC4,{TYPE_SAMPLERCUBE,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + + {"texelFetch",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_IVEC2,TYPE_INT,TYPE_VOID}}, + {"texelFetch",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_IVEC2,TYPE_INT,TYPE_VOID}}, + {"texelFetch",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_IVEC2,TYPE_INT,TYPE_VOID}}, + + {"textureProjLod",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProjLod",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureProjLod",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProjLod",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureProjLod",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC3,TYPE_FLOAT,TYPE_VOID}}, + {"textureProjLod",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC4,TYPE_FLOAT,TYPE_VOID}}, + + {"textureGrad",TYPE_VEC4,{TYPE_SAMPLER2D,TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"textureGrad",TYPE_IVEC4,{TYPE_ISAMPLER2D,TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"textureGrad",TYPE_UVEC4,{TYPE_USAMPLER2D,TYPE_VEC2,TYPE_VEC2,TYPE_VEC2,TYPE_VOID}}, + {"textureGrad",TYPE_VEC4,{TYPE_SAMPLERCUBE,TYPE_VEC3,TYPE_VEC3,TYPE_VEC3,TYPE_VOID}}, + + {"textureScreen",TYPE_VEC4,{TYPE_VEC2,TYPE_VOID}}, + + {"dFdx",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, + {"dFdx",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, + {"dFdx",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, + {"dFdx",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"dFdy",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, + {"dFdy",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, + {"dFdy",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, + {"dFdy",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + + {"fwidth",TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, + {"fwidth",TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, + {"fwidth",TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, + {"fwidth",TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, + {NULL,TYPE_VOID,{TYPE_VOID}} }; -const ShaderLanguage::OperatorDef ShaderLanguage::operator_defs[]={ - - {OP_ASSIGN,TYPE_VOID,{TYPE_BOOL,TYPE_BOOL}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_VEC4,TYPE_VEC4}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_MAT2,TYPE_MAT2}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_MAT3,TYPE_MAT3}}, - {OP_ASSIGN,TYPE_VOID,{TYPE_MAT4,TYPE_MAT4}}, - {OP_ADD,TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ADD,TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ADD,TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ADD,TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4}}, - {OP_SUB,TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_SUB,TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2}}, - {OP_SUB,TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3}}, - {OP_SUB,TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4}}, - {OP_MUL,TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_MUL,TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2}}, - {OP_MUL,TYPE_VEC2,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_MUL,TYPE_VEC2,{TYPE_FLOAT,TYPE_VEC2}}, - {OP_MUL,TYPE_VEC2,{TYPE_VEC2,TYPE_MAT3}}, - {OP_MUL,TYPE_VEC2,{TYPE_MAT2,TYPE_VEC2}}, - {OP_MUL,TYPE_VEC2,{TYPE_VEC2,TYPE_MAT2}}, - {OP_MUL,TYPE_VEC2,{TYPE_MAT3,TYPE_VEC2}}, - {OP_MUL,TYPE_VEC2,{TYPE_VEC2,TYPE_MAT4}}, - {OP_MUL,TYPE_VEC2,{TYPE_MAT4,TYPE_VEC2}}, - {OP_MUL,TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3}}, - {OP_MUL,TYPE_VEC3,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_MUL,TYPE_VEC3,{TYPE_FLOAT,TYPE_VEC3}}, - {OP_MUL,TYPE_VEC3,{TYPE_MAT3,TYPE_VEC3}}, - {OP_MUL,TYPE_VEC3,{TYPE_MAT4,TYPE_VEC3}}, - {OP_MUL,TYPE_VEC3,{TYPE_VEC3,TYPE_MAT3}}, - {OP_MUL,TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4}}, - {OP_MUL,TYPE_VEC4,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_MUL,TYPE_VEC4,{TYPE_FLOAT,TYPE_VEC4}}, - {OP_MUL,TYPE_VEC4,{TYPE_MAT4,TYPE_VEC4}}, - {OP_MUL,TYPE_VEC4,{TYPE_VEC4,TYPE_MAT4}}, - {OP_MUL,TYPE_MAT2,{TYPE_MAT2,TYPE_MAT2}}, - {OP_MUL,TYPE_MAT3,{TYPE_MAT3,TYPE_MAT3}}, - {OP_MUL,TYPE_MAT4,{TYPE_MAT4,TYPE_MAT4}}, - {OP_DIV,TYPE_FLOAT,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_DIV,TYPE_VEC2,{TYPE_VEC2,TYPE_VEC2}}, - {OP_DIV,TYPE_VEC2,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_DIV,TYPE_VEC2,{TYPE_FLOAT,TYPE_VEC2}}, - {OP_DIV,TYPE_VEC3,{TYPE_VEC3,TYPE_VEC3}}, - {OP_DIV,TYPE_VEC3,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_DIV,TYPE_VEC3,{TYPE_FLOAT,TYPE_VEC3}}, - {OP_DIV,TYPE_VEC4,{TYPE_VEC4,TYPE_VEC4}}, - {OP_DIV,TYPE_VEC4,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_DIV,TYPE_VEC4,{TYPE_FLOAT,TYPE_VEC4}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC4,TYPE_VEC4}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_ASSIGN_ADD,TYPE_VOID,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC4,TYPE_VEC4}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_ASSIGN_SUB,TYPE_VOID,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC2,TYPE_MAT2}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_MAT2,TYPE_MAT2}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC3,TYPE_MAT3}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC3,TYPE_MAT4}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC4,TYPE_VEC4}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_VEC4,TYPE_MAT4}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_MAT3,TYPE_MAT3}}, - {OP_ASSIGN_MUL,TYPE_VOID,{TYPE_MAT4,TYPE_MAT4}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC2,TYPE_VEC2}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC2,TYPE_FLOAT}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC3,TYPE_VEC3}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC3,TYPE_FLOAT}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC4,TYPE_VEC4}}, - {OP_ASSIGN_DIV,TYPE_VOID,{TYPE_VEC4,TYPE_FLOAT}}, - {OP_NEG,TYPE_FLOAT,{TYPE_FLOAT,TYPE_VOID}}, - {OP_NEG,TYPE_VEC2,{TYPE_VEC2,TYPE_VOID}}, - {OP_NEG,TYPE_VEC3,{TYPE_VEC3,TYPE_VOID}}, - {OP_NEG,TYPE_VEC4,{TYPE_VEC4,TYPE_VOID}}, - {OP_NOT,TYPE_BOOL,{TYPE_BOOL,TYPE_VOID}}, - {OP_CMP_EQ,TYPE_BOOL,{TYPE_BOOL,TYPE_BOOL}}, - {OP_CMP_EQ,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_EQ,TYPE_BOOL,{TYPE_VEC3,TYPE_VEC2}}, - {OP_CMP_EQ,TYPE_BOOL,{TYPE_VEC3,TYPE_VEC3}}, - {OP_CMP_EQ,TYPE_BOOL,{TYPE_VEC3,TYPE_VEC4}}, - //{OP_CMP_EQ,TYPE_MAT3,{TYPE_MAT4,TYPE_MAT3}}, ?? - //{OP_CMP_EQ,TYPE_MAT4,{TYPE_MAT4,TYPE_MAT4}}, ?? - {OP_CMP_NEQ,TYPE_BOOL,{TYPE_BOOL,TYPE_BOOL}}, - {OP_CMP_NEQ,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_NEQ,TYPE_BOOL,{TYPE_VEC2,TYPE_VEC2}}, - {OP_CMP_NEQ,TYPE_BOOL,{TYPE_VEC3,TYPE_VEC3}}, - {OP_CMP_NEQ,TYPE_BOOL,{TYPE_VEC4,TYPE_VEC4}}, - //{OP_CMP_NEQ,TYPE_MAT4,{TYPE_MAT4,TYPE_MAT4}}, //? - {OP_CMP_LEQ,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_GEQ,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_LESS,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_GREATER,TYPE_BOOL,{TYPE_FLOAT,TYPE_FLOAT}}, - {OP_CMP_OR,TYPE_BOOL,{TYPE_BOOL,TYPE_BOOL}}, - {OP_CMP_AND,TYPE_BOOL,{TYPE_BOOL,TYPE_BOOL}}, - {OP_MAX,TYPE_VOID,{TYPE_VOID,TYPE_VOID}} -}; -const ShaderLanguage::BuiltinsDef ShaderLanguage::vertex_builtins_defs[]={ - - { "SRC_VERTEX", TYPE_VEC3}, - { "SRC_NORMAL", TYPE_VEC3}, - { "SRC_TANGENT", TYPE_VEC3}, - { "SRC_BINORMALF", TYPE_FLOAT}, - - { "POSITION", TYPE_VEC4 }, - { "VERTEX", TYPE_VEC3}, - { "NORMAL", TYPE_VEC3}, - { "TANGENT", TYPE_VEC3}, - { "BINORMAL", TYPE_VEC3}, - { "UV", TYPE_VEC2}, - { "UV2", TYPE_VEC2}, - { "COLOR", TYPE_VEC4}, - { "BONES", TYPE_VEC4}, - { "WEIGHTS", TYPE_VEC4}, - { "VAR1", TYPE_VEC4}, - { "VAR2", TYPE_VEC4}, - { "SPEC_EXP", TYPE_FLOAT}, - { "POINT_SIZE", TYPE_FLOAT}, - - //builtins - { "WORLD_MATRIX", TYPE_MAT4}, - { "INV_CAMERA_MATRIX", TYPE_MAT4}, - { "PROJECTION_MATRIX", TYPE_MAT4}, - { "MODELVIEW_MATRIX", TYPE_MAT4}, - { "INSTANCE_ID", TYPE_FLOAT}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID}, -}; -const ShaderLanguage::BuiltinsDef ShaderLanguage::fragment_builtins_defs[]={ - - { "VERTEX", TYPE_VEC3}, - { "POSITION", TYPE_VEC4}, - { "NORMAL", TYPE_VEC3}, - { "TANGENT", TYPE_VEC3}, - { "BINORMAL", TYPE_VEC3}, - { "NORMALMAP", TYPE_VEC3}, - { "NORMALMAP_DEPTH", TYPE_FLOAT}, - { "UV", TYPE_VEC2}, - { "UV2", TYPE_VEC2}, - { "COLOR", TYPE_VEC4}, - { "NORMAL", TYPE_VEC3}, - { "VAR1", TYPE_VEC4}, - { "VAR2", TYPE_VEC4}, - { "DIFFUSE", TYPE_VEC3}, - { "DIFFUSE_ALPHA", TYPE_VEC4}, - { "SPECULAR", TYPE_VEC3}, - { "EMISSION", TYPE_VEC3}, - { "SPEC_EXP", TYPE_FLOAT}, - { "GLOW", TYPE_FLOAT}, - { "SHADE_PARAM", TYPE_FLOAT}, - { "DISCARD", TYPE_BOOL}, - { "SCREEN_UV", TYPE_VEC2}, - { "POINT_COORD", TYPE_VEC2}, - { "INV_CAMERA_MATRIX", TYPE_MAT4}, - -// { "SCREEN_POS", TYPE_VEC2}, -// { "SCREEN_TEXEL_SIZE", TYPE_VEC2}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID} - -}; - -const ShaderLanguage::BuiltinsDef ShaderLanguage::light_builtins_defs[]={ - - { "NORMAL", TYPE_VEC3}, - { "LIGHT_DIR", TYPE_VEC3}, - { "LIGHT_DIFFUSE", TYPE_VEC3}, - { "LIGHT_SPECULAR", TYPE_VEC3}, - { "EYE_VEC", TYPE_VEC3}, - { "DIFFUSE", TYPE_VEC3}, - { "SPECULAR", TYPE_VEC3}, - { "SPECULAR_EXP", TYPE_FLOAT}, - { "SHADE_PARAM", TYPE_FLOAT}, - { "LIGHT", TYPE_VEC3}, - { "SHADOW", TYPE_VEC3 }, - { "POINT_COORD", TYPE_VEC2 }, -// { "SCREEN_POS", TYPE_VEC2}, -// { "SCREEN_TEXEL_SIZE", TYPE_VEC2}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID} - -}; - - - -const ShaderLanguage::BuiltinsDef ShaderLanguage::ci_vertex_builtins_defs[]={ - - { "SRC_VERTEX", TYPE_VEC2}, - { "VERTEX", TYPE_VEC2}, - { "WORLD_VERTEX", TYPE_VEC2}, - { "UV", TYPE_VEC2}, - { "COLOR", TYPE_VEC4}, - { "VAR1", TYPE_VEC4}, - { "VAR2", TYPE_VEC4}, - { "POINT_SIZE", TYPE_FLOAT}, - - //builtins - { "WORLD_MATRIX", TYPE_MAT4}, - { "PROJECTION_MATRIX", TYPE_MAT4}, - { "EXTRA_MATRIX", TYPE_MAT4}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID}, -}; -const ShaderLanguage::BuiltinsDef ShaderLanguage::ci_fragment_builtins_defs[]={ - - { "SRC_COLOR", TYPE_VEC4}, - { "POSITION", TYPE_VEC4}, - { "NORMAL", TYPE_VEC3}, - { "NORMALMAP", TYPE_VEC3}, - { "NORMALMAP_DEPTH", TYPE_FLOAT}, - { "UV", TYPE_VEC2}, - { "COLOR", TYPE_VEC4}, - { "TEXTURE", TYPE_TEXTURE}, - { "TEXTURE_PIXEL_SIZE", TYPE_VEC2}, - { "VAR1", TYPE_VEC4}, - { "VAR2", TYPE_VEC4}, - { "SCREEN_UV", TYPE_VEC2}, - { "POINT_COORD", TYPE_VEC2}, - -// { "SCREEN_POS", TYPE_VEC2}, -// { "SCREEN_TEXEL_SIZE", TYPE_VEC2}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID} - -}; - -const ShaderLanguage::BuiltinsDef ShaderLanguage::ci_light_builtins_defs[]={ - - { "POSITION", TYPE_VEC4}, - { "NORMAL", TYPE_VEC3}, - { "UV", TYPE_VEC2}, - { "COLOR", TYPE_VEC4}, - { "TEXTURE", TYPE_TEXTURE}, - { "TEXTURE_PIXEL_SIZE", TYPE_VEC2}, - { "VAR1", TYPE_VEC4}, - { "VAR2", TYPE_VEC4}, - { "SCREEN_UV", TYPE_VEC2}, - { "LIGHT_VEC", TYPE_VEC2}, - { "LIGHT_HEIGHT", TYPE_FLOAT}, - { "LIGHT_COLOR", TYPE_VEC4}, - { "LIGHT_UV", TYPE_VEC2}, - { "LIGHT_SHADOW", TYPE_VEC4}, - { "LIGHT", TYPE_VEC4}, - { "SHADOW", TYPE_VEC4}, - { "POINT_COORD", TYPE_VEC2}, -// { "SCREEN_POS", TYPE_VEC2}, -// { "SCREEN_TEXEL_SIZE", TYPE_VEC2}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID} - -}; - -const ShaderLanguage::BuiltinsDef ShaderLanguage::postprocess_fragment_builtins_defs[]={ - - { "IN_COLOR", TYPE_VEC3}, - { "IN_POSITION", TYPE_VEC3}, - { "OUT_COLOR", TYPE_VEC3}, - { "SCREEN_POS", TYPE_VEC2}, - { "SCREEN_TEXEL_SIZE", TYPE_VEC2}, - { "TIME", TYPE_FLOAT}, - { NULL, TYPE_VOID} -}; - - - -ShaderLanguage::DataType ShaderLanguage::compute_node_type(Node *p_node) { - - switch(p_node->type) { - - case Node::TYPE_PROGRAM: ERR_FAIL_V(TYPE_VOID); - case Node::TYPE_FUNCTION: return static_cast(p_node)->return_type; - case Node::TYPE_BLOCK: ERR_FAIL_V(TYPE_VOID); - case Node::TYPE_VARIABLE: return static_cast(p_node)->datatype_cache; - case Node::TYPE_CONSTANT: return static_cast(p_node)->datatype; - case Node::TYPE_OPERATOR: return static_cast(p_node)->return_cache; - case Node::TYPE_CONTROL_FLOW: ERR_FAIL_V(TYPE_VOID); - case Node::TYPE_MEMBER: return static_cast(p_node)->datatype; - } - - return TYPE_VOID; -} - - -ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, OperatorNode *p_func) { +bool ShaderLanguage::_validate_function_call(BlockNode* p_block, OperatorNode *p_func,DataType *r_ret_type) { ERR_FAIL_COND_V(p_func->op!=OP_CALL && p_func->op!=OP_CONSTRUCT,NULL); @@ -1231,13 +1653,13 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper ERR_FAIL_COND_V( p_func->arguments[0]->type!=Node::TYPE_VARIABLE, NULL ); - String name = static_cast(p_func->arguments[0])->name.operator String(); + StringName name = static_cast(p_func->arguments[0])->name.operator String(); bool all_const=true; for(int i=1;iarguments.size();i++) { if (p_func->arguments[i]->type!=Node::TYPE_CONSTANT) all_const=false; - args.push_back(compute_node_type(p_func->arguments[i])); + args.push_back(p_func->arguments[i]->get_datatype()); } int argcount=args.size(); @@ -1265,10 +1687,11 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper fail=true; //make sure the number of arguments matches if (!fail) { - p_func->return_cache=intrinsic_func_defs[idx].rettype; found_intrinsic=true; - break; + if (r_ret_type) + *r_ret_type=intrinsic_func_defs[idx].rettype; + return true; } } @@ -1277,7 +1700,7 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper } } - +#if 0 if (found_intrinsic) { if (p_func->op==OP_CONSTRUCT && all_const) { @@ -1290,6 +1713,7 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper switch(v.get_type()) { case Variant::REAL: cdata.push_back(v); break; + case Variant::INT: cdata.push_back(v); break; case Variant::VECTOR2: { Vector2 v2=v; cdata.push_back(v2.x); cdata.push_back(v2.y); } break; case Variant::VECTOR3: { Vector3 v3=v; cdata.push_back(v3.x); cdata.push_back(v3.y); cdata.push_back(v3.z);} break; case Variant::PLANE: { Plane v4=v; cdata.push_back(v4.normal.x); cdata.push_back(v4.normal.y); cdata.push_back(v4.normal.z); cdata.push_back(v4.d); } break; @@ -1331,39 +1755,45 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper } return p_func; } - +#endif // try existing functions.. - FunctionNode *exclude_function=NULL; //exclude current function (in case inside one) + StringName exclude_function; + BlockNode *block = p_block; + while(block) { - Node *node = p_func; - - while(node->parent) { - - if (node->type==Node::TYPE_FUNCTION) { - - exclude_function = (FunctionNode*)node; + if (block->parent_function) { + exclude_function=block->parent_function->name; } - - node=node->parent; + block=block->parent_block; } - ERR_FAIL_COND_V(node->type!=Node::TYPE_PROGRAM,NULL); - ProgramNode *program = (ProgramNode*)node; - for(int i=0;ifunctions.size();i++) { + for(int i=0;ifunctions.size();i++) { - if (program->functions[i].function==exclude_function) + if (shader->functions[i].name==exclude_function) { + _set_error("Recursion is not allowed"); + return false; + } + + if (!shader->functions[i].callable) { + _set_error("Function '"+String(name)+" can't be called from source code."); + return false; + } + + if (name != shader->functions[i].name) continue; - FunctionNode *pfunc = program->functions[i].function; + FunctionNode *pfunc = shader->functions[i].function; + if (pfunc->arguments.size()!=args.size()) continue; bool fail=false; + for(int i=0;iarguments[i].type) { fail=true; @@ -1371,153 +1801,94 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper } } - if (!fail && name == program->functions[i].name) { + if (!fail) { p_func->return_cache=pfunc->return_type; - return p_func; + return true; } } - return NULL; + return false; } -ShaderLanguage::Node * ShaderLanguage::validate_operator(Parser& parser,OperatorNode *p_func) { +bool ShaderLanguage::_parse_function_arguments(BlockNode* p_block,const Map &p_builtin_types,OperatorNode* p_func) { - int argcount = p_func->arguments.size(); - ERR_FAIL_COND_V(argcount>2,NULL); - - DataType argtype[2]={TYPE_VOID,TYPE_VOID}; - bool all_const=true; - - for(int i=0;iarguments[i]); - if (p_func->arguments[i]->type!=Node::TYPE_CONSTANT) - all_const=false; - - } - int idx=0; - - bool valid=false; - while(operator_defs[idx].op!=OP_MAX) { - - if (p_func->op==operator_defs[idx].op) { + Token tk = _get_token(); + while(true) { - if (operator_defs[idx].args[0]==argtype[0] && operator_defs[idx].args[1]==argtype[1]) { - - p_func->return_cache=operator_defs[idx].rettype; - valid=true; - break; - } + if (tk.type==TK_PARENTHESIS_CLOSE) { + return true; } - idx++; - } + Node *arg= _parse_expression(p_block,p_builtin_types); + if (!arg) + return false; - if (!valid) - return NULL; + p_func->arguments.push_back(arg); -#define _RCO2(m_op,m_vop)\ -case m_op: {\ - ConstantNode *cn = parser.create_node(p_func->parent);\ - cn->datatype=p_func->return_cache; \ - Variant::evaluate(m_vop,static_cast(p_func->arguments[0])->value,static_cast(p_func->arguments[1])->value,cn->value,valid);\ - if (!valid)\ - return NULL;\ - return cn;\ -} break; + tk = _get_token(); -#define _RCO1(m_op,m_vop)\ -case m_op: {\ - ConstantNode *cn = parser.create_node(p_func->parent);\ - cn->datatype=p_func->return_cache; \ - Variant::evaluate(m_vop,static_cast(p_func->arguments[0])->value,Variant(),cn->value,valid);\ - if (!valid)\ - return NULL;\ - return cn;\ -} break; - - if (all_const) { - //reduce constant operator - switch(p_func->op) { - _RCO2(OP_ADD,Variant::OP_ADD); - _RCO2(OP_SUB,Variant::OP_SUBSTRACT); - _RCO2(OP_MUL,Variant::OP_MULTIPLY); - _RCO2(OP_DIV,Variant::OP_DIVIDE); - _RCO1(OP_NEG,Variant::OP_NEGATE); - _RCO1(OP_NOT,Variant::OP_NOT); - _RCO2(OP_CMP_EQ,Variant::OP_EQUAL); - _RCO2(OP_CMP_NEQ,Variant::OP_NOT_EQUAL); - _RCO2(OP_CMP_LEQ,Variant::OP_LESS_EQUAL); - _RCO2(OP_CMP_GEQ,Variant::OP_GREATER_EQUAL); - _RCO2(OP_CMP_LESS,Variant::OP_LESS); - _RCO2(OP_CMP_GREATER,Variant::OP_GREATER); - _RCO2(OP_CMP_OR,Variant::OP_OR); - _RCO2(OP_CMP_AND,Variant::OP_AND); - default: {} + if (tk.type==TK_PARENTHESIS_CLOSE) { + //none + } else if (tk.type==TK_COMMA) { + tk = _get_token(); //next + } else { + // something is broken + _set_error("Expected ',' or ')' after argument"); + return false; } + } - - return p_func; - + return true; } bool ShaderLanguage::is_token_operator(TokenType p_type) { - return (p_type==TK_OP_EQUAL) || - (p_type==TK_OP_NOT_EQUAL) || - (p_type==TK_OP_LESS) || - (p_type==TK_OP_LESS_EQUAL) || - (p_type==TK_OP_GREATER) || - (p_type==TK_OP_GREATER_EQUAL) || - (p_type==TK_OP_AND) || - (p_type==TK_OP_OR) || - (p_type==TK_OP_NOT) || - (p_type==TK_OP_ADD) || - (p_type==TK_OP_SUB) || - (p_type==TK_OP_MUL) || - (p_type==TK_OP_DIV) || - (p_type==TK_OP_NEG) || - (p_type==TK_OP_ASSIGN) || - (p_type==TK_OP_ASSIGN_ADD) || - (p_type==TK_OP_ASSIGN_SUB) || - (p_type==TK_OP_ASSIGN_MUL) || - (p_type==TK_OP_ASSIGN_DIV); + + return (p_type==TK_OP_EQUAL || + p_type==TK_OP_NOT_EQUAL || + p_type==TK_OP_LESS || + p_type==TK_OP_LESS_EQUAL || + p_type==TK_OP_GREATER || + p_type==TK_OP_GREATER_EQUAL || + p_type==TK_OP_AND || + p_type==TK_OP_OR || + p_type==TK_OP_NOT || + p_type==TK_OP_ADD || + p_type==TK_OP_SUB || + p_type==TK_OP_MUL || + p_type==TK_OP_DIV || + p_type==TK_OP_MOD || + p_type==TK_OP_SHIFT_LEFT || + p_type==TK_OP_SHIFT_RIGHT || + p_type==TK_OP_ASSIGN || + p_type==TK_OP_ASSIGN_ADD || + p_type==TK_OP_ASSIGN_SUB || + p_type==TK_OP_ASSIGN_MUL || + p_type==TK_OP_ASSIGN_DIV || + p_type==TK_OP_ASSIGN_MOD || + p_type==TK_OP_ASSIGN_SHIFT_LEFT || + p_type==TK_OP_ASSIGN_SHIFT_RIGHT || + p_type==TK_OP_ASSIGN_BIT_AND || + p_type==TK_OP_ASSIGN_BIT_OR || + p_type==TK_OP_ASSIGN_BIT_XOR || + p_type==TK_OP_BIT_AND || + p_type==TK_OP_BIT_OR || + p_type==TK_OP_BIT_XOR || + p_type==TK_OP_BIT_INVERT || + p_type==TK_OP_INCREMENT || + p_type==TK_OP_DECREMENT || + p_type==TK_QUESTION || + p_type==TK_COLON ); } -ShaderLanguage::Operator ShaderLanguage::get_token_operator(TokenType p_type) { - - switch(p_type) { - case TK_OP_EQUAL: return OP_CMP_EQ ; - case TK_OP_NOT_EQUAL: return OP_CMP_NEQ; - case TK_OP_LESS: return OP_CMP_LESS ; - case TK_OP_LESS_EQUAL: return OP_CMP_LEQ ; - case TK_OP_GREATER: return OP_CMP_GREATER ; - case TK_OP_GREATER_EQUAL: return OP_CMP_GEQ ; - case TK_OP_AND: return OP_CMP_AND ; - case TK_OP_OR: return OP_CMP_OR ; - case TK_OP_NOT: return OP_NOT ; - case TK_OP_ADD: return OP_ADD ; - case TK_OP_SUB: return OP_SUB ; - case TK_OP_MUL: return OP_MUL ; - case TK_OP_DIV: return OP_DIV ; - case TK_OP_NEG: return OP_NEG ; - case TK_OP_ASSIGN: return OP_ASSIGN ; - case TK_OP_ASSIGN_ADD: return OP_ASSIGN_ADD ; - case TK_OP_ASSIGN_SUB: return OP_ASSIGN_SUB ; - case TK_OP_ASSIGN_MUL: return OP_ASSIGN_MUL ; - case TK_OP_ASSIGN_DIV: return OP_ASSIGN_DIV ; - default: ERR_FAIL_V(OP_MAX); - } - - return OP_MAX; -} -Error ShaderLanguage::parse_expression(Parser& parser,Node *p_parent,Node **r_expr) { + +ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const Map &p_builtin_types) { Vector expression; //Vector operators; @@ -1525,416 +1896,413 @@ Error ShaderLanguage::parse_expression(Parser& parser,Node *p_parent,Node **r_ex while(true) { Node *expr=NULL; + int pos = char_idx; + Token tk = _get_token(); - if (parser.get_token_type()==TK_PARENTHESIS_OPEN) { + if (tk.type==TK_PARENTHESIS_OPEN) { //handle subexpression - parser.advance(); - Error err = parse_expression(parser,p_parent,&expr); - if (err) - return err; - if (parser.get_token_type()!=TK_PARENTHESIS_CLOSE) { + expr = _parse_expression(p_block,p_builtin_types); + if (!expr) + return NULL; - parser.set_error("Expected ')' in expression"); - return ERR_PARSE_ERROR; + tk = _get_token(); + + if (tk.type!=TK_PARENTHESIS_CLOSE) { + + _set_error("Expected ')' in expression"); + return NULL; } - parser.advance(); - - } else if (parser.get_token_type()==TK_REAL_CONSTANT) { + } else if (tk.type==TK_REAL_CONSTANT) { - ConstantNode *constant = parser.create_node(p_parent); - constant->value=parser.get_token().text.operator String().to_double(); + ConstantNode *constant = alloc_node(); + constant->value=tk.constant; constant->datatype=TYPE_FLOAT; expr=constant; - parser.advance(); - } else if (parser.get_token_type()==TK_TRUE) { + + } else if (tk.type==TK_REAL_CONSTANT) { + + + ConstantNode *constant = alloc_node(); + constant->value=int(tk.constant); + constant->datatype=TYPE_INT; + expr=constant; + + } else if (tk.type==TK_TRUE) { //print_line("found true"); //handle true constant - ConstantNode *constant = parser.create_node(p_parent); + ConstantNode *constant = alloc_node(); constant->value=true; constant->datatype=TYPE_BOOL; expr=constant; - parser.advance(); - } else if (parser.get_token_type()==TK_FALSE) { + + } else if (tk.type==TK_FALSE) { //handle false constant - ConstantNode *constant = parser.create_node(p_parent); + ConstantNode *constant = alloc_node(); constant->value=false; constant->datatype=TYPE_BOOL; expr=constant; - parser.advance(); - } else if (parser.get_token_type()==TK_TYPE_VOID) { + + } else if (tk.type==TK_TYPE_VOID) { //make sure void is not used in expression - parser.set_error("Void value not allowed in Expression"); - return ERR_PARSE_ERROR; - } else if (parser.get_token_type(1)==TK_PARENTHESIS_OPEN && (is_token_nonvoid_datatype(parser.get_token_type()) || parser.get_token_type()==TK_INDENTIFIER)) { + _set_error("Void value not allowed in Expression"); + return NULL; + } else if (is_token_precision(tk.type) || is_token_nonvoid_datatype(tk.type)) { + //basic type constructor + + OperatorNode *func = alloc_node(); + func->op=OP_CONSTRUCT; - //function or constructor - StringName name; - DataType constructor=TYPE_VOID; - if (is_token_nonvoid_datatype(parser.get_token_type())) { + if (is_token_precision(tk.type)) { - constructor=get_token_datatype(parser.get_token_type()); - switch(get_token_datatype(parser.get_token_type())) { - case TYPE_BOOL: name="bool"; break; - case TYPE_FLOAT: name="float"; break; - case TYPE_VEC2: name="vec2"; break; - case TYPE_VEC3: name="vec3"; break; - case TYPE_VEC4: name="vec4"; break; - case TYPE_MAT2: name="mat2"; break; - case TYPE_MAT3: name="mat3"; break; - case TYPE_MAT4: name="mat4"; break; - default: ERR_FAIL_V(ERR_BUG); - } - } else { - - name=parser.get_token().text; + func->return_precision_cache=get_token_precision(tk.type); + tk=_get_token(); } - if (!test_existing_identifier(p_parent,name)) { - - parser.set_error("Unknown identifier in expression: "+name); - return ERR_PARSE_ERROR; - } - - parser.advance(2); - - OperatorNode *func = parser.create_node(p_parent); - - func->op=constructor!=TYPE_VOID?OP_CONSTRUCT:OP_CALL; - - VariableNode *funcname = parser.create_node(func); - funcname->name=name; + VariableNode *funcname = alloc_node(); + funcname->name=get_datatype_name(get_token_datatype(tk.type)); func->arguments.push_back(funcname); - //parse parameters + tk=_get_token(); + if (tk.type!=TK_PARENTHESIS_OPEN) { + _set_error("Expected '(' after type name"); + return NULL; + } + + if (!_parse_function_arguments(p_block,p_builtin_types,func)) + return NULL; + + + if (!_validate_function_call(p_block,func,&func->return_cache)) { + _set_error("No matching constructor found for: '"+String(funcname->name)+"'"); + return NULL; + } + //validate_Function_call() + + expr=func; + + + } else if (tk.type==TK_IDENTIFIER) { + + + tk=_get_token(); + if (tk.type==TK_PARENTHESIS_OPEN) { + //a function + StringName name = tk.text; + + OperatorNode *func = alloc_node(); + func->op=OP_CALL; + VariableNode *funcname = alloc_node(); + funcname->name=name; + func->arguments.push_back(funcname); + + if (!_parse_function_arguments(p_block,p_builtin_types,func)) + return NULL; + + + if (!_validate_function_call(p_block,func,&func->return_cache)) { + _set_error("No matching function found for: '"+String(funcname->name)+"'"); + return NULL; + } + + expr=func; - if (parser.get_token_type()==TK_PARENTHESIS_CLOSE) { - parser.advance(); } else { + //an identifier + char_idx=pos; //rollback - while(true) { + StringName identifier = tk.text; + DataType data_type; + IdentifierType ident_type; - Node *arg=NULL; - Error err = parse_expression(parser,func,&arg); - if (err) - return err; - func->arguments.push_back(arg); - - if (parser.get_token_type()==TK_PARENTHESIS_CLOSE) { - parser.advance(); - break; - - } else if (parser.get_token_type()==TK_COMMA) { - - if (parser.get_token_type(1)==TK_PARENTHESIS_CLOSE) { - - parser.set_error("Expression expected"); - return ERR_PARSE_ERROR; - } - - parser.advance(); - } else { - // something is broken - parser.set_error("Expected ',' or ')'"); - return ERR_PARSE_ERROR; - } - + if (!_find_identifier(p_block,p_builtin_types,identifier,&data_type,&ident_type)) { + _set_error("Unknown identifier in expression: "+String(identifier)); + return NULL; } - } - expr=validate_function_call(parser,func); - if (!expr) { + if (ident_type==IDENTIFIER_FUNCTION) { + _set_error("Can't use function as identifier: "+String(identifier)); + return NULL; + } - parser.set_error("Invalid arguments to function/constructor: "+StringName(name)); - return ERR_PARSE_ERROR; + + VariableNode *varname = alloc_node(); + varname->name=identifier; + varname->datatype_cache=data_type; + expr=varname; } - } else if (parser.get_token_type()==TK_INDENTIFIER) { - //probably variable + } else if (tk.type==TK_OP_ADD) { + continue; //this one does nothing + } if (tk.type==TK_OP_SUB || tk.type==TK_OP_NOT || tk.type==TK_OP_BIT_INVERT || tk.type==TK_OP_INCREMENT || tk.type==TK_OP_DECREMENT) { - Node *node =p_parent; - bool existing=false; - DataType datatype; - StringName identifier=parser.get_token().text; - - while(node) { - - if (node->type==Node::TYPE_BLOCK) { - - BlockNode *block = (BlockNode*)node; - - if (block->variables.has(identifier)) { - existing=true; - datatype=block->variables[identifier]; - break; - } - } - - if (node->type==Node::TYPE_FUNCTION) { - - FunctionNode *function=(FunctionNode*)node; - for(int i=0;iarguments.size();i++) { - if (function->arguments[i].name==identifier) { - existing=true; - datatype=function->arguments[i].type; - break; - } - } - - if (existing) - break; - - } - - if (node->type==Node::TYPE_PROGRAM) { - - ProgramNode *program = (ProgramNode*)node; - if (program->builtin_variables.has(identifier)) { - datatype = program->builtin_variables[identifier]; - existing=true; - break; - } - if (program->uniforms.has(identifier)) { - datatype = program->uniforms[identifier].type; - existing=true; - break; - } - - } - - node=node->parent; - } - - if (!existing) { - - parser.set_error("Nonexistent identifier in expression: "+identifier); - return ERR_PARSE_ERROR; - - } - - VariableNode *varname = parser.create_node(p_parent); - varname->name=identifier; - varname->datatype_cache=datatype; - parser.advance(); - expr=varname; - - } else if (parser.get_token_type()==TK_OP_SUB || parser.get_token_type()==TK_OP_NOT) { - - //single prefix operators - TokenType token_type=parser.get_token_type(); - parser.advance(); - //Node *subexpr=NULL; - //Error err = parse_expression(parser,p_parent,&subexpr); - //if (err) - // return err; - - //OperatorNode *op = parser.create_node(p_parent); Expression e; e.is_op=true; - switch(token_type) { - case TK_OP_SUB: e.op=TK_OP_NEG; break; - case TK_OP_NOT: e.op=TK_OP_NOT; break; - //case TK_OP_PLUS_PLUS: op->op=OP_PLUS_PLUS; break; - //case TK_OP_MINUS_MINUS: op->op=OP_MINUS_MINUS; break; - default: ERR_FAIL_V(ERR_BUG); + switch(tk.type) { + case TK_OP_SUB: e.op=OP_NEGATE; break; + case TK_OP_NOT: e.op=OP_NOT; break; + case TK_OP_BIT_INVERT: e.op=OP_BIT_INVERT; break; + case TK_OP_INCREMENT: e.op=OP_INCREMENT; break; + case TK_OP_DECREMENT: e.op=OP_DECREMENT; break; + default: ERR_FAIL_V(NULL); } expression.push_back(e); - continue; } else { - print_line("found bug?"); - print_line("misplaced token: "+String(token_names[parser.get_token_type()])); - - parser.set_error("Error parsing expression, misplaced: "+String(token_names[parser.get_token_type()])); - return ERR_PARSE_ERROR; + _set_error("Expected expression, found: "+get_token_text(tk)); + return NULL; //nothing } - ERR_FAIL_COND_V(!expr,ERR_BUG); + ERR_FAIL_COND_V(!expr,NULL); /* OK now see what's NEXT to the operator.. */ /* OK now see what's NEXT to the operator.. */ /* OK now see what's NEXT to the operator.. */ + while(true) { + int pos = char_idx; + tk=_get_token(); - if (parser.get_token_type()==TK_PERIOD) { + if (tk.type==TK_PERIOD) { - if (parser.get_token_type(1)!=TK_INDENTIFIER) { - parser.set_error("Expected identifier as member"); - return ERR_PARSE_ERROR; - } + tk=_get_token(); + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected identifier as member"); + return NULL; + } - DataType dt = compute_node_type(expr); - String ident = parser.get_token(1).text; + DataType dt = expr->get_datatype(); + String ident = tk.text; - bool ok=true; - DataType member_type; - switch(dt) { - case TYPE_VEC2: { + bool ok=true; + DataType member_type; + switch(dt) { + case TYPE_BVEC2: + case TYPE_IVEC2: + case TYPE_UVEC2: + case TYPE_VEC2: { - int l = ident.length(); - if (l==1) { - member_type=TYPE_FLOAT; - } else if (l==2) { - member_type=TYPE_VEC2; - } else { - ok=false; - break; - } - - const CharType *c=ident.ptr(); - for(int i=0;i(); + mn->basetype=dt; + mn->datatype=member_type; + mn->name=ident; + mn->owner=expr; + expr=mn; + + + //todo + //member (period) has priority over any operator + //creates a subindexing expression in place + + + /*} else if (tk.type==TK_BRACKET_OPEN) { + //todo + //subindexing has priority over any operator + //creates a subindexing expression in place + + */ + } else if (tk.type==TK_OP_INCREMENT || tk.type==TK_OP_DECREMENT) { + + OperatorNode *op = alloc_node(); + op->op=tk.type==TK_OP_DECREMENT ? OP_POST_DECREMENT : OP_POST_INCREMENT; + op->arguments.push_back(expr); + + if (!_validate_operator(op,&op->return_cache)) { + _set_error("Invalid base type for increment/decrement operator"); + return NULL; + } + expr=op; + } else { + char_idx=pos; //rollback + break; } - - if (!ok) { - - parser.set_error("Invalid member for expression: ."+ident); - return ERR_PARSE_ERROR; - } - - MemberNode *mn = parser.create_node(p_parent); - mn->basetype=dt; - mn->datatype=member_type; - mn->name=ident; - mn->owner=expr; - expr=mn; - - parser.advance(2); - //todo - //member (period) has priority over any operator - //creates a subindexing expression in place - - - } else if (parser.get_token_type()==TK_BRACKET_OPEN) { - //todo - //subindexing has priority over any operator - //creates a subindexing expression in place - - - } /*else if (parser.get_token_type()==TK_OP_PLUS_PLUS || parser.get_token_type()==TK_OP_MINUS_MINUS) { - //todo - //inc/dec operators have priority over any operator - //creates a subindexing expression in place - //return OK; //wtfs - - } */ + } Expression e; e.is_op=false; e.node=expr; expression.push_back(e); + pos = char_idx; + tk = _get_token(); - if (is_token_operator(parser.get_token_type())) { + if (is_token_operator(tk.type)) { Expression o; o.is_op=true; - o.op=parser.get_token_type(); + + switch(tk.type) { + + case TK_OP_EQUAL: o.op = OP_EQUAL; break; + case TK_OP_NOT_EQUAL: o.op = OP_NOT_EQUAL; break; + case TK_OP_LESS: o.op = OP_LESS; break; + case TK_OP_LESS_EQUAL: o.op = OP_LESS_EQUAL; break; + case TK_OP_GREATER: o.op = OP_GREATER; break; + case TK_OP_GREATER_EQUAL: o.op = OP_GREATER_EQUAL; break; + case TK_OP_AND: o.op = OP_AND; break; + case TK_OP_OR: o.op = OP_OR; break; + case TK_OP_ADD: o.op = OP_ADD; break; + case TK_OP_SUB: o.op = OP_SUB; break; + case TK_OP_MUL: o.op = OP_MUL; break; + case TK_OP_DIV: o.op = OP_DIV; break; + case TK_OP_MOD: o.op = OP_MOD; break; + case TK_OP_SHIFT_LEFT: o.op = OP_SHIFT_LEFT; break; + case TK_OP_SHIFT_RIGHT: o.op = OP_SHIFT_RIGHT; break; + case TK_OP_ASSIGN: o.op = OP_ASSIGN; break; + case TK_OP_ASSIGN_ADD: o.op = OP_ASSIGN_ADD; break; + case TK_OP_ASSIGN_SUB: o.op = OP_ASSIGN_SUB; break; + case TK_OP_ASSIGN_MUL: o.op = OP_ASSIGN_MUL; break; + case TK_OP_ASSIGN_DIV: o.op = OP_ASSIGN_DIV; break; + case TK_OP_ASSIGN_MOD: o.op = OP_ASSIGN_MOD; break; + case TK_OP_ASSIGN_SHIFT_LEFT: o.op = OP_ASSIGN_SHIFT_LEFT; break; + case TK_OP_ASSIGN_SHIFT_RIGHT: o.op = OP_ASSIGN_SHIFT_RIGHT; break; + case TK_OP_ASSIGN_BIT_AND: o.op = OP_ASSIGN_BIT_AND; break; + case TK_OP_ASSIGN_BIT_OR: o.op = OP_ASSIGN_BIT_OR; break; + case TK_OP_ASSIGN_BIT_XOR: o.op = OP_ASSIGN_BIT_XOR; break; + case TK_OP_BIT_AND: o.op = OP_BIT_AND; break; + case TK_OP_BIT_OR: o.op = OP_BIT_OR ; break; + case TK_OP_BIT_XOR: o.op = OP_BIT_XOR; break; + case TK_QUESTION: o.op = OP_SELECT_IF; break; + case TK_COLON: o.op = OP_SELECT_ELSE; break; + default: { + _set_error("Invalid token for operator: "+get_token_text(tk)); + return NULL; + } + } break; + expression.push_back(o); - parser.advance(); + } else { + char_idx=pos; //something else, so rollback and end break; } } @@ -1948,6 +2316,7 @@ Error ShaderLanguage::parse_expression(Parser& parser,Node *p_parent,Node **r_ex int next_op=-1; int min_priority=0xFFFFF; bool is_unary=false; + bool is_ternary=false; for(int i=0;i=next_op;i--) { - OperatorNode *op = parser.create_node(p_parent); - op->op=get_token_operator(expression[i].op); + OperatorNode *op = alloc_node(); + op->op=expression[i].op; op->arguments.push_back(expression[i+1].node); expression[i].is_op=false; - expression[i].node=validate_operator(parser,op); - if (!expression[i].node) { + expression[i].node=op; + + + if (!_validate_operator(op,&op->return_cache)) { String at; for(int i=0;iarguments.size();i++) { if (i>0) at+=" and "; - at+=get_datatype_name(compute_node_type(op->arguments[i])); + at+=get_datatype_name(op->arguments[i]->get_datatype()); } - parser.set_error("Invalid argument to unary operator "+String(token_names[op->op])+": "+at); - return ERR_PARSE_ERROR; + _set_error("Invalid argument to unary operator: "+at); + return NULL; } expression.remove(i+1); } + + } else if (is_ternary) { + + if (next_op <1 || next_op>=(expression.size()-1)) { + _set_error("Parser bug.."); + ERR_FAIL_V(NULL); + } + + if (next_op+2 >= expression.size() || !expression[next_op+2].is_op || expression[next_op+2].op!=OP_SELECT_ELSE) { + _set_error("Mising matching ':' for select operator"); + return NULL; + } + + + + OperatorNode *op = alloc_node(); + op->op=expression[next_op].op; + op->arguments.push_back(expression[next_op-1].node); + op->arguments.push_back(expression[next_op+1].node); + op->arguments.push_back(expression[next_op+3].node); + + expression[next_op-1].is_op=false; + expression[next_op-1].node=op; + if (!_validate_operator(op,&op->return_cache)) { + + String at; + for(int i=0;iarguments.size();i++) { + if (i>0) + at+=" and "; + at+=get_datatype_name(op->arguments[i]->get_datatype()); + + } + _set_error("Invalid argument to ternary ?: operator: "+at); + return NULL; + } + + for(int i=0;i<4;i++) { + expression.remove(next_op); + } + } else { if (next_op <1 || next_op>=(expression.size()-1)) { - parser.set_error("Parser bug.."); - ERR_FAIL_V(ERR_BUG); + _set_error("Parser bug.."); + ERR_FAIL_V(NULL); } - OperatorNode *op = parser.create_node(p_parent); - op->op=get_token_operator(expression[next_op].op); + OperatorNode *op = alloc_node(); + op->op=expression[next_op].op; if (expression[next_op-1].is_op) { - parser.set_error("Parser bug.."); - ERR_FAIL_V(ERR_BUG); + _set_error("Parser bug.."); + ERR_FAIL_V(NULL); } if (expression[next_op+1].is_op) { @@ -2071,645 +2486,660 @@ Error ShaderLanguage::parse_expression(Parser& parser,Node *p_parent,Node **r_ex // can be followed by an unary op in a valid combination, // due to how precedence works, unaries will always dissapear first - parser.set_error("Parser bug.."); + _set_error("Parser bug.."); } op->arguments.push_back(expression[next_op-1].node); //expression goes as left op->arguments.push_back(expression[next_op+1].node); //next expression goes as right + expression[next_op-1].node=op; //replace all 3 nodes by this operator and make it an expression - expression[next_op-1].node=validate_operator(parser,op); - if (!expression[next_op-1].node) { + + if (!_validate_operator(op,&op->return_cache)) { String at; for(int i=0;iarguments.size();i++) { if (i>0) at+=" and "; - at+=get_datatype_name(compute_node_type(op->arguments[i])); + at+=get_datatype_name(op->arguments[i]->get_datatype()); } - static const char *op_names[OP_MAX]={"=","+","-","*","/","+=","-=","*=","/=","-","!","==","!=","<=",">=","<",">","||","&&","call","()"}; - - parser.set_error("Invalid arguments to operator "+String(op_names[op->op])+": "+at); - return ERR_PARSE_ERROR; + _set_error("Invalid arguments to operator: "+at); + return NULL; } + expression.remove(next_op); expression.remove(next_op); } -#if 0 - OperatorNode *op = parser.create_node(p_parent); - op->op=get_token_operator(operators[next_op]); - - op->arguments.push_back(expressions[next_op]); //expression goes as left - op->arguments.push_back(expressions[next_op+1]); //next expression goes as right - - expressions[next_op]=validate_operator(parser,op); - if (!expressions[next_op]) { - - String at; - for(int i=0;iarguments.size();i++) { - if (i>0) - at+=" and "; - at+=get_datatype_name(compute_node_type(op->arguments[i])); - - } - parser.set_error("Invalid arguments to operator "+String(token_names[operators[next_op]])+": "+at); - return ERR_PARSE_ERROR; - } - - - expressions.remove(next_op+1); - operators.remove(next_op); -#endif - } - *r_expr=expression[0].node; + return expression[0].node; +} - return OK; -/* - TokenType token_type=parser.get_token_type(); - OperatorNode *op = parser.create_node(p_parent); - op->op=get_token_operator(parser.get_token_type()); +ShaderLanguage::Node* ShaderLanguage::_reduce_expression(BlockNode *p_block, ShaderLanguage::Node *p_node) { - op->arguments.push_back(*r_expr); //expression goes as left - parser.advance(); - Node *right_expr=NULL; - Error err = parse_expression(parser,p_parent,&right_expr); - if (err) - return err; - op->arguments.push_back(right_expr); + if (p_node->type!=Node::TYPE_OPERATOR) + return p_node; - if (!validate_operator(op)) { + //for now only reduce simple constructors + OperatorNode *op=static_cast(p_node); - parser.set_error("Invalid arguments to operator "+String(token_names[token_type])); - return ERR_PARSE_ERROR; + if (op->op==OP_CONSTRUCT) { + + ERR_FAIL_COND_V(op->arguments[0]->type!=Node::TYPE_VARIABLE,p_node); + VariableNode *vn = static_cast(op->arguments[0]); + StringName name=vn->name; + + if (name=="vec2" || name=="vec3" || name=="vec4") { + Vector values; + + for(int i=1;iarguments.size();i++) { + + op->arguments[i]=_reduce_expression(p_block,op->arguments[i]); + if (op->arguments[i]->type==Node::TYPE_CONSTANT) { + ConstantNode *cn = static_cast(op->arguments[i]); + values.push_back(cn->value); + + } else { + return p_node; //do not bother, not reducible + } } -*/ + ConstantNode *cn=alloc_node(); + + if (name=="vec2") { + cn->datatype=TYPE_VEC2; + cn->value=Vector2(values[0],values[1]); + } else if (name=="vec3") { + cn->datatype=TYPE_VEC3; + cn->value=Vector3(values[0],values[1],values[2]); + } else if (name=="vec4") { + cn->datatype=TYPE_VEC4; + cn->value=Plane(values[0],values[1],values[2],values[3]); + } else { + ERR_FAIL_V(p_node); + } + + return cn; + } + } + + + return p_node; + } -Error ShaderLanguage::parse_variable_declaration(Parser& parser,BlockNode *p_block) { - bool uniform = parser.get_token(-1).type==TK_UNIFORM; - - DataType type=get_token_datatype(parser.get_token_type(0)); - bool iscolor = parser.get_token_type(0)==TK_TYPE_COLOR; - - if (type==TYPE_VOID) { - - parser.set_error("Cannot Declare a 'void' Variable"); - return ERR_PARSE_ERROR; - } - - if (type==TYPE_TEXTURE && !uniform) { - - parser.set_error("Cannot Declare a Non-Uniform Texture"); - return ERR_PARSE_ERROR; - } - if (type==TYPE_CUBEMAP && !uniform) { - - parser.set_error("Cannot Declare a Non-Uniform Cubemap"); - return ERR_PARSE_ERROR; - } +ShaderLanguage::Node* ShaderLanguage::_parse_and_reduce_expression(BlockNode *p_block, const Map &p_builtin_types) { - parser.advance(); - int found=0; + ShaderLanguage::Node* expr = _parse_expression(p_block,p_builtin_types); + + expr = _reduce_expression(p_block,expr); + + return expr; +} + + + + +Error ShaderLanguage::_parse_block(BlockNode* p_block,const Map &p_builtin_types,bool p_just_one,bool p_can_break,bool p_can_continue) { while(true) { + int pos = char_idx; - if (found && parser.get_token_type()!=TK_COMMA) { - break; - } + Token tk = _get_token(); + if (tk.type==TK_CURLY_BRACKET_CLOSE) { //end of block + if (p_just_one) { + _set_error("Unexpected '}'"); + return ERR_PARSE_ERROR; + } - if (parser.get_token_type()!=TK_INDENTIFIER) { + return OK; - parser.set_error("Identifier Expected"); - return ERR_PARSE_ERROR; + } else if (is_token_precision(tk.type) || is_token_nonvoid_datatype(tk.type)) { + DataPrecision precision=PRECISION_DEFAULT; + if (is_token_precision(tk.type)) { + precision=get_token_precision(tk.type); + tk = _get_token(); + if (!is_token_nonvoid_datatype(tk.type)) { + _set_error("Expected datatype after precission"); + return ERR_PARSE_ERROR; + } + } - } + DataType type = get_token_datatype(tk.type); - StringName name = parser.get_token().text; + tk = _get_token(); - if (test_existing_identifier(p_block,name)) { - parser.set_error("Duplicate Identifier (existing variable/function): "+name); - return ERR_PARSE_ERROR; - } + while(true) { - found=true; - - parser.advance(); - //see if declaration has an initializer - if (parser.get_token_type()==TK_OP_ASSIGN) { - parser.advance(); - OperatorNode * op = parser.create_node(p_block); - VariableNode * var = parser.create_node(op); - var->name=name; - var->datatype_cache=type; - var->uniform=uniform; - Node *expr; - Error err = parse_expression(parser,p_block,&expr); - - if (err) - return err; - - if (var->uniform) { - - if (expr->type!=Node::TYPE_CONSTANT) { - - parser.set_error("Uniform can only be initialized to a constant."); + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected identifier after type"); return ERR_PARSE_ERROR; } - Uniform u; - u.order=parser.program->uniforms.size(); - u.type=type; - u.default_value=static_cast(expr)->value; - if (iscolor && u.default_value.get_type()==Variant::PLANE) { - Color c; - Plane p = u.default_value; - c=Color(p.normal.x,p.normal.y,p.normal.z,p.d); - u.default_value=c; - } - parser.program->uniforms[var->name]=u; - } else { - op->op=OP_ASSIGN; - op->arguments.push_back(var); - op->arguments.push_back(expr); - Node *n=validate_operator(parser,op); - if (!n) { - parser.set_error("Invalid initializer for variable: "+name); + StringName name = tk.text; + if (_find_identifier(p_block,p_builtin_types,name)) { + _set_error("Redefinition of '"+String(name)+"'"); return ERR_PARSE_ERROR; } - p_block->statements.push_back(n); + + BlockNode::Variable var; + var.type=type; + var.precision=precision; + p_block->variables[name]=var; + + tk = _get_token(); + + if (tk.type==TK_OP_ASSIGN) { + //variable creted with assignment! must parse an expression + Node* n = _parse_and_reduce_expression(p_block,p_builtin_types); + if (!n) + return ERR_PARSE_ERROR; + + OperatorNode *assign = alloc_node(); + VariableNode *vnode = alloc_node(); + vnode->name=name; + vnode->datatype_cache=type; + assign->arguments.push_back(vnode); + assign->arguments.push_back(n); + assign->op=OP_ASSIGN; + p_block->statements.push_back(assign); + tk = _get_token(); + } + + if (tk.type==TK_COMMA) { + tk = _get_token(); + //another variable + } else if (tk.type==TK_SEMICOLON) { + break; + } else { + _set_error("Expected ',' or ';' after variable"); + return ERR_PARSE_ERROR; + } + } + } else if (tk.type==TK_CURLY_BRACKET_OPEN) { + //a sub block, just because.. + BlockNode* block = alloc_node(); + block->parent_block=p_block; + _parse_block(block,p_builtin_types,false,p_can_break,p_can_continue); + } else if (tk.type==TK_CF_IF) { + //if () {} + tk = _get_token(); + if (tk.type!=TK_PARENTHESIS_OPEN) { + _set_error("Expected '(' after if"); + return ERR_PARSE_ERROR; } - } else { - //initialize it EMPTY + ControlFlowNode *cf = alloc_node(); + cf->flow_op=FLOW_OP_IF; + Node* n = _parse_and_reduce_expression(p_block,p_builtin_types); + if (!n) + return ERR_PARSE_ERROR; - OperatorNode * op = parser.create_node(p_block); - VariableNode * var = parser.create_node(op); - ConstantNode * con = parser.create_node(op); - - var->name=name; - var->datatype_cache=type; - var->uniform=uniform; - con->datatype=type; - - switch(type) { - case TYPE_BOOL: con->value=false; break; - case TYPE_FLOAT: con->value=0.0; break; - case TYPE_VEC2: con->value=Vector2(); break; - case TYPE_VEC3: con->value=Vector3(); break; - case TYPE_VEC4: con->value=iscolor?Variant(Color()):Variant(Plane()); break; - case TYPE_MAT2: con->value=Matrix32(); break; - case TYPE_MAT3: con->value=Matrix3(); break; - case TYPE_MAT4: con->value=Transform(); break; - case TYPE_TEXTURE: - case TYPE_CUBEMAP: con->value=RID(); break; - default: {} + tk = _get_token(); + if (tk.type!=TK_PARENTHESIS_CLOSE) { + _set_error("Expected '(' after expression"); + return ERR_PARSE_ERROR; } - if (uniform) { - Uniform u; - u.type=type; - u.default_value=con->value; - u.order=parser.program->uniforms.size(); - parser.program->uniforms[var->name]=u; + BlockNode* block = alloc_node(); + block->parent_block=p_block; + cf->blocks.push_back(block); + + + Error err=_parse_block(p_block,p_builtin_types,true,p_can_break,p_can_continue); + + pos=char_idx; + tk = _get_token(); + if (tk.type==TK_CF_ELSE) { + + block = alloc_node(); + block->parent_block=p_block; + cf->blocks.push_back(block); + err=_parse_block(p_block,p_builtin_types,true,p_can_break,p_can_continue); } else { - op->op=OP_ASSIGN; - op->arguments.push_back(var); - op->arguments.push_back(con); - p_block->statements.push_back(op); + char_idx=pos; //rollback } - } - - if (!uniform) - p_block->variables[name]=type; - - } - - if (parser.get_token_type()!=TK_SEMICOLON) { - parser.set_error("Expected ';'"); - return ERR_PARSE_ERROR; - } - - - return OK; - -} - -Error ShaderLanguage::parse_flow_if(Parser& parser,Node *p_parent,Node **r_statement) { - - ControlFlowNode *cf = parser.create_node(p_parent); - - cf->flow_op=FLOW_OP_IF; - - parser.advance(); - - if (parser.get_token_type()!=TK_PARENTHESIS_OPEN) { - parser.set_error("Expected '(' after 'if'"); - return ERR_PARSE_ERROR; - } - parser.advance(); - - Node *expression=NULL; - Error err = parse_expression(parser,cf,&expression); - if (err) - return err; - - if (compute_node_type(expression)!=TYPE_BOOL) { - - parser.set_error("Expression for 'if' is not boolean"); - return ERR_PARSE_ERROR; - } - - cf->statements.push_back(expression); - - if (parser.get_token_type()!=TK_PARENTHESIS_CLOSE) { - parser.set_error("Expected ')' after expression"); - return ERR_PARSE_ERROR; - } - - parser.advance(); - - if (parser.get_token_type()!=TK_CURLY_BRACKET_OPEN) { - parser.set_error("Expected statement block after 'if()'"); - return ERR_PARSE_ERROR; - } - - Node *substatement=NULL; - err = parse_statement(parser,cf,&substatement); - if (err) - return err; - - cf->statements.push_back(substatement); - - if (parser.get_token_type()==TK_CF_ELSE) { - - parser.advance(); - - if (parser.get_token_type()!=TK_CURLY_BRACKET_OPEN) { - parser.set_error("Expected statement block after 'else'"); - return ERR_PARSE_ERROR; - } - - substatement=NULL; - err = parse_statement(parser,cf,&substatement); - if (err) - return err; - - cf->statements.push_back(substatement); - } - - - - *r_statement=cf; - - return OK; -} - -Error ShaderLanguage::parse_flow_return(Parser& parser,Node *p_parent,Node **r_statement) { - - - FunctionNode *function=NULL; - - Node *parent=p_parent; - - while(parent) { - - if (parent->type==Node::TYPE_FUNCTION) { - - function=(FunctionNode*)parent; - break; - } - - parent=parent->parent; - } - - if (!function) { - - parser.set_error("'return' must be inside a function"); - return ERR_PARSE_ERROR; - } - - ControlFlowNode *cf = parser.create_node(p_parent); - - cf->flow_op=FLOW_OP_RETURN; - - parser.advance(); - - if (function->return_type!=TYPE_VOID) { - // should expect a return expression. - - Node *expr=NULL; - Error err = parse_expression(parser,cf,&expr); - if (err) - return err; - - if (compute_node_type(expr)!=function->return_type) { - parser.set_error("Invalid type for 'return' expression"); - return ERR_PARSE_ERROR; - } - cf->statements.push_back(expr); - } - - *r_statement=cf; - - - if (parser.get_token_type()!=TK_SEMICOLON) { - parser.set_error("Expected ';'"); - return ERR_PARSE_ERROR; - } - - return OK; -} - -Error ShaderLanguage::parse_statement(Parser& parser,Node *p_parent,Node **r_statement) { - - *r_statement=NULL; - - TokenType token_type = parser.get_token_type(); - - if (token_type==TK_CURLY_BRACKET_OPEN) { - //sub-block - parser.advance(); - BlockNode *block = parser.create_node(p_parent); - - *r_statement=block; - return parse_block(parser,block); - } else if (token_type==TK_SEMICOLON) { - // empty ; - parser.advance(); - return OK; - } else if (token_type==TK_CF_IF) { - return parse_flow_if(parser,p_parent,r_statement); - - } else if (token_type==TK_CF_RETURN) { - return parse_flow_return(parser,p_parent,r_statement); - } else { - Error err=parse_expression(parser,p_parent,r_statement); - - if (err) - return err; - - if (parser.get_token_type()!=TK_SEMICOLON) { - parser.set_error("Expected ';'"); - return ERR_PARSE_ERROR; - } - - } - - return OK; -} - -Error ShaderLanguage::parse_block(Parser& parser,BlockNode *p_block) { - - while(true) { - - if (parser.is_at_end()) { - if (p_block->parent->type!=Node::TYPE_PROGRAM) { - parser.set_error("Unexpected End of File"); - return ERR_PARSE_ERROR; - } - return OK; //bye - } - - TokenType token_type = parser.get_token_type(); - - if (token_type==TK_CURLY_BRACKET_CLOSE) { - if (p_block->parent->type==Node::TYPE_PROGRAM) { - parser.set_error("Unexpected '}'"); - return ERR_PARSE_ERROR; - } - parser.advance(); - return OK; // exit block - - } else if (token_type==TK_UNIFORM) { - - if (p_block!=parser.program->body) { - - parser.set_error("Uniform only allowed in main program body."); - return ERR_PARSE_ERROR; - } - parser.advance(); - Error err=parse_variable_declaration(parser,p_block); - if (err) - return err; - - } else if (is_token_datatype(token_type)) { - - Error err=OK; - if (parser_is_at_function(parser)) - err = parse_function(parser,p_block); - else { - err = parse_variable_declaration(parser,p_block); - } - - if (err) - return err; - } else { - // must be a statement - Node *statement=NULL; - Error err = parse_statement(parser,p_block,&statement); - if (err) - return err; - if (statement) { - p_block->statements.push_back(statement); + //nothng else, so expression + char_idx=pos; //rollback + _parse_and_reduce_expression(p_block,p_builtin_types); + tk = _get_token(); + + if (tk.type!=TK_SEMICOLON) { + _set_error("Expected ';' after statement"); + return ERR_PARSE_ERROR; } - } + + if (p_just_one) + break; } return OK; } - -Error ShaderLanguage::parse(const Vector& p_tokens,ShaderType p_type,CompileFunc p_compile_func,void *p_userdata,String *r_error,int *r_err_line,int *r_err_column) { +Error ShaderLanguage::_parse_shader(const Map< StringName, Map > &p_functions, const Set &p_render_modes) { - Parser parser(p_tokens); - parser.program = parser.create_node(NULL); - parser.program->body = parser.create_node(parser.program); + Token tk = _get_token(); + + while(tk.type!=TK_EOF) { + + switch(tk.type) { + case TK_RENDER_MODE: { + + while(true) { + tk = _get_token(); + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected identifier for render mode"); + return ERR_PARSE_ERROR; + } + + if (!p_render_modes.has(tk.text)) { + _set_error("Invalid render mode: '"+String(tk.text)+"'"); + return ERR_PARSE_ERROR; + } + + if (shader->render_modes.has(tk.text)) { + _set_error("Duplicate render mode: '"+String(tk.text)+"'"); + return ERR_PARSE_ERROR; + } + + shader->render_modes.insert(tk.text); + + tk = _get_token(); + if (tk.type==TK_COMMA) { + //all good, do nothing + } else if (tk.type==TK_SEMICOLON) { + break; //done + } else { + _set_error("Unexpected token: "+get_token_text(tk)); + return ERR_PARSE_ERROR; + } + } + } break; + case TK_UNIFORM: + case TK_VARYING: { + + bool uniform = tk.type==TK_UNIFORM; + DataPrecision precision = PRECISION_DEFAULT; + DataType type; + StringName name; + + tk = _get_token(); + if (is_token_precision(tk.type)) { + precision=get_token_precision(tk.type); + tk = _get_token(); + } + + if (!is_token_datatype(tk.type)) { + _set_error("Expected datatype. "); + return ERR_PARSE_ERROR; + } + + type = get_token_datatype(tk.type); + + if (type==TYPE_VOID) { + _set_error("void datatype not allowed here"); + return ERR_PARSE_ERROR; + } + if (!uniform && typeTYPE_VEC4) { + _set_error("Invalid type for varying, only float,vec2,vec3,vec4 allowed."); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected identifier!"); + return ERR_PARSE_ERROR; + } + + name=tk.text; + + if (_find_identifier(NULL,Map(),name)) { + _set_error("Redefinition of '"+String(name)+"'"); + return ERR_PARSE_ERROR; + } + + if (uniform) { + + ShaderNode::Uniform uniform; + uniform.order=shader->uniforms.size(); + uniform.type=type; + uniform.precission=precision; + + shader->uniforms[name]=uniform; + //todo parse default value + + tk = _get_token(); + if (tk.type==TK_OP_ASSIGN) { + + Node* expr = _parse_and_reduce_expression(NULL,Map()); + if (!expr) + return ERR_PARSE_ERROR; + if (expr->type!=Node::TYPE_CONSTANT) { + _set_error("Expected constant expression after '='"); + return ERR_PARSE_ERROR; + } + + uniform.default_value=static_cast(expr)->value; + tk = _get_token(); + } + + if (tk.type==TK_COLON) { + //hint + tk = _get_token(); + if (tk.type==TK_HINT_WHITE_TEXTURE) { + uniform.hint=ShaderNode::Uniform::HINT_WHITE_TEXTURE; + } else if (tk.type==TK_HINT_BLACK_TEXTURE) { + uniform.hint=ShaderNode::Uniform::HINT_BLACK_TEXTURE; + } else if (tk.type==TK_HINT_NORMAL_TEXTURE) { + uniform.hint=ShaderNode::Uniform::HINT_NORMAL_TEXTURE; + } else if (tk.type==TK_HINT_RANGE) { + + uniform.hint=ShaderNode::Uniform::HINT_RANGE; + if (type!=TYPE_FLOAT && type!=TYPE_INT) { + _set_error("Range hint is for float and int only"); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + if (tk.type!=TK_PARENTHESIS_OPEN) { + _set_error("Expected '(' after hint_range"); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + + if (tk.type!=TK_REAL_CONSTANT ||tk.type!=TK_INT_CONSTANT) { + _set_error("Expected integer constant"); + return ERR_PARSE_ERROR; + } + + uniform.hint_range[0]=tk.constant; + + tk = _get_token(); + + if (tk.type!=TK_COMMA) { + _set_error("Expected ',' after integer constant"); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + + if (tk.type!=TK_REAL_CONSTANT || tk.type!=TK_INT_CONSTANT) { + _set_error("Expected integer constant after ','"); + return ERR_PARSE_ERROR; + } + + uniform.hint_range[1]=tk.constant; + + tk = _get_token(); + + if (tk.type==TK_COMMA) { + tk = _get_token(); + + if (tk.type!=TK_REAL_CONSTANT || tk.type!=TK_INT_CONSTANT) { + _set_error("Expected integer constant after ','"); + return ERR_PARSE_ERROR; + } + + uniform.hint_range[2]=tk.constant; + tk = _get_token(); + } else { + if (type==TYPE_INT) { + uniform.hint_range[2]=1; + } else { + uniform.hint_range[2]=0.001; + } + } + + if (tk.type!=TK_PARENTHESIS_CLOSE) { + _set_error("Expected ','"); + return ERR_PARSE_ERROR; + } - //add builtins - switch(p_type) { - case SHADER_MATERIAL_VERTEX: { - int idx=0; - while (vertex_builtins_defs[idx].name) { - parser.program->builtin_variables[vertex_builtins_defs[idx].name]=vertex_builtins_defs[idx].type; - idx++; + } + + if (uniform.hint!=ShaderNode::Uniform::HINT_RANGE && uniform.hint!=ShaderNode::Uniform::HINT_NONE && type <=TYPE_MAT4) { + _set_error("This hint is only for sampler types"); + return ERR_PARSE_ERROR; + + } + + + } + + if (tk.type!=TK_SEMICOLON) { + _set_error("Expected ';'"); + return ERR_PARSE_ERROR; + } + } else { + + ShaderNode::Varying varying; + varying.type=type; + varying.precission=precision; + shader->varyings[name]=varying; + + tk = _get_token(); + if (tk.type!=TK_SEMICOLON) { + _set_error("Expected ';'"); + return ERR_PARSE_ERROR; + } + + } + + + + } break; + default: { + //function + + DataPrecision precision = PRECISION_DEFAULT; + DataType type; + StringName name; + + if (is_token_precision(tk.type)) { + precision=get_token_precision(tk.type); + tk = _get_token(); + } + + if (!is_token_datatype(tk.type)) { + _set_error("Expected funtion, uniform or varying "); + return ERR_PARSE_ERROR; + } + + type = get_token_datatype(tk.type); + + + tk = _get_token(); + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected function name after datatype"); + return ERR_PARSE_ERROR; + + } + + name=tk.text; + + if (_find_identifier(NULL,Map(),name)) { + _set_error("Redefinition of '"+String(name)+"'"); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + if (tk.type!=TK_PARENTHESIS_OPEN) { + _set_error("Expected '(' after identifier"); + return ERR_PARSE_ERROR; + + } + + + Map builtin_types; + if (p_functions.has(name)) { + builtin_types=p_functions[name]; + } + + ShaderNode::Function function; + + function.callable=!p_functions.has(name); + function.name=name; + + FunctionNode* func_node=alloc_node(); + + function.function=func_node; + + shader->functions.push_back(function); + + func_node->name=name; + func_node->return_type=type; + func_node->return_precision=precision; + + func_node->body = alloc_node(); + func_node->body->parent_function=func_node; + + + tk = _get_token(); + + while(true) { + if (tk.type==TK_PARENTHESIS_CLOSE) { + break; + } + + DataType ptype; + StringName pname; + DataPrecision pprecision = PRECISION_DEFAULT; + + if (is_token_precision(tk.type)) { + pprecision=get_token_precision(tk.type); + tk = _get_token(); + } + + if (!is_token_datatype(tk.type)) { + _set_error("Expected a valid datatype for argument"); + return ERR_PARSE_ERROR; + } + + ptype=get_token_datatype(tk.type); + + if (ptype==TYPE_VOID) { + _set_error("void not allowed in argument"); + return ERR_PARSE_ERROR; + } + + tk = _get_token(); + + if (tk.type!=TK_IDENTIFIER) { + _set_error("Expected identifier for argument name"); + return ERR_PARSE_ERROR; + } + + pname = tk.text; + + if (_find_identifier(func_node->body,builtin_types,pname)) { + _set_error("Redefinition of '"+String(pname)+"'"); + return ERR_PARSE_ERROR; + } + FunctionNode::Argument arg; + arg.type=ptype; + arg.name=pname; + arg.precision=pprecision; + + func_node->arguments.push_back(arg); + + tk = _get_token(); + + + if (tk.type==TK_COMMA) { + tk = _get_token(); + //do none and go on + } else if (tk.type!=TK_PARENTHESIS_CLOSE) { + _set_error("Expected ',' or ')' after identifier"); + return ERR_PARSE_ERROR; + } + + } + + if (p_functions.has(name)) { + //if one of the core functions, make sure they are of the correct form + if (func_node->arguments.size() > 0) { + _set_error("Function '"+String(name)+"' expects no arguments."); + return ERR_PARSE_ERROR; + } + if (func_node->return_type!=TYPE_VOID) { + _set_error("Function '"+String(name)+"' must be of void return type."); + return ERR_PARSE_ERROR; + } + } + + + //all good let's parse inside the fucntion! + tk = _get_token(); + if (tk.type!=TK_CURLY_BRACKET_OPEN) { + _set_error("Expected '{' to begin function"); + return ERR_PARSE_ERROR; + } + + Error err = _parse_block(func_node->body,builtin_types); + if (err) + return err; } - } break; - case SHADER_MATERIAL_FRAGMENT: { - int idx=0; - while (fragment_builtins_defs[idx].name) { - parser.program->builtin_variables[fragment_builtins_defs[idx].name]=fragment_builtins_defs[idx].type; - idx++; - } - } break; - case SHADER_MATERIAL_LIGHT: { - int idx=0; - while (light_builtins_defs[idx].name) { - parser.program->builtin_variables[light_builtins_defs[idx].name]=light_builtins_defs[idx].type; - idx++; - } - } break; - case SHADER_CANVAS_ITEM_VERTEX: { - int idx=0; - while (ci_vertex_builtins_defs[idx].name) { - parser.program->builtin_variables[ci_vertex_builtins_defs[idx].name]=ci_vertex_builtins_defs[idx].type; - idx++; - } - } break; - case SHADER_CANVAS_ITEM_FRAGMENT: { - int idx=0; - while (ci_fragment_builtins_defs[idx].name) { - parser.program->builtin_variables[ci_fragment_builtins_defs[idx].name]=ci_fragment_builtins_defs[idx].type; - idx++; - } - } break; - case SHADER_CANVAS_ITEM_LIGHT: { - int idx=0; - while (ci_light_builtins_defs[idx].name) { - parser.program->builtin_variables[ci_light_builtins_defs[idx].name]=ci_light_builtins_defs[idx].type; - idx++; - } - } break; - case SHADER_POST_PROCESS: { - int idx=0; - while (postprocess_fragment_builtins_defs[idx].name) { - parser.program->builtin_variables[postprocess_fragment_builtins_defs[idx].name]=postprocess_fragment_builtins_defs[idx].type; - idx++; - } - } break; + } + + tk = _get_token(); } - Error err = parse_block(parser,parser.program->body); - if (err) { - parser.get_error(r_error,r_err_line,r_err_column); - return err; - } - - if (p_compile_func) { - err = p_compile_func(p_userdata,parser.program); - } - - //clean up nodes created - while(parser.nodegc.size()) { - - memdelete( parser.nodegc.front()->get() ); - parser.nodegc.pop_front(); - } - return err; + return OK; } -Error ShaderLanguage::compile(const String& p_code,ShaderType p_type,CompileFunc p_compile_func,void *p_userdata,String *r_error,int *r_err_line,int *r_err_column) { +Error ShaderLanguage::compile(const String& p_code, const Map< StringName, Map > &p_functions, const Set &p_render_modes) { - *r_error=""; - *r_err_line=0; - *r_err_column=0; - Vector tokens; + clear(); - Error err = tokenize(p_code,&tokens,r_error,r_err_line,r_err_column); - if (err!=OK) { - print_line("tokenizer error!"); - } + code=p_code; + + nodes=NULL; + + + shader = alloc_node(); + Error err = _parse_shader(p_functions,p_render_modes); - if (err!=OK) { - return err; - } - err = parse(tokens,p_type,p_compile_func,p_userdata,r_error,r_err_line,r_err_column); if (err!=OK) { return err; } return OK; } +String ShaderLanguage::get_error_text() { - -void ShaderLanguage::get_keyword_list(ShaderType p_type, List *p_keywords) { - - int idx=0; - - p_keywords->push_back("uniform"); - p_keywords->push_back("texture"); - p_keywords->push_back("cubemap"); - p_keywords->push_back("color"); - p_keywords->push_back("if"); - p_keywords->push_back("else"); - - while(intrinsic_func_defs[idx].name) { - - p_keywords->push_back(intrinsic_func_defs[idx].name); - idx++; - } - - - switch(p_type) { - case SHADER_MATERIAL_VERTEX: { - idx=0; - while (vertex_builtins_defs[idx].name) { - p_keywords->push_back(vertex_builtins_defs[idx].name); - idx++; - } - } break; - case SHADER_MATERIAL_FRAGMENT: { - idx=0; - while (fragment_builtins_defs[idx].name) { - p_keywords->push_back(fragment_builtins_defs[idx].name); - idx++; - } - } break; - case SHADER_MATERIAL_LIGHT: { - idx=0; - while (light_builtins_defs[idx].name) { - p_keywords->push_back(light_builtins_defs[idx].name); - idx++; - } - } break; - case SHADER_CANVAS_ITEM_VERTEX: { - idx=0; - while (ci_vertex_builtins_defs[idx].name) { - p_keywords->push_back(ci_vertex_builtins_defs[idx].name); - idx++; - } - } break; - case SHADER_CANVAS_ITEM_FRAGMENT: { - idx=0; - while (ci_fragment_builtins_defs[idx].name) { - p_keywords->push_back(ci_fragment_builtins_defs[idx].name); - idx++; - } - } break; - case SHADER_CANVAS_ITEM_LIGHT: { - idx=0; - while (ci_light_builtins_defs[idx].name) { - p_keywords->push_back(ci_light_builtins_defs[idx].name); - idx++; - } - } break; - - case SHADER_POST_PROCESS: { - idx=0; - while (postprocess_fragment_builtins_defs[idx].name) { - p_keywords->push_back(postprocess_fragment_builtins_defs[idx].name); - idx++; - } - } break; - - } - + return error_str; } + +int ShaderLanguage::get_error_line() { + + return error_line; +} + +ShaderLanguage::ShaderLanguage() { + + nodes=NULL; +} + +ShaderLanguage::~ShaderLanguage() { + + clear(); +} + + diff --git a/servers/visual/shader_language.h b/servers/visual/shader_language.h index 31e9fcda5ba..b37853c77ee 100644 --- a/servers/visual/shader_language.h +++ b/servers/visual/shader_language.h @@ -46,12 +46,24 @@ public: enum TokenType { TK_EMPTY, - TK_INDENTIFIER, + TK_IDENTIFIER, TK_TRUE, TK_FALSE, TK_REAL_CONSTANT, + TK_INT_CONSTANT, TK_TYPE_VOID, TK_TYPE_BOOL, + TK_TYPE_BVEC2, + TK_TYPE_BVEC3, + TK_TYPE_BVEC4, + TK_TYPE_INT, + TK_TYPE_IVEC2, + TK_TYPE_IVEC3, + TK_TYPE_IVEC4, + TK_TYPE_UINT, + TK_TYPE_UVEC2, + TK_TYPE_UVEC3, + TK_TYPE_UVEC4, TK_TYPE_FLOAT, TK_TYPE_VEC2, TK_TYPE_VEC3, @@ -59,9 +71,13 @@ public: TK_TYPE_MAT2, TK_TYPE_MAT3, TK_TYPE_MAT4, - TK_TYPE_TEXTURE, - TK_TYPE_CUBEMAP, - TK_TYPE_COLOR, + TK_TYPE_SAMPLER2D, + TK_TYPE_ISAMPLER2D, + TK_TYPE_USAMPLER2D, + TK_TYPE_SAMPLERCUBE, + TK_PRECISION_LOW, + TK_PRECISION_MID, + TK_PRECISION_HIGH, TK_OP_EQUAL, TK_OP_NOT_EQUAL, TK_OP_LESS, @@ -75,14 +91,35 @@ public: TK_OP_SUB, TK_OP_MUL, TK_OP_DIV, - TK_OP_NEG, + TK_OP_MOD, + TK_OP_SHIFT_LEFT, + TK_OP_SHIFT_RIGHT, TK_OP_ASSIGN, TK_OP_ASSIGN_ADD, TK_OP_ASSIGN_SUB, TK_OP_ASSIGN_MUL, TK_OP_ASSIGN_DIV, + TK_OP_ASSIGN_MOD, + TK_OP_ASSIGN_SHIFT_LEFT, + TK_OP_ASSIGN_SHIFT_RIGHT, + TK_OP_ASSIGN_BIT_AND, + TK_OP_ASSIGN_BIT_OR, + TK_OP_ASSIGN_BIT_XOR, + TK_OP_BIT_AND, + TK_OP_BIT_OR, + TK_OP_BIT_XOR, + TK_OP_BIT_INVERT, + TK_OP_INCREMENT, + TK_OP_DECREMENT, TK_CF_IF, TK_CF_ELSE, + TK_CF_FOR, + TK_CF_WHILE, + TK_CF_DO, + TK_CF_SWITCH, + TK_CF_CASE, + TK_CF_BREAK, + TK_CF_CONTINUE, TK_CF_RETURN, TK_BRACKET_OPEN, TK_BRACKET_CLOSE, @@ -90,31 +127,39 @@ public: TK_CURLY_BRACKET_CLOSE, TK_PARENTHESIS_OPEN, TK_PARENTHESIS_CLOSE, + TK_QUESTION, TK_COMMA, + TK_COLON, TK_SEMICOLON, TK_PERIOD, TK_UNIFORM, + TK_VARYING, + TK_RENDER_MODE, + TK_HINT_WHITE_TEXTURE, + TK_HINT_BLACK_TEXTURE, + TK_HINT_NORMAL_TEXTURE, + TK_HINT_RANGE, TK_ERROR, + TK_EOF, TK_MAX }; - - /* COMPILER */ - enum ShaderType { - SHADER_MATERIAL_VERTEX, - SHADER_MATERIAL_FRAGMENT, - SHADER_MATERIAL_LIGHT, - SHADER_CANVAS_ITEM_VERTEX, - SHADER_CANVAS_ITEM_FRAGMENT, - SHADER_CANVAS_ITEM_LIGHT, - SHADER_POST_PROCESS, - }; - enum DataType { TYPE_VOID, TYPE_BOOL, + TYPE_BVEC2, + TYPE_BVEC3, + TYPE_BVEC4, + TYPE_INT, + TYPE_IVEC2, + TYPE_IVEC3, + TYPE_IVEC4, + TYPE_UINT, + TYPE_UVEC2, + TYPE_UVEC3, + TYPE_UVEC4, TYPE_FLOAT, TYPE_VEC2, TYPE_VEC3, @@ -122,30 +167,58 @@ public: TYPE_MAT2, TYPE_MAT3, TYPE_MAT4, - TYPE_TEXTURE, - TYPE_CUBEMAP, + TYPE_SAMPLER2D, + TYPE_ISAMPLER2D, + TYPE_USAMPLER2D, + TYPE_SAMPLERCUBE, + }; + + enum DataPrecision { + PRECISION_LOWP, + PRECISION_MEDIUMP, + PRECISION_HIGHP, + PRECISION_DEFAULT, }; enum Operator { - OP_ASSIGN, + OP_EQUAL, + OP_NOT_EQUAL, + OP_LESS, + OP_LESS_EQUAL, + OP_GREATER, + OP_GREATER_EQUAL, + OP_AND, + OP_OR, + OP_NOT, + OP_NEGATE, OP_ADD, OP_SUB, OP_MUL, OP_DIV, + OP_MOD, + OP_SHIFT_LEFT, + OP_SHIFT_RIGHT, + OP_ASSIGN, OP_ASSIGN_ADD, OP_ASSIGN_SUB, OP_ASSIGN_MUL, OP_ASSIGN_DIV, - OP_NEG, - OP_NOT, - OP_CMP_EQ, - OP_CMP_NEQ, - OP_CMP_LEQ, - OP_CMP_GEQ, - OP_CMP_LESS, - OP_CMP_GREATER, - OP_CMP_OR, - OP_CMP_AND, + OP_ASSIGN_MOD, + OP_ASSIGN_SHIFT_LEFT, + OP_ASSIGN_SHIFT_RIGHT, + OP_ASSIGN_BIT_AND, + OP_ASSIGN_BIT_OR, + OP_ASSIGN_BIT_XOR, + OP_BIT_AND, + OP_BIT_OR, + OP_BIT_XOR, + OP_BIT_INVERT, + OP_INCREMENT, + OP_DECREMENT, + OP_SELECT_IF, + OP_SELECT_ELSE, //used only internally, then only IF appears with 3 arguments + OP_POST_INCREMENT, + OP_POST_DECREMENT, OP_CALL, OP_CONSTRUCT, OP_MAX @@ -154,18 +227,21 @@ public: enum FlowOperation { FLOW_OP_IF, FLOW_OP_RETURN, - //FLOW_OP_FOR, - //FLOW_OP_WHILE, - //FLOW_OP_DO, - //FLOW_OP_BREAK, - //FLOW_OP_CONTINUE, + FLOW_OP_FOR, + FLOW_OP_WHILE, + FLOW_OP_DO, + FLOW_OP_BREAK, + FLOW_OP_SWITCH, + FLOW_OP_CONTINUE }; struct Node { + Node *next; + enum Type { - TYPE_PROGRAM, + TYPE_SHADER, TYPE_FUNCTION, TYPE_BLOCK, TYPE_VARIABLE, @@ -175,7 +251,6 @@ public: TYPE_MEMBER }; - Node * parent; Type type; virtual DataType get_datatype() const { return TYPE_VOID; } @@ -183,24 +258,34 @@ public: virtual ~Node() {} }; + template + T* alloc_node() { + T* node = memnew(T); + node->next=nodes; + nodes=node; + return node; + } + + Node *nodes; + struct OperatorNode : public Node { DataType return_cache; + DataPrecision return_precision_cache; Operator op; Vector arguments; virtual DataType get_datatype() const { return return_cache; } - OperatorNode() { type=TYPE_OPERATOR; return_cache=TYPE_VOID; } + OperatorNode() { type=TYPE_OPERATOR; return_cache=TYPE_VOID; return_precision_cache=PRECISION_DEFAULT; } }; struct VariableNode : public Node { - bool uniform; DataType datatype_cache; StringName name; virtual DataType get_datatype() const { return datatype_cache; } - VariableNode() { type=TYPE_VARIABLE; datatype_cache=TYPE_VOID; uniform=false; } + VariableNode() { type=TYPE_VARIABLE; datatype_cache=TYPE_VOID; } }; struct ConstantNode : public Node { @@ -212,17 +297,26 @@ public: ConstantNode() { type=TYPE_CONSTANT; } }; - struct BlockNode : public Node { + struct FunctionNode; - Map variables; + struct BlockNode : public Node { + FunctionNode *parent_function; + BlockNode *parent_block; + + struct Variable { + DataType type; + DataPrecision precision; + }; + + Map variables; List statements; - BlockNode() { type=TYPE_BLOCK; } + BlockNode() { type=TYPE_BLOCK; parent_block=NULL; parent_function=NULL; } }; struct ControlFlowNode : public Node { FlowOperation flow_op; - Vector statements; + Vector blocks; ControlFlowNode() { type=TYPE_CONTROL_FLOW; flow_op=FLOW_OP_IF;} }; @@ -244,39 +338,60 @@ public: StringName name; DataType type; + DataPrecision precision; }; StringName name; DataType return_type; + DataPrecision return_precision; Vector arguments; BlockNode *body; - FunctionNode() { type=TYPE_FUNCTION; } + FunctionNode() { type=TYPE_FUNCTION; return_precision=PRECISION_DEFAULT; } }; - struct Uniform { - int order; - DataType type; - Variant default_value; - }; - - struct ProgramNode : public Node { + struct ShaderNode : public Node { struct Function { StringName name; FunctionNode*function; + bool callable; + }; + + struct Varying { + DataType type; + DataPrecision precission; + }; + + struct Uniform { + enum Hint { + HINT_NONE, + HINT_WHITE_TEXTURE, + HINT_BLACK_TEXTURE, + HINT_NORMAL_TEXTURE, + HINT_RANGE, + }; + + int order; + DataType type; + DataPrecision precission; + Variant default_value; + Hint hint; + float hint_range[3]; + + Uniform() { hint=HINT_NONE; hint_range[0]=0; hint_range[1]=1; hint_range[2]=0.001;} }; - Map builtin_variables; + Map varyings; Map uniforms; + Set render_modes; Vector functions; - BlockNode *body; - ProgramNode() { type=TYPE_PROGRAM; } + ShaderNode() { type=TYPE_SHADER; } }; @@ -284,12 +399,12 @@ public: bool is_op; union { - TokenType op; + Operator op; Node *node; }; }; - typedef Error (*CompileFunc)(void*,ProgramNode*); + struct VarInfo { @@ -297,60 +412,79 @@ public: DataType type; }; -private: - - - - static const char * token_names[TK_MAX]; + enum CompletionType { + COMPLETION_NONE, + COMPLETION_BUILT_IN_TYPE_CONSTANT, + COMPLETION_FUNCTION, + COMPLETION_IDENTIFIER, + COMPLETION_PARENT_FUNCTION, + COMPLETION_METHOD, + COMPLETION_CALL_ARGUMENTS, + COMPLETION_INDEX, + COMPLETION_VIRTUAL_FUNC, + COMPLETION_YIELD, + }; struct Token { TokenType type; StringName text; - uint16_t line,col; - - Token(TokenType p_type=TK_EMPTY,const String& p_text=String()) { type=p_type; text=p_text; line=0; col=0; } + double constant; + uint16_t line; }; + static String get_token_text(Token p_token); + static bool is_token_datatype(TokenType p_type); + static DataType get_token_datatype(TokenType p_type); + static bool is_token_precision(TokenType p_type); + static DataPrecision get_token_precision(TokenType p_type); + static String get_datatype_name(DataType p_type); + static bool is_token_nonvoid_datatype(TokenType p_type); + static bool is_token_operator(TokenType p_type); + +private: + + bool error_set; + String error_str; + int error_line; + + String code; + int char_idx; + int tk_line; - static Token read_token(const CharType* p_text,int p_len,int &r_line,int &r_chars); - static Error tokenize(const String& p_text,Vector *p_tokens,String *r_error,int *r_err_line,int *r_err_column); + void _set_error(const String& p_str) { + if (error_set) + return; + error_set=true; + error_str=p_str; + } + + + static const char * token_names[TK_MAX]; - class Parser { + Token _make_token(TokenType p_type, const StringName& p_text=StringName()); + Token _get_token(); - Vector tokens; - int pos; - String error; - public: + ShaderNode *shader; - - void set_error(const String& p_error) { error=p_error; } - void get_error(String *r_error, int *r_line, int *r_column) { - - *r_error=error; - *r_line=get_token(pos).line; - *r_column=get_token(pos).col; - } - - - - Token get_token(int ofs=0) const { int idx=pos+ofs; if (idx<0 || idx>=tokens.size()) return Token(TK_ERROR); return tokens[idx]; } - TokenType get_token_type(int ofs=0) const { int idx=pos+ofs; if (idx<0 || idx>=tokens.size()) return TK_ERROR; return tokens[idx].type; } - void advance(int p_amount=1) { pos+=p_amount; } - bool is_at_end() const { return pos>=tokens.size(); } - - ProgramNode *program; - template - T* create_node(Node *p_parent) { T*n=memnew( T ); nodegc.push_back(n); n->parent=p_parent; return n; } - List nodegc; - - Parser(const Vector& p_tokens) { tokens=p_tokens; pos=0;} + enum IdentifierType { + IDENTIFIER_FUNCTION, + IDENTIFIER_UNIFORM, + IDENTIFIER_VARYING, + IDENTIFIER_FUNCTION_ARGUMENT, + IDENTIFIER_LOCAL_VAR, + IDENTIFIER_BUILTIN_VAR, }; + bool _find_identifier(const BlockNode* p_block,const Map &p_builtin_types,const StringName& p_identifier, DataType *r_data_type=NULL, IdentifierType *r_type=NULL); + + bool _validate_operator(OperatorNode *p_op,DataType *r_ret_type=NULL); + + struct IntrinsicFuncDef { enum { MAX_ARGS=5 }; @@ -360,70 +494,35 @@ private: }; + static const IntrinsicFuncDef intrinsic_func_defs[]; + bool _validate_function_call(BlockNode* p_block, OperatorNode *p_func,DataType *r_ret_type); - struct OperatorDef { + bool _parse_function_arguments(BlockNode *p_block, const Map &p_builtin_types, OperatorNode* p_func); - enum { MAX_ARGS=2 }; - Operator op; - DataType rettype; - const DataType args[MAX_ARGS]; - }; + Node* _parse_expression(BlockNode *p_block, const Map &p_builtin_types); - static const OperatorDef operator_defs[]; + ShaderLanguage::Node* _reduce_expression(BlockNode *p_block, ShaderLanguage::Node *p_node); + Node* _parse_and_reduce_expression(BlockNode *p_block,const Map &p_builtin_types); - struct BuiltinsDef { + Error _parse_block(BlockNode *p_block, const Map &p_builtin_types, bool p_just_one=false, bool p_can_break=false, bool p_can_continue=false); - const char* name; - DataType type; - }; - - static const BuiltinsDef vertex_builtins_defs[]; - static const BuiltinsDef fragment_builtins_defs[]; - static const BuiltinsDef light_builtins_defs[]; - - static const BuiltinsDef ci_vertex_builtins_defs[]; - static const BuiltinsDef ci_fragment_builtins_defs[]; - static const BuiltinsDef ci_light_builtins_defs[]; + Error _parse_shader(const Map< StringName, Map > &p_functions, const Set &p_render_modes); - static const BuiltinsDef postprocess_fragment_builtins_defs[]; - - static DataType get_token_datatype(TokenType p_type); - static String get_datatype_name(DataType p_type); - static bool is_token_datatype(TokenType p_type); - static bool is_token_nonvoid_datatype(TokenType p_type); - - static bool test_existing_identifier(Node *p_node,const StringName p_identifier,bool p_func=true,bool p_var=true,bool p_builtin=true); - - static bool parser_is_at_function(Parser& parser); - static DataType compute_node_type(Node *p_node); - - static Node* validate_function_call(Parser&parser, OperatorNode *p_func); - static Node* validate_operator(Parser& parser,OperatorNode *p_func); - static bool is_token_operator(TokenType p_type); - static Operator get_token_operator(TokenType p_type); - - static Error parse_expression(Parser& parser,Node *p_parent,Node **r_expr); - - static Error parse_variable_declaration(Parser& parser,BlockNode *p_block); - static Error parse_function(Parser& parser,BlockNode *p_block); - static Error parse_flow_if(Parser& parser,Node *p_parent,Node **r_statement); - static Error parse_flow_return(Parser& parser,Node *p_parent,Node **r_statement); - static Error parse_statement(Parser& parser,Node *p_parent,Node **r_statement); - static Error parse_block(Parser& parser,BlockNode *p_block); - - - static Error parse(const Vector &p_tokens,ShaderType p_type,CompileFunc p_compile_func,void *p_userdata,String *r_error,int *r_err_line,int *r_err_column); - -; public: - static void get_keyword_list(ShaderType p_type,List *p_keywords); +// static void get_keyword_list(ShaderType p_type,List *p_keywords); + + void clear(); + Error compile(const String& p_code,const Map< StringName, Map > &p_functions,const Set& p_render_modes); + String get_error_text(); + int get_error_line(); - static Error compile(const String& p_code,ShaderType p_type, CompileFunc p_compile_func,void *p_userdata,String *r_error,int *r_err_line,int *r_err_column); static String lex_debug(const String& p_code); + ShaderLanguage(); + ~ShaderLanguage(); }; diff --git a/servers/visual/visual_server_canvas.cpp b/servers/visual/visual_server_canvas.cpp new file mode 100644 index 00000000000..f70ce46a3d4 --- /dev/null +++ b/servers/visual/visual_server_canvas.cpp @@ -0,0 +1,1268 @@ +#include "visual_server_canvas.h" +#include "visual_server_global.h" +#include "visual_server_viewport.h" + +void VisualServerCanvas::_render_canvas_item_tree(Item *p_canvas_item, const Matrix32& p_transform, const Rect2& p_clip_rect, const Color& p_modulate, RasterizerCanvas::Light *p_lights) { + + + static const int z_range = VS::CANVAS_ITEM_Z_MAX-VS::CANVAS_ITEM_Z_MIN+1; + RasterizerCanvas::Item *z_list[z_range]; + RasterizerCanvas::Item *z_last_list[z_range]; + + for(int i=0;icanvas_render_items(z_list[i],VS::CANVAS_ITEM_Z_MIN+i,p_modulate,p_lights); + } + +} + +void VisualServerCanvas::_render_canvas_item(Item *p_canvas_item,const Matrix32& p_transform,const Rect2& p_clip_rect, const Color &p_modulate,int p_z,RasterizerCanvas::Item **z_list,RasterizerCanvas::Item **z_last_list,Item *p_canvas_clip,Item *p_material_owner) { + + Item *ci = p_canvas_item; + + if (!ci->visible) + return; + + Rect2 rect = ci->get_rect(); + Matrix32 xform = p_transform * ci->xform; + Rect2 global_rect = xform.xform(rect); + global_rect.pos+=p_clip_rect.pos; + + + if (ci->use_parent_material && p_material_owner) + ci->material_owner=p_material_owner; + else { + p_material_owner=ci; + ci->material_owner=NULL; + } + + + Color modulate( ci->modulate.r * p_modulate.r, ci->modulate.g * p_modulate.g,ci->modulate.b * p_modulate.b,ci->modulate.a * p_modulate.a); + + if (modulate.a<0.007) + return; + + + int child_item_count=ci->child_items.size(); + Item **child_items=(Item**)alloca(child_item_count*sizeof(Item*)); + copymem(child_items,ci->child_items.ptr(),child_item_count*sizeof(Item*)); + + if (ci->clip) { + if (p_canvas_clip != NULL) { + ci->final_clip_rect=p_canvas_clip->final_clip_rect.clip(global_rect); + } else { + ci->final_clip_rect=global_rect; + } + ci->final_clip_owner=ci; + + } else { + ci->final_clip_owner=p_canvas_clip; + } + + if (ci->sort_y) { + + SortArray sorter; + sorter.sort(child_items,child_item_count); + } + + if (ci->z_relative) + p_z=CLAMP(p_z+ci->z,VS::CANVAS_ITEM_Z_MIN,VS::CANVAS_ITEM_Z_MAX); + else + p_z=ci->z; + + for(int i=0;ibehind) + continue; + _render_canvas_item(child_items[i],xform,p_clip_rect,modulate,p_z,z_list,z_last_list,(Item*)ci->final_clip_owner,p_material_owner); + } + + if (ci->copy_back_buffer) { + + ci->copy_back_buffer->screen_rect = xform.xform(ci->copy_back_buffer->rect).clip(p_clip_rect); + } + + if ((!ci->commands.empty() && p_clip_rect.intersects(global_rect)) || ci->vp_render || ci->copy_back_buffer) { + //something to draw? + ci->final_transform=xform; + ci->final_modulate=Color(modulate.r*ci->self_modulate.r, modulate.g*ci->self_modulate.g, modulate.b*ci->self_modulate.b, modulate.a*ci->self_modulate.a ); + ci->global_rect_cache=global_rect; + ci->global_rect_cache.pos-=p_clip_rect.pos; + ci->light_masked=false; + + int zidx = p_z-VS::CANVAS_ITEM_Z_MIN; + + if (z_last_list[zidx]) { + z_last_list[zidx]->next=ci; + z_last_list[zidx]=ci; + + } else { + z_list[zidx]=ci; + z_last_list[zidx]=ci; + } + + ci->next=NULL; + + } + + for(int i=0;ibehind) + continue; + _render_canvas_item(child_items[i],xform,p_clip_rect,modulate,p_z,z_list,z_last_list,(Item*)ci->final_clip_owner,p_material_owner); + } + +} + +void VisualServerCanvas::_light_mask_canvas_items(int p_z,RasterizerCanvas::Item *p_canvas_item,RasterizerCanvas::Light *p_masked_lights) { + + if (!p_masked_lights) + return; + + RasterizerCanvas::Item *ci=p_canvas_item; + + while(ci) { + + RasterizerCanvas::Light *light=p_masked_lights; + while(light) { + + if (ci->light_mask&light->item_mask && p_z>=light->z_min && p_z<=light->z_max && ci->global_rect_cache.intersects_transformed(light->xform_cache,light->rect_cache)) { + ci->light_masked=true; + } + + light=light->mask_next_ptr; + } + + ci=ci->next; + } + + + + +} + +void VisualServerCanvas::render_canvas(Canvas *p_canvas, const Matrix32 &p_transform, RasterizerCanvas::Light *p_lights, RasterizerCanvas::Light *p_masked_lights, const Rect2 &p_clip_rect) { + + VSG::canvas_render->canvas_begin(); + + int l = p_canvas->child_items.size(); + Canvas::ChildItem *ci=p_canvas->child_items.ptr(); + + bool has_mirror=false; + for(int i=0;icanvas_render_items(z_list[i],VS::CANVAS_ITEM_Z_MIN+i,p_canvas->modulate,p_lights); + } + } else { + + for(int i=0;ichild_items[i]; + _render_canvas_item_tree(ci.item,p_transform,p_clip_rect,p_canvas->modulate,p_lights); + + //mirroring (useful for scrolling backgrounds) + if (ci.mirror.x!=0) { + + Matrix32 xform2 = p_transform * Matrix32(0,Vector2(ci.mirror.x,0)); + _render_canvas_item_tree(ci.item,xform2,p_clip_rect,p_canvas->modulate,p_lights); + } + if (ci.mirror.y!=0) { + + Matrix32 xform2 = p_transform * Matrix32(0,Vector2(0,ci.mirror.y)); + _render_canvas_item_tree(ci.item,xform2,p_clip_rect,p_canvas->modulate,p_lights); + } + if (ci.mirror.y!=0 && ci.mirror.x!=0) { + + Matrix32 xform2 = p_transform * Matrix32(0,ci.mirror); + _render_canvas_item_tree(ci.item,xform2,p_clip_rect,p_canvas->modulate,p_lights); + } + + } + } + +} + + +RID VisualServerCanvas::canvas_create() { + + Canvas * canvas = memnew( Canvas ); + ERR_FAIL_COND_V(!canvas,RID()); + RID rid = canvas_owner.make_rid( canvas ); + + return rid; +} + +void VisualServerCanvas::canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring) { + + Canvas * canvas = canvas_owner.getornull(p_canvas); + ERR_FAIL_COND(!canvas); + Item *canvas_item = canvas_item_owner.getornull(p_item); + ERR_FAIL_COND(!canvas_item); + + int idx = canvas->find_item(canvas_item); + ERR_FAIL_COND(idx==-1); + canvas->child_items[idx].mirror=p_mirroring; + +} +void VisualServerCanvas::canvas_set_modulate(RID p_canvas,const Color& p_color) { + + Canvas * canvas = canvas_owner.get(p_canvas); + ERR_FAIL_COND(!canvas); + canvas->modulate=p_color; +} + + +RID VisualServerCanvas::canvas_item_create() { + + Item *canvas_item = memnew( Item ); + ERR_FAIL_COND_V(!canvas_item,RID()); + + return canvas_item_owner.make_rid( canvas_item ); +} + +void VisualServerCanvas::canvas_item_set_parent(RID p_item,RID p_parent) { + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + if (canvas_item->parent.is_valid()) { + + if (canvas_owner.owns(canvas_item->parent)) { + + Canvas *canvas = canvas_owner.get(canvas_item->parent); + canvas->erase_item(canvas_item); + } else if (canvas_item_owner.owns(canvas_item->parent)) { + + Item *item_owner = canvas_item_owner.get(canvas_item->parent); + item_owner->child_items.erase(canvas_item); + } + + canvas_item->parent=RID(); + } + + + if (p_parent.is_valid()) { + if (canvas_owner.owns(p_parent)) { + + Canvas *canvas = canvas_owner.get(p_parent); + Canvas::ChildItem ci; + ci.item=canvas_item; + canvas->child_items.push_back(ci); + canvas->children_order_dirty=true; + } else if (canvas_item_owner.owns(p_parent)) { + + Item *item_owner = canvas_item_owner.get(p_parent); + item_owner->child_items.push_back(canvas_item); + item_owner->children_order_dirty=true; + + } else { + + ERR_EXPLAIN("Invalid parent"); + ERR_FAIL(); + } + + + } + + canvas_item->parent=p_parent; + + +} +void VisualServerCanvas::canvas_item_set_visible(RID p_item,bool p_visible){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->visible=p_visible; + +} +void VisualServerCanvas::canvas_item_set_light_mask(RID p_item,int p_mask){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->light_mask=p_mask; + +} + +void VisualServerCanvas::canvas_item_set_transform(RID p_item, const Matrix32& p_transform){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->xform=p_transform; + +} +void VisualServerCanvas::canvas_item_set_clip(RID p_item, bool p_clip){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->clip=p_clip; + +} +void VisualServerCanvas::canvas_item_set_distance_field_mode(RID p_item, bool p_enable){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->distance_field=p_enable; + + +} +void VisualServerCanvas::canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->custom_rect=p_custom_rect; + canvas_item->rect=p_rect; + +} +void VisualServerCanvas::canvas_item_set_modulate(RID p_item, const Color& p_color) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->modulate=p_color; + +} +void VisualServerCanvas::canvas_item_set_self_modulate(RID p_item, const Color& p_color){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->self_modulate=p_color; + +} + +void VisualServerCanvas::canvas_item_set_draw_behind_parent(RID p_item, bool p_enable){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->behind=p_enable; + +} + + +void VisualServerCanvas::canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width,bool p_antialiased) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandLine * line = memnew( Item::CommandLine ); + ERR_FAIL_COND(!line); + line->color=p_color; + line->from=p_from; + line->to=p_to; + line->width=p_width; + line->antialiased=p_antialiased; + canvas_item->rect_dirty=true; + + + canvas_item->commands.push_back(line); +} + +void VisualServerCanvas::canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandRect * rect = memnew( Item::CommandRect ); + ERR_FAIL_COND(!rect); + rect->modulate=p_color; + rect->rect=p_rect; + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(rect); +} + +void VisualServerCanvas::canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color) { + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandCircle * circle = memnew( Item::CommandCircle ); + ERR_FAIL_COND(!circle); + circle->color=p_color; + circle->pos=p_pos; + circle->radius=p_radius; + + canvas_item->commands.push_back(circle); + +} + +void VisualServerCanvas::canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile,const Color& p_modulate,bool p_transpose) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandRect * rect = memnew( Item::CommandRect ); + ERR_FAIL_COND(!rect); + rect->modulate=p_modulate; + rect->rect=p_rect; + rect->flags=0; + if (p_tile) { + rect->flags|=RasterizerCanvas::CANVAS_RECT_TILE; + rect->flags|=RasterizerCanvas::CANVAS_RECT_REGION; + rect->source=Rect2(0,0,p_rect.size.width,p_rect.size.height); + } + + if (p_rect.size.x<0) { + + rect->flags|=RasterizerCanvas::CANVAS_RECT_FLIP_H; + rect->rect.size.x = -rect->rect.size.x; + } + if (p_rect.size.y<0) { + + rect->flags|=RasterizerCanvas::CANVAS_RECT_FLIP_V; + rect->rect.size.y = -rect->rect.size.y; + } + if (p_transpose) { + rect->flags|=RasterizerCanvas::CANVAS_RECT_TRANSPOSE; + SWAP(rect->rect.size.x, rect->rect.size.y); + } + rect->texture=p_texture; + canvas_item->rect_dirty=true; + canvas_item->commands.push_back(rect); +} + +void VisualServerCanvas::canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate,bool p_transpose) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandRect * rect = memnew( Item::CommandRect ); + ERR_FAIL_COND(!rect); + rect->modulate=p_modulate; + rect->rect=p_rect; + rect->texture=p_texture; + rect->source=p_src_rect; + rect->flags=RasterizerCanvas::CANVAS_RECT_REGION; + + if (p_rect.size.x<0) { + + rect->flags|=RasterizerCanvas::CANVAS_RECT_FLIP_H; + rect->rect.size.x = -rect->rect.size.x; + } + if (p_rect.size.y<0) { + + rect->flags|=RasterizerCanvas::CANVAS_RECT_FLIP_V; + rect->rect.size.y = -rect->rect.size.y; + } + if (p_transpose) { + rect->flags|=RasterizerCanvas::CANVAS_RECT_TRANSPOSE; + SWAP(rect->rect.size.x, rect->rect.size.y); + } + + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(rect); + +} + +void VisualServerCanvas::canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,VS::NinePatchAxisMode p_x_axis_mode, VS::NinePatchAxisMode p_y_axis_mode,bool p_draw_center,const Color& p_modulate) { + + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandNinePatch * style = memnew( Item::CommandNinePatch ); + ERR_FAIL_COND(!style); + style->texture=p_texture; + style->rect=p_rect; + style->source=p_source; + style->draw_center=p_draw_center; + style->color=p_modulate; + style->margin[MARGIN_LEFT]=p_topleft.x; + style->margin[MARGIN_TOP]=p_topleft.y; + style->margin[MARGIN_RIGHT]=p_bottomright.x; + style->margin[MARGIN_BOTTOM]=p_bottomright.y; + style->axis_x=p_x_axis_mode; + style->axis_y=p_y_axis_mode; + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(style); +} +void VisualServerCanvas::canvas_item_add_primitive(RID p_item,const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width) { + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandPrimitive * prim = memnew( Item::CommandPrimitive ); + ERR_FAIL_COND(!prim); + prim->texture=p_texture; + prim->points=p_points; + prim->uvs=p_uvs; + prim->colors=p_colors; + prim->width=p_width; + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(prim); +} + +void VisualServerCanvas::canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture) { + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); +#ifdef DEBUG_ENABLED + int pointcount = p_points.size(); + ERR_FAIL_COND(pointcount<3); + int color_size=p_colors.size(); + int uv_size=p_uvs.size(); + ERR_FAIL_COND(color_size!=0 && color_size!=1 && color_size!=pointcount); + ERR_FAIL_COND(uv_size!=0 && (uv_size!=pointcount || !p_texture.is_valid())); +#endif + Vector indices = Geometry::triangulate_polygon(p_points); + + if (indices.empty()) { + + ERR_EXPLAIN("Bad Polygon!"); + ERR_FAIL_V(); + } + + Item::CommandPolygon * polygon = memnew( Item::CommandPolygon ); + ERR_FAIL_COND(!polygon); + polygon->texture=p_texture; + polygon->points=p_points; + polygon->uvs=p_uvs; + polygon->colors=p_colors; + polygon->indices=indices; + polygon->count=indices.size(); + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(polygon); + +} + + +void VisualServerCanvas::canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture, int p_count) { + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + int ps = p_points.size(); + ERR_FAIL_COND(!p_colors.empty() && p_colors.size()!=ps && p_colors.size()!=1); + ERR_FAIL_COND(!p_uvs.empty() && p_uvs.size()!=ps); + + Vector indices = p_indices; + + int count = p_count * 3; + + if (indices.empty()) { + + ERR_FAIL_COND( ps % 3 != 0 ); + if (p_count == -1) + count = ps; + } else { + + ERR_FAIL_COND( indices.size() % 3 != 0 ); + if (p_count == -1) + count = indices.size(); + } + + Item::CommandPolygon * polygon = memnew( Item::CommandPolygon ); + ERR_FAIL_COND(!polygon); + polygon->texture=p_texture; + polygon->points=p_points; + polygon->uvs=p_uvs; + polygon->colors=p_colors; + polygon->indices=indices; + polygon->count = count; + canvas_item->rect_dirty=true; + + canvas_item->commands.push_back(polygon); +} + + +void VisualServerCanvas::canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform) { + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandTransform * tr = memnew( Item::CommandTransform ); + ERR_FAIL_COND(!tr); + tr->xform=p_transform; + + canvas_item->commands.push_back(tr); + +} + +void VisualServerCanvas::canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandMesh * m = memnew( Item::CommandMesh ); + ERR_FAIL_COND(!m); + m->mesh=p_mesh; + m->skeleton=p_skeleton; + + canvas_item->commands.push_back(m); +} +void VisualServerCanvas::canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandMultiMesh * mm = memnew( Item::CommandMultiMesh ); + ERR_FAIL_COND(!mm); + mm->multimesh=p_mesh; + mm->skeleton=p_skeleton; + + canvas_item->commands.push_back(mm); + +} + +void VisualServerCanvas::canvas_item_add_clip_ignore(RID p_item, bool p_ignore){ + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + Item::CommandClipIgnore * ci = memnew( Item::CommandClipIgnore); + ERR_FAIL_COND(!ci); + ci->ignore=p_ignore; + + canvas_item->commands.push_back(ci); +} +void VisualServerCanvas::canvas_item_set_sort_children_by_y(RID p_item, bool p_enable){ + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->sort_y=p_enable; +} +void VisualServerCanvas::canvas_item_set_z(RID p_item, int p_z){ + + ERR_FAIL_COND(p_zVS::CANVAS_ITEM_Z_MAX); + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->z=p_z; +} +void VisualServerCanvas::canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->z_relative=p_enable; + +} +void VisualServerCanvas::canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect){ + + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + if (bool(canvas_item->copy_back_buffer!=NULL) !=p_enable) { + if (p_enable) { + canvas_item->copy_back_buffer = memnew( RasterizerCanvas::Item::CopyBackBuffer ); + } else { + memdelete(canvas_item->copy_back_buffer); + canvas_item->copy_back_buffer=NULL; + } + } + + if (p_enable) { + canvas_item->copy_back_buffer->rect=p_rect; + canvas_item->copy_back_buffer->full=p_rect==Rect2(); + } + +} + +void VisualServerCanvas::canvas_item_clear(RID p_item){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->clear(); +} +void VisualServerCanvas::canvas_item_set_draw_index(RID p_item,int p_index){ + + Item *canvas_item = canvas_item_owner.getornull( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->index=p_index; + + if (canvas_item_owner.owns( canvas_item->parent)) { + Item *canvas_item_parent = canvas_item_owner.getornull( canvas_item->parent ); + canvas_item_parent->children_order_dirty=true; + return; + } + + Canvas* canvas = canvas_owner.getornull( canvas_item->parent ); + if (canvas) { + canvas->children_order_dirty=true; + return; + } + +} + +void VisualServerCanvas::canvas_item_set_material(RID p_item, RID p_material){ + + Item *canvas_item = canvas_item_owner.get( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->material=p_material; +} + +void VisualServerCanvas::canvas_item_set_use_parent_material(RID p_item, bool p_enable){ + + Item *canvas_item = canvas_item_owner.get( p_item ); + ERR_FAIL_COND(!canvas_item); + + canvas_item->use_parent_material=p_enable; + +} + +RID VisualServerCanvas::canvas_light_create(){ + + RasterizerCanvas::Light *clight = memnew( RasterizerCanvas::Light ); + clight->light_internal = VSG::canvas_render->light_internal_create(); + return canvas_light_owner.make_rid(clight); + +} +void VisualServerCanvas::canvas_light_attach_to_canvas(RID p_light,RID p_canvas){ + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + if (clight->canvas.is_valid()) { + + Canvas *canvas = canvas_owner.getornull(clight->canvas); + canvas->lights.erase(clight); + } + + if (!canvas_owner.owns(p_canvas)) + p_canvas=RID(); + + clight->canvas=p_canvas; + + if (clight->canvas.is_valid()) { + + Canvas *canvas = canvas_owner.get(clight->canvas); + canvas->lights.insert(clight); + } +} + + +void VisualServerCanvas::canvas_light_set_enabled(RID p_light, bool p_enabled){ + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->enabled=p_enabled; + +} +void VisualServerCanvas::canvas_light_set_scale(RID p_light, float p_scale){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->scale=p_scale; + +} +void VisualServerCanvas::canvas_light_set_transform(RID p_light, const Matrix32& p_transform){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->xform=p_transform; + +} +void VisualServerCanvas::canvas_light_set_texture(RID p_light, RID p_texture){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->texture=p_texture; + +} +void VisualServerCanvas::canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->texture_offset=p_offset; + +} +void VisualServerCanvas::canvas_light_set_color(RID p_light, const Color& p_color){ + + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->color=p_color; +} +void VisualServerCanvas::canvas_light_set_height(RID p_light, float p_height){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->height=p_height; + +} +void VisualServerCanvas::canvas_light_set_energy(RID p_light, float p_energy){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->energy=p_energy; + +} +void VisualServerCanvas::canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->z_min=p_min_z; + clight->z_max=p_max_z; + +} +void VisualServerCanvas::canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + + clight->layer_max=p_max_layer; + clight->layer_min=p_min_layer; + + +} +void VisualServerCanvas::canvas_light_set_item_cull_mask(RID p_light, int p_mask){ + + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->item_mask=p_mask; + +} +void VisualServerCanvas::canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask){ + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->item_shadow_mask=p_mask; + +} +void VisualServerCanvas::canvas_light_set_mode(RID p_light, VS::CanvasLightMode p_mode){ + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->mode=p_mode; + +} + + +void VisualServerCanvas::canvas_light_set_shadow_enabled(RID p_light, bool p_enabled){ + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + if (clight->shadow_buffer.is_valid()==p_enabled) + return; + if (p_enabled) { + clight->shadow_buffer=VSG::storage->canvas_light_shadow_buffer_create(clight->shadow_buffer_size); + } else { + VSG::storage->free(clight->shadow_buffer); + clight->shadow_buffer=RID(); + } + +} +void VisualServerCanvas::canvas_light_set_shadow_buffer_size(RID p_light, int p_size){ + + ERR_FAIL_COND(p_size<32 || p_size>16384); + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + int new_size = nearest_power_of_2(p_size);; + if (new_size==clight->shadow_buffer_size) + return; + + clight->shadow_buffer_size=nearest_power_of_2(p_size); + + if (clight->shadow_buffer.is_valid()) { + VSG::storage->free(clight->shadow_buffer); + clight->shadow_buffer=VSG::storage->canvas_light_shadow_buffer_create(clight->shadow_buffer_size); + } +} + +void VisualServerCanvas::canvas_light_set_shadow_gradient_length(RID p_light, float p_length) { + + ERR_FAIL_COND(p_length<0); + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->shadow_gradient_length=p_length; + +} +void VisualServerCanvas::canvas_light_set_shadow_filter(RID p_light, VS::CanvasLightShadowFilter p_filter) { + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->shadow_filter=p_filter; + +} +void VisualServerCanvas::canvas_light_set_shadow_color(RID p_light, const Color& p_color) { + + RasterizerCanvas::Light *clight = canvas_light_owner.get(p_light); + ERR_FAIL_COND(!clight); + + clight->shadow_color=p_color; +} + + + +RID VisualServerCanvas::canvas_light_occluder_create() { + + RasterizerCanvas::LightOccluderInstance *occluder = memnew( RasterizerCanvas::LightOccluderInstance ); + + return canvas_light_occluder_owner.make_rid(occluder); +} +void VisualServerCanvas::canvas_light_occluder_attach_to_canvas(RID p_occluder,RID p_canvas){ + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!occluder); + + if (occluder->canvas.is_valid()) { + + Canvas *canvas = canvas_owner.get(occluder->canvas); + canvas->occluders.erase(occluder); + } + + if (!canvas_owner.owns(p_canvas)) + p_canvas=RID(); + + occluder->canvas=p_canvas; + + if (occluder->canvas.is_valid()) { + + Canvas *canvas = canvas_owner.get(occluder->canvas); + canvas->occluders.insert(occluder); + } +} +void VisualServerCanvas::canvas_light_occluder_set_enabled(RID p_occluder,bool p_enabled) { + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!occluder); + + occluder->enabled=p_enabled; +} +void VisualServerCanvas::canvas_light_occluder_set_polygon(RID p_occluder,RID p_polygon) { + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!occluder); + + if (occluder->polygon.is_valid()) { + LightOccluderPolygon *occluder_poly = canvas_light_occluder_polygon_owner.get(p_polygon); + if (occluder_poly) { + occluder_poly->owners.erase(occluder); + } + } + + occluder->polygon=p_polygon; + occluder->polygon_buffer=RID(); + + if (occluder->polygon.is_valid()) { + LightOccluderPolygon *occluder_poly = canvas_light_occluder_polygon_owner.get(p_polygon); + if (!occluder_poly) + occluder->polygon=RID(); + ERR_FAIL_COND(!occluder_poly); + occluder_poly->owners.insert(occluder); + occluder->polygon_buffer=occluder_poly->occluder; + occluder->aabb_cache=occluder_poly->aabb; + occluder->cull_cache=occluder_poly->cull_mode; + } + +} +void VisualServerCanvas::canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform) { + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!occluder); + + occluder->xform=p_xform; +} +void VisualServerCanvas::canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask) { + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_occluder); + ERR_FAIL_COND(!occluder); + + occluder->light_mask=p_mask; +} + +RID VisualServerCanvas::canvas_occluder_polygon_create() { + + LightOccluderPolygon * occluder_poly = memnew( LightOccluderPolygon ); + occluder_poly->occluder=VSG::storage->canvas_light_occluder_create(); + return canvas_light_occluder_polygon_owner.make_rid(occluder_poly); + +} +void VisualServerCanvas::canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed) { + + if (p_shape.size()<3) { + canvas_occluder_polygon_set_shape_as_lines(p_occluder_polygon,p_shape); + return; + } + + DVector lines; + int lc = p_shape.size()*2; + + lines.resize(lc-(p_closed?0:2)); + { + DVector::Write w = lines.write(); + DVector::Read r = p_shape.read(); + + int max=lc/2; + if (!p_closed) { + max--; + } + for(int i=0;i& p_shape) { + + LightOccluderPolygon * occluder_poly = canvas_light_occluder_polygon_owner.get(p_occluder_polygon); + ERR_FAIL_COND(!occluder_poly); + ERR_FAIL_COND(p_shape.size()&1); + + int lc = p_shape.size(); + occluder_poly->aabb=Rect2(); + { + DVector::Read r = p_shape.read(); + for(int i=0;iaabb.pos=r[i]; + else + occluder_poly->aabb.expand_to(r[i]); + } + } + + VSG::storage->canvas_light_occluder_set_polylines(occluder_poly->occluder,p_shape); + for( Set::Element *E=occluder_poly->owners.front();E;E=E->next()) { + E->get()->aabb_cache=occluder_poly->aabb; + } +} + + +void VisualServerCanvas::canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,VS::CanvasOccluderPolygonCullMode p_mode) { + + LightOccluderPolygon * occluder_poly = canvas_light_occluder_polygon_owner.get(p_occluder_polygon); + ERR_FAIL_COND(!occluder_poly); + occluder_poly->cull_mode=p_mode; + for( Set::Element *E=occluder_poly->owners.front();E;E=E->next()) { + E->get()->cull_cache=p_mode; + } +} + + + +bool VisualServerCanvas::free(RID p_rid) { + + if (canvas_owner.owns(p_rid)) { + + Canvas *canvas = canvas_owner.get(p_rid); + ERR_FAIL_COND_V(!canvas,false); + + while(canvas->viewports.size()) { + + VisualServerViewport::Viewport *vp = VSG::viewport->viewport_owner.get(canvas->viewports.front()->get()); + ERR_FAIL_COND_V(!vp,true); + + Map::Element *E=vp->canvas_map.find(p_rid); + ERR_FAIL_COND_V(!E,true); + vp->canvas_map.erase(p_rid); + + canvas->viewports.erase( canvas->viewports.front() ); + } + + for (int i=0;ichild_items.size();i++) { + + canvas->child_items[i].item->parent=RID(); + } + + for (Set::Element *E=canvas->lights.front();E;E=E->next()) { + + E->get()->canvas=RID(); + } + + for (Set::Element *E=canvas->occluders.front();E;E=E->next()) { + + E->get()->canvas=RID(); + } + + canvas_owner.free( p_rid ); + + memdelete( canvas ); + + } else if (canvas_item_owner.owns(p_rid)) { + + Item *canvas_item = canvas_item_owner.get(p_rid); + ERR_FAIL_COND_V(!canvas_item,true); + + if (canvas_item->parent.is_valid()) { + + if (canvas_owner.owns(canvas_item->parent)) { + + Canvas *canvas = canvas_owner.get(canvas_item->parent); + canvas->erase_item(canvas_item); + } else if (canvas_item_owner.owns(canvas_item->parent)) { + + Item *item_owner = canvas_item_owner.get(canvas_item->parent); + item_owner->child_items.erase(canvas_item); + + } + } + + for (int i=0;ichild_items.size();i++) { + + canvas_item->child_items[i]->parent=RID(); + } + +// if (canvas_item->material) { +// canvas_item->material->owners.erase(canvas_item); +// } + + canvas_item_owner.free( p_rid ); + + memdelete( canvas_item ); + + } else if (canvas_light_owner.owns(p_rid)) { + + RasterizerCanvas::Light *canvas_light = canvas_light_owner.get(p_rid); + ERR_FAIL_COND_V(!canvas_light,true); + + if (canvas_light->canvas.is_valid()) { + Canvas* canvas = canvas_owner.get(canvas_light->canvas); + if (canvas) + canvas->lights.erase(canvas_light); + } + + if (canvas_light->shadow_buffer.is_valid()) + VSG::storage->free(canvas_light->shadow_buffer); + + VSG::canvas_render->light_internal_free(canvas_light->light_internal); + + canvas_light_owner.free( p_rid ); + memdelete( canvas_light ); + + } else if (canvas_light_occluder_owner.owns(p_rid)) { + + RasterizerCanvas::LightOccluderInstance *occluder = canvas_light_occluder_owner.get(p_rid); + ERR_FAIL_COND_V(!occluder,true); + + if (occluder->polygon.is_valid()) { + + LightOccluderPolygon *occluder_poly = canvas_light_occluder_polygon_owner.get(occluder->polygon); + if (occluder_poly) { + occluder_poly->owners.erase(occluder); + } + + } + + if (occluder->canvas.is_valid() && canvas_owner.owns(occluder->canvas)) { + + Canvas *canvas = canvas_owner.get(occluder->canvas); + canvas->occluders.erase(occluder); + + } + + canvas_light_occluder_owner.free( p_rid ); + memdelete(occluder); + + } else if (canvas_light_occluder_polygon_owner.owns(p_rid)) { + + LightOccluderPolygon *occluder_poly = canvas_light_occluder_polygon_owner.get(p_rid); + ERR_FAIL_COND_V(!occluder_poly,true); + VSG::storage->free(occluder_poly->occluder); + + while(occluder_poly->owners.size()) { + + occluder_poly->owners.front()->get()->polygon=RID(); + occluder_poly->owners.erase( occluder_poly->owners.front() ); + } + + canvas_light_occluder_polygon_owner.free( p_rid ); + memdelete(occluder_poly); + } else { + return false; + } + + return true; +} + + + +VisualServerCanvas::VisualServerCanvas() +{ + +} diff --git a/servers/visual/visual_server_canvas.h b/servers/visual/visual_server_canvas.h new file mode 100644 index 00000000000..b5412ed6080 --- /dev/null +++ b/servers/visual/visual_server_canvas.h @@ -0,0 +1,214 @@ +#ifndef VISUALSERVERCANVAS_H +#define VISUALSERVERCANVAS_H + +#include "rasterizer.h" +#include "visual_server_viewport.h" + +class VisualServerCanvas { +public: + + + struct Item : public RasterizerCanvas::Item { + + + RID parent; // canvas it belongs to + List::Element *E; + int z; + bool z_relative; + bool sort_y; + Color modulate; + Color self_modulate; + bool use_parent_material; + int index; + bool children_order_dirty; + + + Vector child_items; + + + Item() { + children_order_dirty=true; + E=NULL; + z=0; + modulate=Color(1,1,1,1); + self_modulate=Color(1,1,1,1); + sort_y=false; + use_parent_material=false; + z_relative=true; + index=0; + } + }; + + + struct ItemPtrSort { + + _FORCE_INLINE_ bool operator()(const Item* p_left,const Item* p_right) const { + + return p_left->xform.elements[2].y < p_right->xform.elements[2].y; + } + }; + + struct LightOccluderPolygon : RID_Data { + + bool active; + Rect2 aabb; + VS::CanvasOccluderPolygonCullMode cull_mode; + RID occluder; + Set owners; + + LightOccluderPolygon() { active=false; cull_mode=VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; } + }; + + + RID_Owner canvas_light_occluder_polygon_owner; + + RID_Owner canvas_light_occluder_owner; + + struct Canvas : public VisualServerViewport::CanvasBase { + + Set viewports; + struct ChildItem { + + Point2 mirror; + Item *item; + bool operator<(const ChildItem& p_item) const { + return item->index < p_item.item->index; + } + }; + + + + Set lights; + + Set occluders; + + bool children_order_dirty; + Vector child_items; + Color modulate; + + int find_item(Item *p_item) { + for(int i=0;i=0) + child_items.remove(idx); + } + + + Canvas() { modulate=Color(1,1,1,1); children_order_dirty=true; } + + }; + + + RID_Owner canvas_owner; + RID_Owner canvas_item_owner; + RID_Owner canvas_light_owner; + +private: + + void _render_canvas_item_tree(Item *p_canvas_item, const Matrix32& p_transform, const Rect2& p_clip_rect, const Color &p_modulate, RasterizerCanvas::Light *p_lights); + void _render_canvas_item(Item *p_canvas_item, const Matrix32& p_transform, const Rect2& p_clip_rect, const Color &p_modulate, int p_z, RasterizerCanvas::Item **z_list, RasterizerCanvas::Item **z_last_list, Item *p_canvas_clip, Item *p_material_owner); + void _light_mask_canvas_items(int p_z,RasterizerCanvas::Item *p_canvas_item,RasterizerCanvas::Light *p_masked_lights); +public: + + void render_canvas(Canvas *p_canvas, const Matrix32 &p_transform, RasterizerCanvas::Light *p_lights, RasterizerCanvas::Light *p_masked_lights,const Rect2& p_clip_rect); + + RID canvas_create(); + void canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring); + void canvas_set_modulate(RID p_canvas,const Color& p_color); + + + RID canvas_item_create(); + void canvas_item_set_parent(RID p_item,RID p_parent); + + void canvas_item_set_visible(RID p_item,bool p_visible); + void canvas_item_set_light_mask(RID p_item,int p_mask); + + void canvas_item_set_transform(RID p_item, const Matrix32& p_transform); + void canvas_item_set_clip(RID p_item, bool p_clip); + void canvas_item_set_distance_field_mode(RID p_item, bool p_enable); + void canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect=Rect2()); + void canvas_item_set_modulate(RID p_item, const Color& p_color); + void canvas_item_set_self_modulate(RID p_item, const Color& p_color); + + void canvas_item_set_draw_behind_parent(RID p_item, bool p_enable); + + + void canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width=1.0,bool p_antialiased=false); + void canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color); + void canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color); + void canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile=false,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); + void canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); + void canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,VS::NinePatchAxisMode p_x_axis_mode=VS::NINE_PATCH_STRETCH, VS::NinePatchAxisMode p_y_axis_mode=VS::NINE_PATCH_STRETCH,bool p_draw_center=true,const Color& p_modulate=Color(1,1,1)); + void canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width=1.0); + void canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID()); + void canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID(), int p_count=-1); + void canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton=RID()); + void canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton=RID()); + void canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform); + void canvas_item_add_clip_ignore(RID p_item, bool p_ignore); + void canvas_item_set_sort_children_by_y(RID p_item, bool p_enable); + void canvas_item_set_z(RID p_item, int p_z); + void canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable); + void canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect); + + void canvas_item_clear(RID p_item); + void canvas_item_set_draw_index(RID p_item, int p_index); + + void canvas_item_set_material(RID p_item, RID p_material); + + void canvas_item_set_use_parent_material(RID p_item, bool p_enable); + + RID canvas_light_create(); + void canvas_light_attach_to_canvas(RID p_light,RID p_canvas); + void canvas_light_set_enabled(RID p_light, bool p_enabled); + void canvas_light_set_scale(RID p_light, float p_scale); + void canvas_light_set_transform(RID p_light, const Matrix32& p_transform); + void canvas_light_set_texture(RID p_light, RID p_texture); + void canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset); + void canvas_light_set_color(RID p_light, const Color& p_color); + void canvas_light_set_height(RID p_light, float p_height); + void canvas_light_set_energy(RID p_light, float p_energy); + void canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z); + void canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer); + void canvas_light_set_item_cull_mask(RID p_light, int p_mask); + void canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask); + + void canvas_light_set_mode(RID p_light, VS::CanvasLightMode p_mode); + + + void canvas_light_set_shadow_enabled(RID p_light, bool p_enabled); + void canvas_light_set_shadow_buffer_size(RID p_light, int p_size); + void canvas_light_set_shadow_gradient_length(RID p_light, float p_length); + void canvas_light_set_shadow_filter(RID p_light, VS::CanvasLightShadowFilter p_filter); + void canvas_light_set_shadow_color(RID p_light, const Color& p_color); + + + + RID canvas_light_occluder_create(); + void canvas_light_occluder_attach_to_canvas(RID p_occluder,RID p_canvas); + void canvas_light_occluder_set_enabled(RID p_occluder,bool p_enabled); + void canvas_light_occluder_set_polygon(RID p_occluder,RID p_polygon); + void canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform); + void canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask); + + RID canvas_occluder_polygon_create(); + void canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed); + void canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape); + + + void canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,VS::CanvasOccluderPolygonCullMode p_mode); + + + bool free(RID p_rid); + VisualServerCanvas(); + + +}; + +#endif // VISUALSERVERCANVAS_H diff --git a/servers/visual/visual_server_global.cpp b/servers/visual/visual_server_global.cpp new file mode 100644 index 00000000000..32b9710f42f --- /dev/null +++ b/servers/visual/visual_server_global.cpp @@ -0,0 +1,10 @@ +#include "visual_server_global.h" + +RasterizerStorage *VisualServerGlobals::storage=NULL; +RasterizerCanvas *VisualServerGlobals::canvas_render=NULL; +RasterizerScene *VisualServerGlobals::scene_render=NULL; +Rasterizer *VisualServerGlobals::rasterizer=NULL; + +VisualServerCanvas *VisualServerGlobals::canvas=NULL; +VisualServerViewport *VisualServerGlobals::viewport=NULL; +VisualServerScene *VisualServerGlobals::scene=NULL; diff --git a/servers/visual/visual_server_global.h b/servers/visual/visual_server_global.h new file mode 100644 index 00000000000..d413334ac42 --- /dev/null +++ b/servers/visual/visual_server_global.h @@ -0,0 +1,26 @@ +#ifndef VISUALSERVERGLOBAL_H +#define VISUALSERVERGLOBAL_H + +#include "rasterizer.h" + +class VisualServerCanvas; +class VisualServerViewport; +class VisualServerScene; + +class VisualServerGlobals +{ +public: + + static RasterizerStorage *storage; + static RasterizerCanvas *canvas_render; + static RasterizerScene *scene_render; + static Rasterizer *rasterizer; + + static VisualServerCanvas *canvas; + static VisualServerViewport *viewport; + static VisualServerScene *scene; +}; + +#define VSG VisualServerGlobals + +#endif // VISUALSERVERGLOBAL_H diff --git a/servers/visual/visual_server_raster.cpp b/servers/visual/visual_server_raster.cpp index 8d228ad859d..168afac77fd 100644 --- a/servers/visual/visual_server_raster.cpp +++ b/servers/visual/visual_server_raster.cpp @@ -32,8 +32,1280 @@ #include "default_mouse_cursor.xpm" #include "sort.h" #include "io/marshalls.h" +#include "visual_server_canvas.h" +#include "visual_server_global.h" + // careful, these may run in different threads than the visual server +RID VisualServerRaster::texture_create(){ + + return VSG::storage->texture_create(); +} +void VisualServerRaster::texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags){ + + VSG::storage->texture_allocate(p_texture,p_width,p_height,p_format,p_flags); +} +void VisualServerRaster::texture_set_data(RID p_texture,const Image& p_image,CubeMapSide p_cube_side){ + + VSG::storage->texture_set_data(p_texture,p_image,p_cube_side); +} +Image VisualServerRaster::texture_get_data(RID p_texture,CubeMapSide p_cube_side) const{ + + return VSG::storage->texture_get_data(p_texture,p_cube_side); +} +void VisualServerRaster::texture_set_flags(RID p_texture,uint32_t p_flags) { + + VSG::storage->texture_set_flags(p_texture,p_flags); +} +uint32_t VisualServerRaster::texture_get_flags(RID p_texture) const{ + + return VSG::storage->texture_get_flags(p_texture); +} +Image::Format VisualServerRaster::texture_get_format(RID p_texture) const{ + + return VSG::storage->texture_get_format(p_texture); +} +uint32_t VisualServerRaster::texture_get_width(RID p_texture) const{ + + return VSG::storage->texture_get_width(p_texture); +} +uint32_t VisualServerRaster::texture_get_height(RID p_texture) const{ + + return VSG::storage->texture_get_height(p_texture); +} +void VisualServerRaster::texture_set_size_override(RID p_texture,int p_width, int p_height){ + VSG::storage->texture_set_size_override(p_texture,p_width,p_height); + +} + +void VisualServerRaster::texture_set_path(RID p_texture,const String& p_path){ + + VSG::storage->texture_set_path(p_texture,p_path); +} + +String VisualServerRaster::texture_get_path(RID p_texture) const{ + + return VSG::storage->texture_get_path(p_texture); +} + +void VisualServerRaster::texture_set_shrink_all_x2_on_set_data(bool p_enable){ + + VSG::storage->texture_set_shrink_all_x2_on_set_data(p_enable); +} + +void VisualServerRaster::texture_debug_usage(List *r_info){ + + VSG::storage->texture_debug_usage(r_info); +} + + +/* SHADER API */ + + +RID VisualServerRaster::shader_create(ShaderMode p_mode){ + + return VSG::storage->shader_create(p_mode); +} + +void VisualServerRaster::shader_set_mode(RID p_shader,ShaderMode p_mode){ + + VSG::storage->shader_set_mode(p_shader,p_mode); +} +VisualServerRaster::ShaderMode VisualServerRaster::shader_get_mode(RID p_shader) const{ + + return VSG::storage->shader_get_mode(p_shader); +} + +void VisualServerRaster::shader_set_code(RID p_shader, const String& p_code){ + + VSG::storage->shader_set_code(p_shader,p_code); +} +String VisualServerRaster::shader_get_code(RID p_shader) const{ + + return VSG::storage->shader_get_code(p_shader); +} +void VisualServerRaster::shader_get_param_list(RID p_shader, List *p_param_list) const{ + + VSG::storage->shader_get_param_list(p_shader,p_param_list); +} + +void VisualServerRaster::shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture){ + + VSG::storage->shader_set_default_texture_param(p_shader,p_name,p_texture); +} +RID VisualServerRaster::shader_get_default_texture_param(RID p_shader, const StringName& p_name) const{ + + return VSG::storage->shader_get_default_texture_param(p_shader,p_name); +} + + +/* COMMON MATERIAL API */ + +RID VisualServerRaster::material_create(){ + + return VSG::storage->material_create(); +} + +void VisualServerRaster::material_set_shader(RID p_shader_material, RID p_shader){ + + VSG::storage->material_set_shader(p_shader_material,p_shader); +} +RID VisualServerRaster::material_get_shader(RID p_shader_material) const{ + + return VSG::storage->material_get_shader(p_shader_material); +} + +void VisualServerRaster::material_set_param(RID p_material, const StringName& p_param, const Variant& p_value){ + + VSG::storage->material_set_param(p_material,p_param,p_value); +} +Variant VisualServerRaster::material_get_param(RID p_material, const StringName& p_param) const{ + + return VSG::storage->material_get_param(p_material,p_param); +} + +/* MESH API */ + +RID VisualServerRaster::mesh_create(){ + + return VSG::storage->mesh_create(); +} + +void VisualServerRaster::mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes){ + + VSG::storage->mesh_add_surface(p_mesh,p_format,p_primitive,p_array,p_vertex_count,p_index_array,p_index_count,p_blend_shapes); +} + +void VisualServerRaster::mesh_set_morph_target_count(RID p_mesh,int p_amount){ + + VSG::storage->mesh_set_morph_target_count(p_mesh,p_amount); +} +int VisualServerRaster::mesh_get_morph_target_count(RID p_mesh) const{ + + return VSG::storage->mesh_get_morph_target_count(p_mesh); +} + + +void VisualServerRaster::mesh_set_morph_target_mode(RID p_mesh,MorphTargetMode p_mode){ + + VSG::storage->mesh_set_morph_target_mode(p_mesh,p_mode); +} +VisualServerRaster::MorphTargetMode VisualServerRaster::mesh_get_morph_target_mode(RID p_mesh) const{ + + return VSG::storage->mesh_get_morph_target_mode(p_mesh); +} + +void VisualServerRaster::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material){ + + VSG::storage->mesh_surface_set_material(p_mesh,p_surface,p_material); +} +RID VisualServerRaster::mesh_surface_get_material(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_material(p_mesh,p_surface); +} + +int VisualServerRaster::mesh_surface_get_array_len(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_array_len(p_mesh,p_surface); +} +int VisualServerRaster::mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_array_index_len(p_mesh,p_surface); +} + +DVector VisualServerRaster::mesh_surface_get_array(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_array(p_mesh,p_surface); +} +DVector VisualServerRaster::mesh_surface_get_index_array(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_index_array(p_mesh,p_surface); +} + + +uint32_t VisualServerRaster::mesh_surface_get_format(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_format(p_mesh,p_surface); +} +VisualServerRaster::PrimitiveType VisualServerRaster::mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const{ + + return VSG::storage->mesh_surface_get_primitive_type(p_mesh,p_surface); +} + +void VisualServerRaster::mesh_remove_surface(RID p_mesh,int p_index){ + + VSG::storage->mesh_remove_surface(p_mesh,p_index); +} +int VisualServerRaster::mesh_get_surface_count(RID p_mesh) const{ + + return VSG::storage->mesh_get_surface_count(p_mesh); +} + +void VisualServerRaster::mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb){ + + VSG::storage->mesh_set_custom_aabb(p_mesh,p_aabb); +} +AABB VisualServerRaster::mesh_get_custom_aabb(RID p_mesh) const{ + + return VSG::storage->mesh_get_custom_aabb(p_mesh); +} + +void VisualServerRaster::mesh_clear(RID p_mesh){ + + VSG::storage->mesh_clear(p_mesh); +} + +/* MULTIMESH API */ + + +RID VisualServerRaster::multimesh_create() { + + return VSG::storage->multimesh_create(); +} + +void VisualServerRaster::multimesh_allocate(RID p_multimesh,int p_instances,MultimeshTransformFormat p_transform_format,MultimeshColorFormat p_color_format,bool p_gen_aabb){ + + VSG::storage->multimesh_allocate(p_multimesh,p_instances,p_transform_format,p_color_format,p_gen_aabb); +} +int VisualServerRaster::multimesh_get_instance_count(RID p_multimesh) const{ + + return VSG::storage->multimesh_get_instance_count(p_multimesh); +} + +void VisualServerRaster::multimesh_set_mesh(RID p_multimesh,RID p_mesh){ + + VSG::storage->multimesh_set_mesh(p_multimesh,p_mesh); +} +void VisualServerRaster::multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb){ + + VSG::storage->multimesh_set_custom_aabb(p_multimesh,p_aabb); +} +void VisualServerRaster::multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform){ + + VSG::storage->multimesh_instance_set_transform(p_multimesh,p_index,p_transform); +} +void VisualServerRaster::multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform){ + + VSG::storage->multimesh_instance_set_transform_2d(p_multimesh,p_index,p_transform); +} +void VisualServerRaster::multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color){ + + VSG::storage->multimesh_instance_set_color(p_multimesh,p_index,p_color); +} + +RID VisualServerRaster::multimesh_get_mesh(RID p_multimesh) const{ + + return VSG::storage->multimesh_get_mesh(p_multimesh); +} +AABB VisualServerRaster::multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const{ + + return VSG::storage->multimesh_get_custom_aabb(p_multimesh,p_aabb); +} + +Transform VisualServerRaster::multimesh_instance_get_transform(RID p_multimesh,int p_index) const{ + + return VSG::storage->multimesh_instance_get_transform(p_multimesh,p_index); +} +Matrix32 VisualServerRaster::multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const{ + + return VSG::storage->multimesh_instance_get_transform_2d(p_multimesh,p_index); +} +Color VisualServerRaster::multimesh_instance_get_color(RID p_multimesh,int p_index) const{ + + return VSG::storage->multimesh_instance_get_color(p_multimesh,p_index); +} + +void VisualServerRaster::multimesh_set_visible_instances(RID p_multimesh,int p_visible){ + + VSG::storage->multimesh_set_visible_instances(p_multimesh,p_visible); +} +int VisualServerRaster::multimesh_get_visible_instances(RID p_multimesh) const{ + + return VSG::storage->multimesh_get_visible_instances(p_multimesh); +} + + +/* IMMEDIATE API */ + +RID VisualServerRaster::immediate_create(){ + + return VSG::storage->immediate_create(); +} +void VisualServerRaster::immediate_begin(RID p_immediate,PrimitiveType p_rimitive,RID p_texture){ + + VSG::storage->immediate_begin(p_immediate,p_rimitive,p_texture); +} +void VisualServerRaster::immediate_vertex(RID p_immediate,const Vector3& p_vertex){ + + VSG::storage->immediate_vertex(p_immediate,p_vertex); +} +void VisualServerRaster::immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex){ + + VSG::storage->immediate_vertex_2d(p_immediate,p_vertex); + +} +void VisualServerRaster::immediate_normal(RID p_immediate,const Vector3& p_normal){ + + VSG::storage->immediate_normal(p_immediate,p_normal); + +} +void VisualServerRaster::immediate_tangent(RID p_immediate,const Plane& p_tangent){ + + VSG::storage->immediate_tangent(p_immediate,p_tangent); +} +void VisualServerRaster::immediate_color(RID p_immediate,const Color& p_color){ + + VSG::storage->immediate_color(p_immediate,p_color); + +} +void VisualServerRaster::immediate_uv(RID p_immediate,const Vector2& tex_uv){ + + VSG::storage->immediate_uv(p_immediate,tex_uv); + +} +void VisualServerRaster::immediate_uv2(RID p_immediate,const Vector2& tex_uv){ + + VSG::storage->immediate_uv2(p_immediate,tex_uv); + +} +void VisualServerRaster::immediate_end(RID p_immediate){ + + VSG::storage->immediate_end(p_immediate); + +} +void VisualServerRaster::immediate_clear(RID p_immediate){ + + VSG::storage->immediate_clear(p_immediate); + +} +void VisualServerRaster::immediate_set_material(RID p_immediate,RID p_material){ + + VSG::storage->immediate_set_material(p_immediate,p_material); + +} +RID VisualServerRaster::immediate_get_material(RID p_immediate) const{ + + return VSG::storage->immediate_get_material(p_immediate); +} + +/* SKELETON API */ + +RID VisualServerRaster::skeleton_create(){ + + return VSG::storage->skeleton_create(); +} +void VisualServerRaster::skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton){ + + VSG::storage->skeleton_allocate(p_skeleton,p_bones,p_2d_skeleton); +} +int VisualServerRaster::skeleton_get_bone_count(RID p_skeleton) const{ + + return VSG::storage->skeleton_get_bone_count(p_skeleton); +} +void VisualServerRaster::skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform){ + + VSG::storage->skeleton_bone_set_transform(p_skeleton,p_bone,p_transform); +} +Transform VisualServerRaster::skeleton_bone_get_transform(RID p_skeleton,int p_bone){ + + return VSG::storage->skeleton_bone_get_transform(p_skeleton,p_bone); +} +void VisualServerRaster::skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform){ + + VSG::storage->skeleton_bone_set_transform_2d(p_skeleton,p_bone,p_transform); +} +Matrix32 VisualServerRaster::skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone){ + + return VSG::storage->skeleton_bone_get_transform_2d(p_skeleton,p_bone); +} + +/* Light API */ + +RID VisualServerRaster::light_create(LightType p_type){ + + return VSG::storage->light_create(p_type); +} + +void VisualServerRaster::light_set_color(RID p_light,const Color& p_color){ + + VSG::storage->light_set_color(p_light,p_color); +} +void VisualServerRaster::light_set_param(RID p_light,LightParam p_param,float p_value){ + + VSG::storage->light_set_param(p_light,p_param,p_value); +} +void VisualServerRaster::light_set_shadow(RID p_light,bool p_enabled){ + + VSG::storage->light_set_shadow(p_light,p_enabled); +} +void VisualServerRaster::light_set_projector(RID p_light,RID p_texture){ + + VSG::storage->light_set_projector(p_light,p_texture); +} +void VisualServerRaster::light_set_attenuation_texure(RID p_light,RID p_texture){ + + VSG::storage->light_set_attenuation_texure(p_light,p_texture); +} +void VisualServerRaster::light_set_negative(RID p_light,bool p_enable){ + + VSG::storage->light_set_negative(p_light,p_enable); +} +void VisualServerRaster::light_set_cull_mask(RID p_light,uint32_t p_mask){ + + VSG::storage->light_set_cull_mask(p_light,p_mask); +} +void VisualServerRaster::light_set_shader(RID p_light,RID p_shader){ + + VSG::storage->light_set_shader(p_light,p_shader); +} + + +void VisualServerRaster::light_directional_set_shadow_mode(RID p_light,LightDirectionalShadowMode p_mode){ + + VSG::storage->light_directional_set_shadow_mode(p_light,p_mode); +} + +/* PROBE API */ + +RID VisualServerRaster::reflection_probe_create(){ + + return VSG::storage->reflection_probe_create(); +} + +void VisualServerRaster::reflection_probe_set_intensity(RID p_probe, float p_intensity){ + + VSG::storage->reflection_probe_set_intensity(p_probe,p_intensity); +} +void VisualServerRaster::reflection_probe_set_clip(RID p_probe, float p_near, float p_far){ + + VSG::storage->reflection_probe_set_clip(p_probe,p_near,p_far); +} +void VisualServerRaster::reflection_probe_set_min_blend_distance(RID p_probe, float p_distance){ + + VSG::storage->reflection_probe_set_min_blend_distance(p_probe,p_distance); +} +void VisualServerRaster::reflection_probe_set_extents(RID p_probe, const Vector3& p_extents){ + + VSG::storage->reflection_probe_set_extents(p_probe,p_extents); +} +void VisualServerRaster::reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset){ + + VSG::storage->reflection_probe_set_origin_offset(p_probe,p_offset); +} +void VisualServerRaster::reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable){ + + VSG::storage->reflection_probe_set_enable_parallax_correction(p_probe,p_enable); +} +void VisualServerRaster::reflection_probe_set_resolution(RID p_probe, int p_resolution){ + + VSG::storage->reflection_probe_set_resolution(p_probe,p_resolution); +} +void VisualServerRaster::reflection_probe_set_hide_skybox(RID p_probe, bool p_hide){ + + VSG::storage->reflection_probe_set_hide_skybox(p_probe,p_hide); +} +void VisualServerRaster::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers){ + + VSG::storage->reflection_probe_set_cull_mask(p_probe,p_layers); +} + + +/* ROOM API */ + +RID VisualServerRaster::room_create(){ + + return VSG::storage->room_create(); +} +void VisualServerRaster::room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform){ + + VSG::storage->room_add_bounds(p_room,p_convex_polygon,p_height,p_transform); +} +void VisualServerRaster::room_clear_bounds(){ + + VSG::storage->room_clear_bounds(); +} + +/* PORTAL API */ + +// portals are only (x/y) points, forming a convex shape, which its clockwise +// order points outside. (z is 0); + +RID VisualServerRaster::portal_create(){ + + return VSG::storage->portal_create(); +} +void VisualServerRaster::portal_set_shape(RID p_portal, const Vector& p_shape){ + + VSG::storage->portal_set_shape(p_portal,p_shape); +} +void VisualServerRaster::portal_set_enabled(RID p_portal, bool p_enabled) { + + VSG::storage->portal_set_enabled(p_portal,p_enabled); +} +void VisualServerRaster::portal_set_disable_distance(RID p_portal, float p_distance){ + + VSG::storage->portal_set_disable_distance(p_portal,p_distance); +} +void VisualServerRaster::portal_set_disabled_color(RID p_portal, const Color& p_color){ + + VSG::storage->portal_set_disabled_color(p_portal,p_color); +} + +/* CAMERA API */ + +RID VisualServerRaster::camera_create() { + + return RID(); +} +void VisualServerRaster::camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far) { + +} +void VisualServerRaster::camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far){ + +} +void VisualServerRaster::camera_set_transform(RID p_camera,const Transform& p_transform) { + +} +void VisualServerRaster::camera_set_cull_mask(RID p_camera,uint32_t p_layers){ + +} +void VisualServerRaster::camera_set_environment(RID p_camera,RID p_env){ + +} +void VisualServerRaster::camera_set_use_vertical_aspect(RID p_camera,bool p_enable){ + +} + + +/* VIEWPORT TARGET API */ + +RID VisualServerRaster::viewport_create(){ + + return VSG::viewport->viewport_create(); +} + +void VisualServerRaster::viewport_set_size(RID p_viewport,int p_width,int p_height){ + + VSG::viewport->viewport_set_size(p_viewport,p_width,p_height); +} + +void VisualServerRaster::viewport_set_active(RID p_viewport,bool p_active) { + + VSG::viewport->viewport_set_active(p_viewport,p_active); +} + +void VisualServerRaster::viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode) { + + VSG::viewport->viewport_set_clear_mode(p_viewport,p_clear_mode); +} + +void VisualServerRaster::viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect,int p_screen){ + + VSG::viewport->viewport_attach_to_screen(p_viewport,p_rect,p_screen); + +} +void VisualServerRaster::viewport_detach(RID p_viewport){ + + VSG::viewport->viewport_detach(p_viewport); + +} + +void VisualServerRaster::viewport_set_update_mode(RID p_viewport,ViewportUpdateMode p_mode){ + + VSG::viewport->viewport_set_update_mode(p_viewport,p_mode); + +} +void VisualServerRaster::viewport_set_vflip(RID p_viewport,bool p_enable){ + + VSG::viewport->viewport_set_vflip(p_viewport,p_enable); + +} + +RID VisualServerRaster::viewport_get_texture(RID p_viewport) const{ + + return VSG::viewport->viewport_get_texture(p_viewport); + +} +Image VisualServerRaster::viewport_capture(RID p_viewport) const{ + + return VSG::viewport->viewport_capture(p_viewport); +} + +void VisualServerRaster::viewport_set_hide_scenario(RID p_viewport,bool p_hide){ + + return VSG::viewport->viewport_set_hide_scenario(p_viewport,p_hide); +} +void VisualServerRaster::viewport_set_hide_canvas(RID p_viewport,bool p_hide){ + + return VSG::viewport->viewport_set_hide_canvas(p_viewport,p_hide); + +} +void VisualServerRaster::viewport_set_disable_environment(RID p_viewport,bool p_disable){ + + return VSG::viewport->viewport_set_disable_environment(p_viewport,p_disable); + +} + +void VisualServerRaster::viewport_attach_camera(RID p_viewport,RID p_camera){ + + return VSG::viewport->viewport_attach_camera(p_viewport,p_camera); + +} +void VisualServerRaster::viewport_set_scenario(RID p_viewport,RID p_scenario){ + + return VSG::viewport->viewport_set_scenario(p_viewport,p_scenario); + +} +void VisualServerRaster::viewport_attach_canvas(RID p_viewport,RID p_canvas){ + + return VSG::viewport->viewport_attach_canvas(p_viewport,p_canvas); + +} +void VisualServerRaster::viewport_remove_canvas(RID p_viewport,RID p_canvas){ + + return VSG::viewport->viewport_remove_canvas(p_viewport,p_canvas); + +} +void VisualServerRaster::viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset){ + + return VSG::viewport->viewport_set_canvas_transform(p_viewport,p_canvas,p_offset); + +} +void VisualServerRaster::viewport_set_transparent_background(RID p_viewport,bool p_enabled){ + + return VSG::viewport->viewport_set_transparent_background(p_viewport,p_enabled); + +} + +void VisualServerRaster::viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform){ + + return VSG::viewport->viewport_set_global_canvas_transform(p_viewport,p_transform); + +} +void VisualServerRaster::viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer){ + + return VSG::viewport->viewport_set_canvas_layer(p_viewport,p_canvas,p_layer); + +} + + +/* ENVIRONMENT API */ + +RID VisualServerRaster::environment_create(){ + + return RID(); +} + +void VisualServerRaster::environment_set_background(RID p_env,EnvironmentBG p_bg){ + +} +void VisualServerRaster::environment_set_skybox(RID p_env,RID p_skybox,float p_energy){ + +} +void VisualServerRaster::environment_set_bg_color(RID p_env,const Color& p_color){ + +} +void VisualServerRaster::environment_set_canvas_max_layer(RID p_env,int p_max_layer){ + +} +void VisualServerRaster::environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy){ + +} + +void VisualServerRaster::environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,EnvironmentGlowBlendMode p_blend_mode){ + +} +void VisualServerRaster::environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture){ + +} + +void VisualServerRaster::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper){ + +} +void VisualServerRaster::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ + +} +void VisualServerRaster::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ + +} +void VisualServerRaster::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ + +} +void VisualServerRaster::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ + +} + + +/* SCENARIO API */ + + +RID VisualServerRaster::scenario_create() { + + return RID(); +} + +void VisualServerRaster::scenario_set_debug(RID p_scenario,ScenarioDebugMode p_debug_mode){ + +} +void VisualServerRaster::scenario_set_environment(RID p_scenario, RID p_environment){ + +} +RID VisualServerRaster::scenario_get_environment(RID p_scenario, RID p_environment) const{ + + return RID(); +} +void VisualServerRaster::scenario_set_fallback_environment(RID p_scenario, RID p_environment){ + +} + + +/* INSTANCING API */ +// from can be mesh, light, area and portal so far. +RID VisualServerRaster::instance_create(){ + + return RID(); +} + +void VisualServerRaster::instance_set_base(RID p_instance, RID p_base){ + +} +void VisualServerRaster::instance_set_scenario(RID p_instance, RID p_scenario){ + +} +void VisualServerRaster::instance_set_layer_mask(RID p_instance, uint32_t p_mask){ + +} +void VisualServerRaster::instance_set_transform(RID p_instance, const Transform& p_transform){ + +} +void VisualServerRaster::instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID){ + +} +void VisualServerRaster::instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight){ + +} +void VisualServerRaster::instance_set_surface_material(RID p_instance,int p_surface, RID p_material){ + +} + +void VisualServerRaster::instance_attach_skeleton(RID p_instance,RID p_skeleton){ + +} +void VisualServerRaster::instance_set_exterior( RID p_instance, bool p_enabled ){ + +} +void VisualServerRaster::instance_set_room( RID p_instance, RID p_room ){ + +} + +void VisualServerRaster::instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ){ + +} + +// don't use these in a game! +Vector VisualServerRaster::instances_cull_aabb(const AABB& p_aabb, RID p_scenario) const{ + + return Vector(); +} + +Vector VisualServerRaster::instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario) const{ + + return Vector(); +} +Vector VisualServerRaster::instances_cull_convex(const Vector& p_convex, RID p_scenario) const { + + return Vector(); +} + + +void VisualServerRaster::instance_geometry_set_flag(RID p_instance,InstanceFlags p_flags,bool p_enabled){ + +} +void VisualServerRaster::instance_geometry_set_cast_shadows_setting(RID p_instance, ShadowCastingSetting p_shadow_casting_setting) { + +} +void VisualServerRaster::instance_geometry_set_material_override(RID p_instance, RID p_material){ + +} + + +void VisualServerRaster::instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin){ + +} +void VisualServerRaster::instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance){ + +} + +/* CANVAS (2D) */ + +RID VisualServerRaster::canvas_create(){ + + return VSG::canvas->canvas_create(); + +} +void VisualServerRaster::canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring){ + + VSG::canvas->canvas_set_item_mirroring(p_canvas,p_item,p_mirroring); +} +void VisualServerRaster::canvas_set_modulate(RID p_canvas,const Color& p_color){ + + VSG::canvas->canvas_set_modulate(p_canvas,p_color); +} + + +RID VisualServerRaster::canvas_item_create(){ + + return VSG::canvas->canvas_item_create(); +} +void VisualServerRaster::canvas_item_set_parent(RID p_item,RID p_parent){ + + VSG::canvas->canvas_item_set_parent(p_item,p_parent); + +} + +void VisualServerRaster::canvas_item_set_visible(RID p_item,bool p_visible){ + + VSG::canvas->canvas_item_set_visible(p_item,p_visible); + +} +void VisualServerRaster::canvas_item_set_light_mask(RID p_item,int p_mask){ + + VSG::canvas->canvas_item_set_light_mask(p_item,p_mask); + +} + +void VisualServerRaster::canvas_item_set_transform(RID p_item, const Matrix32& p_transform){ + + VSG::canvas->canvas_item_set_transform(p_item,p_transform); + +} +void VisualServerRaster::canvas_item_set_clip(RID p_item, bool p_clip){ + + VSG::canvas->canvas_item_set_clip(p_item,p_clip); + +} +void VisualServerRaster::canvas_item_set_distance_field_mode(RID p_item, bool p_enable){ + + VSG::canvas->canvas_item_set_distance_field_mode(p_item,p_enable); + +} +void VisualServerRaster::canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect){ + + VSG::canvas->canvas_item_set_custom_rect(p_item,p_custom_rect,p_rect); + +} +void VisualServerRaster::canvas_item_set_modulate(RID p_item, const Color& p_color){ + + VSG::canvas->canvas_item_set_modulate(p_item,p_color); + +} +void VisualServerRaster::canvas_item_set_self_modulate(RID p_item, const Color& p_color){ + + VSG::canvas->canvas_item_set_self_modulate(p_item,p_color); + +} + +void VisualServerRaster::canvas_item_set_draw_behind_parent(RID p_item, bool p_enable){ + + VSG::canvas->canvas_item_set_draw_behind_parent(p_item,p_enable); + +} + + +void VisualServerRaster::canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width,bool p_antialiased){ + + VSG::canvas->canvas_item_add_line(p_item,p_from,p_to,p_color,p_width,p_antialiased); + +} +void VisualServerRaster::canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color){ + + VSG::canvas->canvas_item_add_rect(p_item,p_rect,p_color); + +} +void VisualServerRaster::canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color){ + + VSG::canvas->canvas_item_add_circle(p_item,p_pos,p_radius,p_color); + +} +void VisualServerRaster::canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile,const Color& p_modulate,bool p_transpose){ + + VSG::canvas->canvas_item_add_texture_rect(p_item,p_rect,p_texture,p_tile,p_modulate,p_transpose); + +} +void VisualServerRaster::canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate,bool p_transpose){ + + VSG::canvas->canvas_item_add_texture_rect_region(p_item,p_rect,p_texture,p_src_rect,p_modulate,p_transpose); + +} +void VisualServerRaster::canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,NinePatchAxisMode p_x_axis_mode, NinePatchAxisMode p_y_axis_mode,bool p_draw_center,const Color& p_modulate){ + + VSG::canvas->canvas_item_add_nine_patch(p_item,p_rect,p_source,p_texture,p_topleft,p_bottomright,p_x_axis_mode,p_y_axis_mode,p_draw_center,p_modulate); + +} +void VisualServerRaster::canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width){ + + VSG::canvas->canvas_item_add_primitive(p_item,p_points,p_colors,p_uvs,p_texture,p_width); + +} +void VisualServerRaster::canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture){ + + VSG::canvas->canvas_item_add_polygon(p_item,p_points,p_colors,p_uvs,p_texture); + +} +void VisualServerRaster::canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture, int p_count){ + + VSG::canvas->canvas_item_add_triangle_array(p_item,p_indices,p_points,p_colors,p_uvs,p_texture,p_count); + +} +void VisualServerRaster::canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton){ + + VSG::canvas->canvas_item_add_mesh(p_item,p_mesh,p_skeleton); + +} +void VisualServerRaster::canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton){ + + VSG::canvas->canvas_item_add_multimesh(p_item,p_mesh,p_skeleton); + +} +void VisualServerRaster::canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform){ + + VSG::canvas->canvas_item_add_set_transform(p_item,p_transform); + +} +void VisualServerRaster::canvas_item_add_clip_ignore(RID p_item, bool p_ignore){ + + VSG::canvas->canvas_item_add_clip_ignore(p_item,p_ignore); + +} +void VisualServerRaster::canvas_item_set_sort_children_by_y(RID p_item, bool p_enable){ + + VSG::canvas->canvas_item_set_sort_children_by_y(p_item,p_enable); + +} +void VisualServerRaster::canvas_item_set_z(RID p_item, int p_z){ + + VSG::canvas->canvas_item_set_z(p_item,p_z); + +} +void VisualServerRaster::canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable){ + + VSG::canvas->canvas_item_set_z_as_relative_to_parent(p_item,p_enable); + +} +void VisualServerRaster::canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect){ + + VSG::canvas->canvas_item_set_copy_to_backbuffer(p_item,p_enable,p_rect); + +} + +void VisualServerRaster::canvas_item_clear(RID p_item){ + + VSG::canvas->canvas_item_clear(p_item); + +} +void VisualServerRaster::canvas_item_set_draw_index(RID p_item,int p_index){ + + VSG::canvas->canvas_item_set_draw_index(p_item,p_index); + +} + +void VisualServerRaster::canvas_item_set_material(RID p_item, RID p_material){ + + VSG::canvas->canvas_item_set_material(p_item,p_material); + +} + +void VisualServerRaster::canvas_item_set_use_parent_material(RID p_item, bool p_enable) { + + VSG::canvas->canvas_item_set_use_parent_material(p_item,p_enable); +} + +RID VisualServerRaster::canvas_light_create(){ + + return VSG::canvas->canvas_light_create(); +} + +void VisualServerRaster::canvas_light_attach_to_canvas(RID p_light,RID p_canvas){ + + VSG::canvas->canvas_light_attach_to_canvas(p_light,p_canvas); + +} +void VisualServerRaster::canvas_light_set_enabled(RID p_light, bool p_enabled){ + + VSG::canvas->canvas_light_set_enabled(p_light,p_enabled); + +} +void VisualServerRaster::canvas_light_set_scale(RID p_light, float p_scale){ + + VSG::canvas->canvas_light_set_scale(p_light,p_scale); + +} +void VisualServerRaster::canvas_light_set_transform(RID p_light, const Matrix32& p_transform){ + + VSG::canvas->canvas_light_set_transform(p_light,p_transform); + +} +void VisualServerRaster::canvas_light_set_texture(RID p_light, RID p_texture){ + + VSG::canvas->canvas_light_set_texture(p_light,p_texture); + +} +void VisualServerRaster::canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset){ + + VSG::canvas->canvas_light_set_texture_offset(p_light,p_offset); + +} +void VisualServerRaster::canvas_light_set_color(RID p_light, const Color& p_color){ + + VSG::canvas->canvas_light_set_color(p_light,p_color); + +} +void VisualServerRaster::canvas_light_set_height(RID p_light, float p_height){ + + VSG::canvas->canvas_light_set_height(p_light,p_height); + +} +void VisualServerRaster::canvas_light_set_energy(RID p_light, float p_energy){ + + VSG::canvas->canvas_light_set_energy(p_light,p_energy); + +} +void VisualServerRaster::canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z){ + + VSG::canvas->canvas_light_set_z_range(p_light,p_min_z,p_max_z); + +} +void VisualServerRaster::canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer){ + + VSG::canvas->canvas_light_set_layer_range(p_light,p_min_layer,p_max_layer); + +} +void VisualServerRaster::canvas_light_set_item_cull_mask(RID p_light, int p_mask){ + + VSG::canvas->canvas_light_set_item_cull_mask(p_light,p_mask); + +} +void VisualServerRaster::canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask){ + + VSG::canvas->canvas_light_set_item_shadow_cull_mask(p_light,p_mask); + +} + +void VisualServerRaster::canvas_light_set_mode(RID p_light, CanvasLightMode p_mode){ + + VSG::canvas->canvas_light_set_mode(p_light,p_mode); + +} + + +void VisualServerRaster::canvas_light_set_shadow_enabled(RID p_light, bool p_enabled) { + + VSG::canvas->canvas_light_set_shadow_enabled(p_light,p_enabled); + +} +void VisualServerRaster::canvas_light_set_shadow_buffer_size(RID p_light, int p_size) { + + VSG::canvas->canvas_light_set_shadow_buffer_size(p_light,p_size); + +} +void VisualServerRaster::canvas_light_set_shadow_gradient_length(RID p_light, float p_length) { + + VSG::canvas->canvas_light_set_shadow_gradient_length(p_light,p_length); + +} +void VisualServerRaster::canvas_light_set_shadow_filter(RID p_light, CanvasLightShadowFilter p_filter){ + + VSG::canvas->canvas_light_set_shadow_filter(p_light,p_filter); + +} +void VisualServerRaster::canvas_light_set_shadow_color(RID p_light, const Color& p_color){ + + VSG::canvas->canvas_light_set_shadow_color(p_light,p_color); + +} + + + +RID VisualServerRaster::canvas_light_occluder_create() { + + return VSG::canvas->canvas_light_occluder_create(); +} + +void VisualServerRaster::canvas_light_occluder_attach_to_canvas(RID p_occluder,RID p_canvas){ + + VSG::canvas->canvas_light_occluder_attach_to_canvas(p_occluder,p_canvas); + +} +void VisualServerRaster::canvas_light_occluder_set_enabled(RID p_occluder,bool p_enabled) { + + VSG::canvas->canvas_light_occluder_set_enabled(p_occluder,p_enabled); + +} +void VisualServerRaster::canvas_light_occluder_set_polygon(RID p_occluder,RID p_polygon){ + + VSG::canvas->canvas_light_occluder_set_polygon(p_occluder,p_polygon); + +} +void VisualServerRaster::canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform){ + + VSG::canvas->canvas_light_occluder_set_transform(p_occluder,p_xform); + +} +void VisualServerRaster::canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask){ + + VSG::canvas->canvas_light_occluder_set_light_mask(p_occluder,p_mask); + +} + +RID VisualServerRaster::canvas_occluder_polygon_create() { + + return VSG::canvas->canvas_occluder_polygon_create(); +} +void VisualServerRaster::canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed){ + + VSG::canvas->canvas_occluder_polygon_set_shape(p_occluder_polygon,p_shape,p_closed); +} +void VisualServerRaster::canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape){ + + VSG::canvas->canvas_occluder_polygon_set_shape_as_lines(p_occluder_polygon,p_shape); + +} + + +void VisualServerRaster::canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,CanvasOccluderPolygonCullMode p_mode){ + + VSG::canvas->canvas_occluder_polygon_set_cull_mode(p_occluder_polygon,p_mode); + +} + + +/* CURSOR */ +void VisualServerRaster::cursor_set_rotation(float p_rotation, int p_cursor ){ + +} +void VisualServerRaster::cursor_set_texture(RID p_texture, const Point2 &p_center_offset, int p_cursor, const Rect2 &p_region){ + +} +void VisualServerRaster::cursor_set_visible(bool p_visible, int p_cursor ){ + +} +void VisualServerRaster::cursor_set_pos(const Point2& p_pos, int p_cursor ){ + +} + +/* BLACK BARS */ + + +void VisualServerRaster::black_bars_set_margins(int p_left, int p_top, int p_right, int p_bottom){ + +} +void VisualServerRaster::black_bars_set_images(RID p_left, RID p_top, RID p_right, RID p_bottom){ + +} + + +/* FREE */ + +void VisualServerRaster::free( RID p_rid ){ + + if (VSG::storage->free(p_rid)) + return; + if (VSG::canvas->free(p_rid)) + return; + if (VSG::viewport->free(p_rid)) + return; + +} + +/* EVENT QUEUING */ + +void VisualServerRaster::draw(){ + + //if (changes) + // print_line("changes: "+itos(changes)); + +// changes=0; + VSG::rasterizer->begin_frame(); + VSG::viewport->draw_viewports(); + //_draw_cursors_and_margins(); + VSG::rasterizer->end_frame(); + //draw_extra_frame=VS:rasterizer->needs_to_draw_next_frame(); +} +void VisualServerRaster::sync(){ + +} +bool VisualServerRaster::has_changed() const{ + + return false; +} +void VisualServerRaster::init(){ + + VSG::rasterizer->initialize(); + +} +void VisualServerRaster::finish(){ + + if (test_cube.is_valid()) { + free(test_cube); + } + + VSG::rasterizer->finalize(); +} + +/* STATUS INFORMATION */ + + +int VisualServerRaster::get_render_info(RenderInfo p_info){ + + return 0; +} + + + +/* TESTING */ + + + +void VisualServerRaster::set_boot_image(const Image& p_image, const Color& p_color,bool p_scale){ + +} +void VisualServerRaster::set_default_clear_color(const Color& p_color){ + +} + +bool VisualServerRaster::has_feature(Features p_feature) const { + + return false; +} + +RID VisualServerRaster::get_test_cube() { + if (!test_cube.is_valid()) { + test_cube=_make_test_cube(); + } + return test_cube; +} + +VisualServerRaster::VisualServerRaster() { + + VSG::canvas = memnew( VisualServerCanvas); + VSG::viewport = memnew( VisualServerViewport); + VSG::rasterizer = Rasterizer::create(); + VSG::storage=VSG::rasterizer->get_storage(); + VSG::canvas_render=VSG::rasterizer->get_canvas(); + VSG::scene_render=VSG::rasterizer->get_scene(); + +} + +VisualServerRaster::~VisualServerRaster() { + + memdelete(VSG::canvas); + memdelete(VSG::viewport); + memdelete(VSG::rasterizer); +} + + +#if 0 + BalloonAllocator<> *VisualServerRaster::OctreeAllocator::allocator=NULL; #define VS_CHANGED\ @@ -1146,7 +2418,7 @@ void VisualServerRaster::baked_light_set_octree(RID p_baked_light,const DVector< if (p_octree.size()==0) { if (baked_light->data.octree_texture.is_valid()) rasterizer->free(baked_light->data.octree_texture); - baked_light->data.octree_texture=RID(); + baked_light->data.octree_texture; baked_light->octree_aabb=AABB(); baked_light->octree_tex_size=Size2(); } else { @@ -1203,7 +2475,7 @@ void VisualServerRaster::baked_light_set_octree(RID p_baked_light,const DVector< if (tex_w!=baked_light->octree_tex_size.x || tex_h!=baked_light->octree_tex_size.y) { rasterizer->free(baked_light->data.octree_texture); - baked_light->data.octree_texture=RID(); + baked_light->data.octree_texture; baked_light->octree_tex_size.x=0; baked_light->octree_tex_size.y=0; } @@ -1212,7 +2484,7 @@ void VisualServerRaster::baked_light_set_octree(RID p_baked_light,const DVector< if (baked_light->data.light_texture.is_valid()) { if (!has_light_tex || light_tex_w!=baked_light->light_tex_size.x || light_tex_h!=baked_light->light_tex_size.y) { rasterizer->free(baked_light->data.light_texture); - baked_light->data.light_texture=RID(); + baked_light->data.light_texture; baked_light->light_tex_size.x=0; baked_light->light_tex_size.y=0; } @@ -1220,20 +2492,20 @@ void VisualServerRaster::baked_light_set_octree(RID p_baked_light,const DVector< if (!baked_light->data.octree_texture.is_valid()) { baked_light->data.octree_texture=rasterizer->texture_create(); - rasterizer->texture_allocate(baked_light->data.octree_texture,tex_w,tex_h,Image::FORMAT_RGBA,TEXTURE_FLAG_FILTER); + rasterizer->texture_allocate(baked_light->data.octree_texture,tex_w,tex_h,Image::FORMAT_RGBA8,TEXTURE_FLAG_FILTER); baked_light->octree_tex_size.x=tex_w; baked_light->octree_tex_size.y=tex_h; } if (!baked_light->data.light_texture.is_valid() && has_light_tex) { baked_light->data.light_texture=rasterizer->texture_create(); - rasterizer->texture_allocate(baked_light->data.light_texture,light_tex_w,light_tex_h,Image::FORMAT_RGBA,TEXTURE_FLAG_FILTER); + rasterizer->texture_allocate(baked_light->data.light_texture,light_tex_w,light_tex_h,Image::FORMAT_RGBA8,TEXTURE_FLAG_FILTER); baked_light->light_tex_size.x=light_tex_w; baked_light->light_tex_size.y=light_tex_h; } - Image img(tex_w,tex_h,0,Image::FORMAT_RGBA,p_octree); + Image img(tex_w,tex_h,0,Image::FORMAT_RGBA8,p_octree); rasterizer->texture_set_data(baked_light->data.octree_texture,img); } @@ -1276,7 +2548,7 @@ void VisualServerRaster::baked_light_set_light(RID p_baked_light,const DVectortexture_set_data(baked_light->data.light_texture,img); @@ -1611,8 +2883,8 @@ void VisualServerRaster::viewport_set_as_render_target(RID p_viewport,bool p_ena if (!p_enable) { rasterizer->free(viewport->render_target); - viewport->render_target=RID(); - viewport->render_target_texture=RID(); + viewport->render_target; + viewport->render_target_texture; if (viewport->update_list.in_list()) viewport_update_list.remove(&viewport->update_list); @@ -1811,7 +3083,7 @@ void VisualServerRaster::viewport_attach_camera(RID p_viewport,RID p_camera) { // a camera viewport->camera=p_camera; } else { - viewport->camera=RID(); + viewport->camera; } } @@ -1830,7 +3102,7 @@ void VisualServerRaster::viewport_set_scenario(RID p_viewport,RID p_scenario) { // a camera viewport->scenario=p_scenario; } else { - viewport->scenario=RID(); + viewport->scenario; } } @@ -2284,7 +3556,7 @@ void VisualServerRaster::instance_set_base(RID p_instance, RID p_base) { instance->base_type=INSTANCE_NONE; - instance->base_rid=RID(); + instance->base_rid; if (p_base.is_valid()) { @@ -3003,7 +4275,7 @@ void VisualServerRaster::instance_geometry_set_baked_light_sampler(RID p_instanc if (instance->sampled_light) { instance->sampled_light->baked_light_sampler_info->owned_instances.erase(instance); - instance->data.sampled_light=RID(); + instance->data.sampled_light; } if(p_baked_light_sampler.is_valid()) { @@ -3016,7 +4288,7 @@ void VisualServerRaster::instance_geometry_set_baked_light_sampler(RID p_instanc instance->sampled_light=NULL; } - instance->data.sampled_light=RID(); + instance->data.sampled_light; } @@ -3432,7 +4704,7 @@ void VisualServerRaster::canvas_item_set_parent(RID p_item,RID p_parent) { item_owner->child_items.erase(canvas_item); } - canvas_item->parent=RID(); + canvas_item->parent; } @@ -4057,7 +5329,7 @@ void VisualServerRaster::canvas_light_attach_to_canvas(RID p_light,RID p_canvas) } if (!canvas_owner.owns(p_canvas)) - p_canvas=RID(); + p_canvas; clight->canvas=p_canvas; if (clight->canvas.is_valid()) { @@ -4184,7 +5456,7 @@ void VisualServerRaster::canvas_light_set_shadow_enabled(RID p_light, bool p_ena clight->shadow_buffer=rasterizer->canvas_light_shadow_buffer_create(clight->shadow_buffer_size); } else { rasterizer->free(clight->shadow_buffer); - clight->shadow_buffer=RID(); + clight->shadow_buffer; } @@ -4246,7 +5518,7 @@ void VisualServerRaster::canvas_light_occluder_attach_to_canvas(RID p_occluder,R } if (!canvas_owner.owns(p_canvas)) - p_canvas=RID(); + p_canvas; occluder->canvas=p_canvas; @@ -4279,12 +5551,12 @@ void VisualServerRaster::canvas_light_occluder_set_polygon(RID p_occluder,RID p_ } occluder->polygon=p_polygon; - occluder->polygon_buffer=RID(); + occluder->polygon_buffer; if (occluder->polygon.is_valid()) { CanvasLightOccluderPolygon *occluder_poly = canvas_light_occluder_polygon_owner.get(p_polygon); if (!occluder_poly) - occluder->polygon=RID(); + occluder->polygon; ERR_FAIL_COND(!occluder_poly); occluder_poly->owners.insert(occluder); occluder->polygon_buffer=occluder_poly->occluder; @@ -4558,7 +5830,7 @@ void VisualServerRaster::free( RID p_rid ) { //detach skeletons for (Set::Element *F=E->get().front();F;F=F->next()) { - F->get()->data.skeleton=RID(); + F->get()->data.skeleton; } skeleton_dependency_map.erase(E); } @@ -4688,17 +5960,17 @@ void VisualServerRaster::free( RID p_rid ) { for (int i=0;ichild_items.size();i++) { - canvas->child_items[i].item->parent=RID(); + canvas->child_items[i].item->parent; } for (Set::Element *E=canvas->lights.front();E;E=E->next()) { - E->get()->canvas=RID(); + E->get()->canvas; } for (Set::Element *E=canvas->occluders.front();E;E=E->next()) { - E->get()->canvas=RID(); + E->get()->canvas; } canvas_owner.free( p_rid ); @@ -4726,7 +5998,7 @@ void VisualServerRaster::free( RID p_rid ) { for (int i=0;ichild_items.size();i++) { - canvas_item->child_items[i]->parent=RID(); + canvas_item->child_items[i]->parent; } if (canvas_item->material) { @@ -4798,7 +6070,7 @@ void VisualServerRaster::free( RID p_rid ) { while(occluder_poly->owners.size()) { - occluder_poly->owners.front()->get()->polygon=RID(); + occluder_poly->owners.front()->get()->polygon; occluder_poly->owners.erase( occluder_poly->owners.front() ); } @@ -6417,7 +7689,7 @@ void VisualServerRaster::_process_sampled_light(const Transform& p_camera,Instan for(Set::Element *F=p_sampled_light->baked_light_sampler_info->owned_instances.front();F;F=F->next()) { - F->get()->data.sampled_light=RID(); //do not use because nothing close + F->get()->data.sampled_light; //do not use because nothing close } } @@ -7679,7 +8951,7 @@ void VisualServerRaster::init() { Image img; img.create(default_mouse_cursor_xpm); - //img.convert(Image::FORMAT_RGB); + //img.convert(Image::FORMAT_RGB8); default_cursor_texture = texture_create_from_image(img, 0); aabb_random_points.resize( GLOBAL_DEF("render/aabb_random_points",16) ); @@ -7766,3 +9038,4 @@ VisualServerRaster::VisualServerRaster(Rasterizer *p_rasterizer) { VisualServerRaster::~VisualServerRaster() { } +#endif diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 496820f4a89..4cad03928b3 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -40,6 +40,8 @@ */ + + class VisualServerRaster : public VisualServer { @@ -57,6 +59,12 @@ class VisualServerRaster : public VisualServer { }; + int changes; + bool draw_extra_frame; + RID test_cube; + + + #if 0 struct Room { bool occlude_exterior; @@ -374,97 +382,6 @@ class VisualServerRaster : public VisualServer { mutable RID_Owner canvas_item_material_owner; - struct CanvasItem : public Rasterizer::CanvasItem { - - - RID parent; // canvas it belongs to - List::Element *E; - RID viewport; - int z; - bool z_relative; - bool sort_y; - float opacity; - float self_opacity; - bool use_parent_material; - - - Vector child_items; - - - CanvasItem() { - E=NULL; - z=0; - opacity=1; - self_opacity=1; - sort_y=false; - use_parent_material=false; - z_relative=true; - } - }; - - - struct CanvasItemPtrSort { - - _FORCE_INLINE_ bool operator()(const CanvasItem* p_left,const CanvasItem* p_right) const { - - return p_left->xform.elements[2].y < p_right->xform.elements[2].y; - } - }; - - struct CanvasLightOccluder; - - struct CanvasLightOccluderPolygon { - - bool active; - Rect2 aabb; - CanvasOccluderPolygonCullMode cull_mode; - RID occluder; - Set owners; - - CanvasLightOccluderPolygon() { active=false; cull_mode=CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; } - }; - - - RID_Owner canvas_light_occluder_polygon_owner; - - RID_Owner canvas_light_occluder_owner; - - struct CanvasLight; - - struct Canvas { - - Set viewports; - struct ChildItem { - - Point2 mirror; - CanvasItem *item; - }; - - Set lights; - Set occluders; - - Vector child_items; - Color modulate; - - int find_item(CanvasItem *p_item) { - for(int i=0;i=0) - child_items.remove(idx); - } - - Canvas() { modulate=Color(1,1,1,1); } - - }; - - - RID_Owner canvas_light_owner; struct Viewport { @@ -658,8 +575,14 @@ class VisualServerRaster : public VisualServer { Rasterizer *rasterizer; + +#endif + public: + + /* TEXTURE API */ + virtual RID texture_create(); virtual void texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags=TEXTURE_FLAGS_DEFAULT); virtual void texture_set_data(RID p_texture,const Image& p_image,CubeMapSide p_cube_side=CUBEMAP_LEFT); @@ -670,29 +593,27 @@ public: virtual uint32_t texture_get_width(RID p_texture) const; virtual uint32_t texture_get_height(RID p_texture) const; virtual void texture_set_size_override(RID p_texture,int p_width, int p_height); - virtual bool texture_can_stream(RID p_texture) const; - virtual void texture_set_reload_hook(RID p_texture,ObjectID p_owner,const StringName& p_function) const; + + virtual void texture_set_path(RID p_texture,const String& p_path); virtual String texture_get_path(RID p_texture) const; - virtual void texture_debug_usage(List *r_info); - virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable); + virtual void texture_debug_usage(List *r_info); + /* SHADER API */ - virtual RID shader_create(ShaderMode p_mode=SHADER_MATERIAL); + + virtual RID shader_create(ShaderMode p_mode=SHADER_SPATIAL); virtual void shader_set_mode(RID p_shader,ShaderMode p_mode); virtual ShaderMode shader_get_mode(RID p_shader) const; - virtual void shader_set_code(RID p_shader, const String& p_vertex, const String& p_fragment,const String& p_light,int p_vertex_ofs=0,int p_fragment_ofs=0,int p_light_ofs=0); - virtual String shader_get_vertex_code(RID p_shader) const; - virtual String shader_get_fragment_code(RID p_shader) const; - virtual String shader_get_light_code(RID p_shader) const; - + virtual void shader_set_code(RID p_shader, const String& p_code); + virtual String shader_get_code(RID p_shader) const; virtual void shader_get_param_list(RID p_shader, List *p_param_list) const; virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture); @@ -709,65 +630,29 @@ public: virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); virtual Variant material_get_param(RID p_material, const StringName& p_param) const; - virtual void material_set_flag(RID p_material, MaterialFlag p_flag,bool p_enabled); - virtual bool material_get_flag(RID p_material,MaterialFlag p_flag) const; + /* MESH API */ - virtual void material_set_depth_draw_mode(RID p_material, MaterialDepthDrawMode p_mode); - virtual MaterialDepthDrawMode material_get_depth_draw_mode(RID p_material) const; - - virtual void material_set_blend_mode(RID p_material,MaterialBlendMode p_mode); - virtual MaterialBlendMode material_get_blend_mode(RID p_material) const; - - virtual void material_set_line_width(RID p_material,float p_line_width); - virtual float material_get_line_width(RID p_material) const; - - /* FIXED MATERIAL */ - - - virtual RID fixed_material_create(); - - virtual void fixed_material_set_flag(RID p_material, FixedMaterialFlags p_flag, bool p_enabled); - virtual bool fixed_material_get_flag(RID p_material, FixedMaterialFlags p_flag) const; - - virtual void fixed_material_set_param(RID p_material, FixedMaterialParam p_parameter, const Variant& p_value); - virtual Variant fixed_material_get_param(RID p_material,FixedMaterialParam p_parameter) const; - - virtual void fixed_material_set_texture(RID p_material,FixedMaterialParam p_parameter, RID p_texture); - virtual RID fixed_material_get_texture(RID p_material,FixedMaterialParam p_parameter) const; - - virtual void fixed_material_set_texcoord_mode(RID p_material,FixedMaterialParam p_parameter, FixedMaterialTexCoordMode p_mode); - virtual FixedMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,FixedMaterialParam p_parameter) const; - - - virtual void fixed_material_set_uv_transform(RID p_material,const Transform& p_transform); - virtual Transform fixed_material_get_uv_transform(RID p_material) const; - - virtual void fixed_material_set_light_shader(RID p_material,FixedMaterialLightShader p_shader); - virtual FixedMaterialLightShader fixed_material_get_light_shader(RID p_material) const; - - virtual void fixed_material_set_point_size(RID p_material,float p_size); - virtual float fixed_material_get_point_size(RID p_material) const; - - /* SURFACE API */ virtual RID mesh_create(); + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >()); + virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount); virtual int mesh_get_morph_target_count(RID p_mesh) const; + virtual void mesh_set_morph_target_mode(RID p_mesh,MorphTargetMode p_mode); virtual MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const; - virtual void mesh_add_custom_surface(RID p_mesh,const Variant& p_dat); //this is used by each platform in a different way - - virtual void mesh_add_surface(RID p_mesh,PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes=Array(),bool p_alpha_sort=false); - virtual Array mesh_get_surface_arrays(RID p_mesh,int p_surface) const; - virtual Array mesh_get_surface_morph_arrays(RID p_mesh,int p_surface) const; - - virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material,bool p_owned=false); + virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material); virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const; virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const; virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const; + + virtual DVector mesh_surface_get_array(RID p_mesh, int p_surface) const; + virtual DVector mesh_surface_get_index_array(RID p_mesh, int p_surface) const; + + virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const; virtual PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const; @@ -781,199 +666,102 @@ public: /* MULTIMESH API */ + virtual RID multimesh_create(); - virtual void multimesh_set_instance_count(RID p_multimesh,int p_count); + + virtual void multimesh_allocate(RID p_multimesh,int p_instances,MultimeshTransformFormat p_transform_format,MultimeshColorFormat p_color_format,bool p_gen_aabb=true); virtual int multimesh_get_instance_count(RID p_multimesh) const; virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh); - virtual void multimesh_set_aabb(RID p_multimesh,const AABB& p_aabb); + virtual void multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb); virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform); + virtual void multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform); virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color); virtual RID multimesh_get_mesh(RID p_multimesh) const; - virtual AABB multimesh_get_aabb(RID p_multimesh,const AABB& p_aabb) const; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const; + virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const; + virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const; virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const; virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible); virtual int multimesh_get_visible_instances(RID p_multimesh) const; + /* IMMEDIATE API */ virtual RID immediate_create(); virtual void immediate_begin(RID p_immediate,PrimitiveType p_rimitive,RID p_texture=RID()); virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex); + virtual void immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex); virtual void immediate_normal(RID p_immediate,const Vector3& p_normal); virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent); virtual void immediate_color(RID p_immediate,const Color& p_color); - virtual void immediate_uv(RID p_immediate, const Vector2& p_uv); + virtual void immediate_uv(RID p_immediate,const Vector2& tex_uv); virtual void immediate_uv2(RID p_immediate,const Vector2& tex_uv); virtual void immediate_end(RID p_immediate); virtual void immediate_clear(RID p_immediate); virtual void immediate_set_material(RID p_immediate,RID p_material); virtual RID immediate_get_material(RID p_immediate) const; + /* SKELETON API */ - /* PARTICLES API */ - - virtual RID particles_create(); - - virtual void particles_set_amount(RID p_particles, int p_amount); - virtual int particles_get_amount(RID p_particles) const; - - virtual void particles_set_emitting(RID p_particles, bool p_emitting); - virtual bool particles_is_emitting(RID p_particles) const; - - virtual void particles_set_visibility_aabb(RID p_particles, const AABB& p_visibility); - virtual AABB particles_get_visibility_aabb(RID p_particles) const; - - virtual void particles_set_emission_half_extents(RID p_particles, const Vector3& p_half_extents); - virtual Vector3 particles_get_emission_half_extents(RID p_particles) const; - - virtual void particles_set_emission_base_velocity(RID p_particles, const Vector3& p_base_velocity); - virtual Vector3 particles_get_emission_base_velocity(RID p_particles) const; - - virtual void particles_set_emission_points(RID p_particles, const DVector& p_points); - virtual DVector particles_get_emission_points(RID p_particles) const; - - virtual void particles_set_gravity_normal(RID p_particles, const Vector3& p_normal); - virtual Vector3 particles_get_gravity_normal(RID p_particles) const; - - virtual void particles_set_variable(RID p_particles, ParticleVariable p_variable,float p_value); - virtual float particles_get_variable(RID p_particles, ParticleVariable p_variable) const; - - virtual void particles_set_randomness(RID p_particles, ParticleVariable p_variable,float p_randomness); - virtual float particles_get_randomness(RID p_particles, ParticleVariable p_variable) const; - - virtual void particles_set_color_phase_pos(RID p_particles, int p_phase, float p_pos); - virtual float particles_get_color_phase_pos(RID p_particles, int p_phase) const; - - virtual void particles_set_color_phases(RID p_particles, int p_phases); - virtual int particles_get_color_phases(RID p_particles) const; - - virtual void particles_set_color_phase_color(RID p_particles, int p_phase, const Color& p_color); - virtual Color particles_get_color_phase_color(RID p_particles, int p_phase) const; - - virtual void particles_set_attractors(RID p_particles, int p_attractors); - virtual int particles_get_attractors(RID p_particles) const; - - virtual void particles_set_attractor_pos(RID p_particles, int p_attractor, const Vector3& p_pos); - virtual Vector3 particles_get_attractor_pos(RID p_particles,int p_attractor) const; - - virtual void particles_set_attractor_strength(RID p_particles, int p_attractor, float p_force); - virtual float particles_get_attractor_strength(RID p_particles,int p_attractor) const; - - virtual void particles_set_material(RID p_particles, RID p_material,bool p_owned=false); - virtual RID particles_get_material(RID p_particles) const; - - virtual void particles_set_height_from_velocity(RID p_particles, bool p_enable); - virtual bool particles_has_height_from_velocity(RID p_particles) const; - - virtual void particles_set_use_local_coordinates(RID p_particles, bool p_enable); - virtual bool particles_is_using_local_coordinates(RID p_particles) const; - + virtual RID skeleton_create(); + virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false); + virtual int skeleton_get_bone_count(RID p_skeleton) const; + virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); + virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform); + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone); /* Light API */ virtual RID light_create(LightType p_type); - virtual LightType light_get_type(RID p_light) const; - - virtual void light_set_color(RID p_light,LightColor p_type, const Color& p_color); - virtual Color light_get_color(RID p_light,LightColor p_type) const; - + virtual void light_set_color(RID p_light,const Color& p_color); + virtual void light_set_param(RID p_light,LightParam p_param,float p_value); virtual void light_set_shadow(RID p_light,bool p_enabled); - virtual bool light_has_shadow(RID p_light) const; - - virtual void light_set_volumetric(RID p_light,bool p_enabled); - virtual bool light_is_volumetric(RID p_light) const; - virtual void light_set_projector(RID p_light,RID p_texture); - virtual RID light_get_projector(RID p_light) const; + virtual void light_set_attenuation_texure(RID p_light,RID p_texture); + virtual void light_set_negative(RID p_light,bool p_enable); + virtual void light_set_cull_mask(RID p_light,uint32_t p_mask); + virtual void light_set_shader(RID p_light,RID p_shader); - virtual void light_set_param(RID p_light, LightParam p_var, float p_value); - virtual float light_get_param(RID p_light, LightParam p_var) const; - - virtual void light_set_operator(RID p_light,LightOp p_op); - virtual LightOp light_get_operator(RID p_light) const; - - virtual void light_omni_set_shadow_mode(RID p_light,LightOmniShadowMode p_mode); - virtual LightOmniShadowMode light_omni_get_shadow_mode(RID p_light) const; virtual void light_directional_set_shadow_mode(RID p_light,LightDirectionalShadowMode p_mode); - virtual LightDirectionalShadowMode light_directional_get_shadow_mode(RID p_light) const; - virtual void light_directional_set_shadow_param(RID p_light,LightDirectionalShadowParam p_param, float p_value); - virtual float light_directional_get_shadow_param(RID p_light,LightDirectionalShadowParam p_param) const; + /* PROBE API */ - /* SKELETON API */ + virtual RID reflection_probe_create(); + + virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity); + virtual void reflection_probe_set_clip(RID p_probe, float p_near, float p_far); + virtual void reflection_probe_set_min_blend_distance(RID p_probe, float p_distance); + virtual void reflection_probe_set_extents(RID p_probe, const Vector3& p_extents); + virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset); + virtual void reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable); + virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution); + virtual void reflection_probe_set_hide_skybox(RID p_probe, bool p_hide); + virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers); - virtual RID skeleton_create(); - virtual void skeleton_resize(RID p_skeleton,int p_bones); - virtual int skeleton_get_bone_count(RID p_skeleton) const; - virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); /* ROOM API */ virtual RID room_create(); - virtual void room_set_bounds(RID p_room, const BSP_Tree& p_bounds); - virtual BSP_Tree room_get_bounds(RID p_room) const; + virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform); + virtual void room_clear_bounds(); /* PORTAL API */ + // portals are only (x/y) points, forming a convex shape, which its clockwise + // order points outside. (z is 0); + virtual RID portal_create(); virtual void portal_set_shape(RID p_portal, const Vector& p_shape); - virtual Vector portal_get_shape(RID p_portal) const; virtual void portal_set_enabled(RID p_portal, bool p_enabled); - virtual bool portal_is_enabled(RID p_portal) const; virtual void portal_set_disable_distance(RID p_portal, float p_distance); - virtual float portal_get_disable_distance(RID p_portal) const; virtual void portal_set_disabled_color(RID p_portal, const Color& p_color); - virtual Color portal_get_disabled_color(RID p_portal) const; - virtual void portal_set_connect_range(RID p_portal, float p_range); - virtual float portal_get_connect_range(RID p_portal) const; - - /* BAKED LIGHT */ - - virtual RID baked_light_create(); - - virtual void baked_light_set_mode(RID p_baked_light,BakedLightMode p_mode); - virtual BakedLightMode baked_light_get_mode(RID p_baked_light) const; - - virtual void baked_light_set_octree(RID p_baked_light,const DVector p_octree); - virtual DVector baked_light_get_octree(RID p_baked_light) const; - - virtual void baked_light_set_light(RID p_baked_light,const DVector p_light); - virtual DVector baked_light_get_light(RID p_baked_light) const; - - virtual void baked_light_set_sampler_octree(RID p_baked_light,const DVector &p_sampler); - virtual DVector baked_light_get_sampler_octree(RID p_baked_light) const; - - virtual void baked_light_set_lightmap_multiplier(RID p_baked_light,float p_multiplier); - virtual float baked_light_get_lightmap_multiplier(RID p_baked_light) const; - - virtual void baked_light_add_lightmap(RID p_baked_light,const RID p_texture,int p_id); - virtual void baked_light_clear_lightmaps(RID p_baked_light); - - virtual void baked_light_set_realtime_color_enabled(RID p_baked_light, const bool p_enabled); - virtual bool baked_light_get_realtime_color_enabled(RID p_baked_light) const; - - virtual void baked_light_set_realtime_color(RID p_baked_light, const Color& p_color); - virtual Color baked_light_get_realtime_color(RID p_baked_light) const; - - virtual void baked_light_set_realtime_energy(RID p_baked_light, const float p_energy); - virtual float baked_light_get_realtime_energy(RID p_baked_light) const; - - /* BAKED LIGHT SAMPLER */ - - virtual RID baked_light_sampler_create(); - - virtual void baked_light_sampler_set_param(RID p_baked_light_sampler,BakedLightSamplerParam p_param,float p_value); - virtual float baked_light_sampler_get_param(RID p_baked_light_sampler,BakedLightSamplerParam p_param) const; - - virtual void baked_light_sampler_set_resolution(RID p_baked_light_sampler,int p_resolution); - virtual int baked_light_sampler_get_resolution(RID p_baked_light_sampler) const; /* CAMERA API */ @@ -981,58 +769,44 @@ public: virtual void camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far); virtual void camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far); virtual void camera_set_transform(RID p_camera,const Transform& p_transform); - - virtual void camera_set_visible_layers(RID p_camera,uint32_t p_layers); - virtual uint32_t camera_get_visible_layers(RID p_camera) const; - + virtual void camera_set_cull_mask(RID p_camera,uint32_t p_layers); virtual void camera_set_environment(RID p_camera,RID p_env); - virtual RID camera_get_environment(RID p_camera) const; - virtual void camera_set_use_vertical_aspect(RID p_camera,bool p_enable); - virtual bool camera_is_using_vertical_aspect(RID p_camera,bool p_enable) const; - /* VIEWPORT API */ + + /* VIEWPORT TARGET API */ virtual RID viewport_create(); - virtual void viewport_attach_to_screen(RID p_viewport,int p_screen=0); + virtual void viewport_set_size(RID p_viewport,int p_width,int p_height); + + virtual void viewport_set_active(RID p_viewport,bool p_active); + + virtual void viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode); + + virtual void viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect=Rect2(),int p_screen=0); virtual void viewport_detach(RID p_viewport); - virtual void viewport_set_as_render_target(RID p_viewport,bool p_enable); - virtual void viewport_set_render_target_update_mode(RID p_viewport,RenderTargetUpdateMode p_mode); - virtual RenderTargetUpdateMode viewport_get_render_target_update_mode(RID p_viewport) const; - virtual RID viewport_get_render_target_texture(RID p_viewport) const; - virtual void viewport_set_render_target_vflip(RID p_viewport,bool p_enable); - virtual bool viewport_get_render_target_vflip(RID p_viewport) const; - virtual void viewport_set_render_target_clear_on_new_frame(RID p_viewport,bool p_enable); - virtual bool viewport_get_render_target_clear_on_new_frame(RID p_viewport) const; - virtual void viewport_render_target_clear(RID p_viewport); - virtual void viewport_set_render_target_to_screen_rect(RID p_viewport,const Rect2& p_rect); + virtual void viewport_set_update_mode(RID p_viewport,ViewportUpdateMode p_mode); + virtual void viewport_set_vflip(RID p_viewport,bool p_enable); - virtual void viewport_queue_screen_capture(RID p_viewport); - virtual Image viewport_get_screen_capture(RID p_viewport) const; - - virtual void viewport_set_rect(RID p_viewport,const ViewportRect& p_rect); - virtual ViewportRect viewport_get_rect(RID p_viewport) const; + virtual RID viewport_get_texture(RID p_viewport) const; + virtual Image viewport_capture(RID p_viewport) const; virtual void viewport_set_hide_scenario(RID p_viewport,bool p_hide); virtual void viewport_set_hide_canvas(RID p_viewport,bool p_hide); virtual void viewport_set_disable_environment(RID p_viewport,bool p_disable); + virtual void viewport_attach_camera(RID p_viewport,RID p_camera); virtual void viewport_set_scenario(RID p_viewport,RID p_scenario); - - virtual RID viewport_get_attached_camera(RID p_viewport) const; - virtual RID viewport_get_scenario(RID p_viewport) const; virtual void viewport_attach_canvas(RID p_viewport,RID p_canvas); virtual void viewport_remove_canvas(RID p_viewport,RID p_canvas); virtual void viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset); - virtual Matrix32 viewport_get_canvas_transform(RID p_viewport,RID p_canvas) const; - virtual void viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform); - virtual Matrix32 viewport_get_global_canvas_transform(RID p_viewport) const; - virtual void viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer); virtual void viewport_set_transparent_background(RID p_viewport,bool p_enabled); - virtual bool viewport_has_transparent_background(RID p_viewport) const; + + virtual void viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform); + virtual void viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer); /* ENVIRONMENT API */ @@ -1040,21 +814,24 @@ public: virtual RID environment_create(); virtual void environment_set_background(RID p_env,EnvironmentBG p_bg); - virtual EnvironmentBG environment_get_background(RID p_env) const; + virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0); + virtual void environment_set_bg_color(RID p_env,const Color& p_color); + virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0); - virtual void environment_set_background_param(RID p_env,EnvironmentBGParam p_param, const Variant& p_value); - virtual Variant environment_get_background_param(RID p_env,EnvironmentBGParam p_param) const; + virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,EnvironmentGlowBlendMode p_blend_mode); + virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); - virtual void environment_set_enable_fx(RID p_env,EnvironmentFx p_effect,bool p_enabled); - virtual bool environment_is_fx_enabled(RID p_env,EnvironmentFx p_effect) const; - - - virtual void environment_fx_set_param(RID p_env,EnvironmentFxParam p_effect,const Variant& p_param); - virtual Variant environment_fx_get_param(RID p_env,EnvironmentFxParam p_effect) const; + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper); + virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); + virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); + virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); + virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); /* SCENARIO API */ + virtual RID scenario_create(); virtual void scenario_set_debug(RID p_scenario,ScenarioDebugMode p_debug_mode); @@ -1063,137 +840,91 @@ public: virtual void scenario_set_fallback_environment(RID p_scenario, RID p_environment); - /* INSTANCING API */ + // from can be mesh, light, area and portal so far. + virtual RID instance_create(); // from can be mesh, light, poly, area and portal so far. - virtual RID instance_create(); - - virtual void instance_set_base(RID p_instance, RID p_base); - virtual RID instance_get_base(RID p_instance) const; - - virtual void instance_set_scenario(RID p_instance, RID p_scenario); - virtual RID instance_get_scenario(RID p_instance) const; - + virtual void instance_set_base(RID p_instance, RID p_base); // from can be mesh, light, poly, area and portal so far. + virtual void instance_set_scenario(RID p_instance, RID p_scenario); // from can be mesh, light, poly, area and portal so far. virtual void instance_set_layer_mask(RID p_instance, uint32_t p_mask); - virtual uint32_t instance_get_layer_mask(RID p_instance) const; - - virtual AABB instance_get_base_aabb(RID p_instance) const; - - virtual void instance_attach_object_instance_ID(RID p_instance,uint32_t p_ID); - virtual uint32_t instance_get_object_instance_ID(RID p_instance) const; - - virtual void instance_attach_skeleton(RID p_instance,RID p_skeleton); - virtual RID instance_get_skeleton(RID p_instance) const; - + virtual void instance_set_transform(RID p_instance, const Transform& p_transform); + virtual void instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID); virtual void instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight); - virtual float instance_get_morph_target_weight(RID p_instance,int p_shape) const; - virtual void instance_set_surface_material(RID p_instance,int p_surface, RID p_material); - - virtual void instance_set_transform(RID p_instance, const Transform& p_transform); - virtual Transform instance_get_transform(RID p_instance) const; - + virtual void instance_attach_skeleton(RID p_instance,RID p_skeleton); virtual void instance_set_exterior( RID p_instance, bool p_enabled ); - virtual bool instance_is_exterior( RID p_instance) const; - virtual void instance_set_room( RID p_instance, RID p_room ); - virtual RID instance_get_room( RID p_instance ) const ; virtual void instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ); - virtual real_t instance_get_extra_visibility_margin( RID p_instance ) const; - virtual Vector instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const; - virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const; - virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const; + // don't use these in a game! + virtual Vector instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const; + virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const; + virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const; + virtual void instance_geometry_set_flag(RID p_instance,InstanceFlags p_flags,bool p_enabled); - virtual bool instance_geometry_get_flag(RID p_instance,InstanceFlags p_flags) const; - - virtual void instance_geometry_set_cast_shadows_setting(RID p_instance, VS::ShadowCastingSetting p_shadow_casting_setting); - virtual VS::ShadowCastingSetting instance_geometry_get_cast_shadows_setting(RID p_instance) const; - + virtual void instance_geometry_set_cast_shadows_setting(RID p_instance, ShadowCastingSetting p_shadow_casting_setting); virtual void instance_geometry_set_material_override(RID p_instance, RID p_material); - virtual RID instance_geometry_get_material_override(RID p_instance) const; - virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max); - virtual float instance_geometry_get_draw_range_max(RID p_instance) const; - virtual float instance_geometry_get_draw_range_min(RID p_instance) const; - virtual void instance_geometry_set_baked_light(RID p_instance,RID p_baked_light); - virtual RID instance_geometry_get_baked_light(RID p_instance) const; - - virtual void instance_geometry_set_baked_light_sampler(RID p_instance,RID p_baked_light_sampler); - virtual RID instance_geometry_get_baked_light_sampler(RID p_instance) const; - - virtual void instance_geometry_set_baked_light_texture_index(RID p_instance,int p_tex_id); - virtual int instance_geometry_get_baked_light_texture_index(RID p_instance) const; - - virtual void instance_light_set_enabled(RID p_instance,bool p_enabled); - virtual bool instance_light_is_enabled(RID p_instance) const; + virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin); + virtual void instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance); /* CANVAS (2D) */ virtual RID canvas_create(); virtual void canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring); - virtual Point2 canvas_get_item_mirroring(RID p_canvas,RID p_item) const; virtual void canvas_set_modulate(RID p_canvas,const Color& p_color); virtual RID canvas_item_create(); - - virtual void canvas_item_set_parent(RID p_item,RID p_parent_item); - virtual RID canvas_item_get_parent(RID p_canvas_item) const; + virtual void canvas_item_set_parent(RID p_item,RID p_parent); virtual void canvas_item_set_visible(RID p_item,bool p_visible); - virtual bool canvas_item_is_visible(RID p_item) const; + virtual void canvas_item_set_light_mask(RID p_item,int p_mask); - virtual void canvas_item_set_blend_mode(RID p_canvas_item,MaterialBlendMode p_blend); - virtual void canvas_item_set_light_mask(RID p_canvas_item,int p_mask); - - - - //virtual void canvas_item_set_rect(RID p_item, const Rect2& p_rect); virtual void canvas_item_set_transform(RID p_item, const Matrix32& p_transform); virtual void canvas_item_set_clip(RID p_item, bool p_clip); virtual void canvas_item_set_distance_field_mode(RID p_item, bool p_enable); virtual void canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect=Rect2()); - virtual void canvas_item_set_opacity(RID p_item, float p_opacity); - virtual float canvas_item_get_opacity(RID p_item, float p_opacity) const; - virtual void canvas_item_set_on_top(RID p_item, bool p_on_top); - virtual bool canvas_item_is_on_top(RID p_item) const; + virtual void canvas_item_set_modulate(RID p_item, const Color& p_color); + virtual void canvas_item_set_self_modulate(RID p_item, const Color& p_color); - virtual void canvas_item_set_self_opacity(RID p_item, float p_self_opacity); - virtual float canvas_item_get_self_opacity(RID p_item, float p_self_opacity) const; + virtual void canvas_item_set_draw_behind_parent(RID p_item, bool p_enable); - virtual void canvas_item_attach_viewport(RID p_item, RID p_viewport); - virtual void canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to, const Color& p_color, float p_width=1.0, bool p_antialiased=false); + virtual void canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width=1.0,bool p_antialiased=false); virtual void canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color); virtual void canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color); virtual void canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile=false,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); virtual void canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); - virtual void canvas_item_add_style_box(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright, bool p_draw_center=true,const Color& p_modulate=Color(1,1,1)); + virtual void canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,NinePatchAxisMode p_x_axis_mode=NINE_PATCH_STRETCH, NinePatchAxisMode p_y_axis_mode=NINE_PATCH_STRETCH,bool p_draw_center=true,const Color& p_modulate=Color(1,1,1)); virtual void canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width=1.0); virtual void canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID()); virtual void canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID(), int p_count=-1); - virtual void canvas_item_add_triangle_array_ptr(RID p_item, int p_count, const int* p_indices, const Point2* p_points, const Color* p_colors,const Point2* p_uvs=NULL, RID p_texture=RID()); + virtual void canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton=RID()); + virtual void canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton=RID()); virtual void canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform); - virtual void canvas_item_add_set_blend_mode(RID p_item, MaterialBlendMode p_blend); virtual void canvas_item_add_clip_ignore(RID p_item, bool p_ignore); virtual void canvas_item_set_sort_children_by_y(RID p_item, bool p_enable); virtual void canvas_item_set_z(RID p_item, int p_z); virtual void canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable); virtual void canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect); + virtual void canvas_item_clear(RID p_item); + virtual void canvas_item_set_draw_index(RID p_item,int p_index); + virtual void canvas_item_set_material(RID p_item, RID p_material); + virtual void canvas_item_set_use_parent_material(RID p_item, bool p_enable); virtual RID canvas_light_create(); virtual void canvas_light_attach_to_canvas(RID p_light,RID p_canvas); virtual void canvas_light_set_enabled(RID p_light, bool p_enabled); - virtual void canvas_light_set_transform(RID p_light, const Matrix32& p_transform); virtual void canvas_light_set_scale(RID p_light, float p_scale); + virtual void canvas_light_set_transform(RID p_light, const Matrix32& p_transform); virtual void canvas_light_set_texture(RID p_light, RID p_texture); virtual void canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset); virtual void canvas_light_set_color(RID p_light, const Color& p_color); @@ -1201,13 +932,16 @@ public: virtual void canvas_light_set_energy(RID p_light, float p_energy); virtual void canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z); virtual void canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer); - virtual void canvas_light_set_item_mask(RID p_light, int p_mask); - virtual void canvas_light_set_item_shadow_mask(RID p_light, int p_mask); + virtual void canvas_light_set_item_cull_mask(RID p_light, int p_mask); + virtual void canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask); virtual void canvas_light_set_mode(RID p_light, CanvasLightMode p_mode); + + virtual void canvas_light_set_shadow_enabled(RID p_light, bool p_enabled); virtual void canvas_light_set_shadow_buffer_size(RID p_light, int p_size); - virtual void canvas_light_set_shadow_esm_multiplier(RID p_light, float p_multiplier); + virtual void canvas_light_set_shadow_gradient_length(RID p_light, float p_length); + virtual void canvas_light_set_shadow_filter(RID p_light, CanvasLightShadowFilter p_filter); virtual void canvas_light_set_shadow_color(RID p_light, const Color& p_color); @@ -1219,72 +953,56 @@ public: virtual void canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform); virtual void canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask); - virtual RID canvas_occluder_polygon_create(); - virtual void canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_close); + virtual void canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed); virtual void canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape); + + virtual void canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,CanvasOccluderPolygonCullMode p_mode); - - virtual void canvas_item_clear(RID p_item); - virtual void canvas_item_raise(RID p_item); - - /* CANVAS ITEM MATERIAL */ - - virtual RID canvas_item_material_create(); - virtual void canvas_item_material_set_shader(RID p_material, RID p_shader); - virtual void canvas_item_material_set_shader_param(RID p_material, const StringName& p_param, const Variant& p_value); - virtual Variant canvas_item_material_get_shader_param(RID p_material, const StringName& p_param) const; - virtual void canvas_item_material_set_shading_mode(RID p_material, CanvasItemShadingMode p_mode); - - - /* CURSOR */ virtual void cursor_set_rotation(float p_rotation, int p_cursor = 0); // radians - virtual void cursor_set_texture(RID p_texture, const Point2 &p_center_offset, int p_cursor=0, const Rect2 &p_region=Rect2()); + virtual void cursor_set_texture(RID p_texture, const Point2 &p_center_offset = Point2(0, 0), int p_cursor=0, const Rect2 &p_region=Rect2()); virtual void cursor_set_visible(bool p_visible, int p_cursor = 0); virtual void cursor_set_pos(const Point2& p_pos, int p_cursor = 0); /* BLACK BARS */ + virtual void black_bars_set_margins(int p_left, int p_top, int p_right, int p_bottom); virtual void black_bars_set_images(RID p_left, RID p_top, RID p_right, RID p_bottom); + /* FREE */ - virtual void free( RID p_rid ); - - /* CUSTOM SHADE MODEL */ - - virtual void custom_shade_model_set_shader(int p_model, RID p_shader); - virtual RID custom_shade_model_get_shader(int p_model) const; - virtual void custom_shade_model_set_name(int p_model, const String& p_name); - virtual String custom_shade_model_get_name(int p_model) const; - virtual void custom_shade_model_set_param_info(int p_model, const List& p_info); - virtual void custom_shade_model_get_param_info(int p_model, List* p_info) const; + virtual void free( RID p_rid ); ///< free RIDs associated with the visual server /* EVENT QUEUING */ virtual void draw(); virtual void sync(); - + virtual bool has_changed() const; virtual void init(); virtual void finish(); - virtual bool has_changed() const; + /* STATUS INFORMATION */ - /* RENDER INFO */ virtual int get_render_info(RenderInfo p_info); - virtual bool has_feature(Features p_feature) const; - RID get_test_cube(); + virtual RID get_test_cube(); - virtual void set_boot_image(const Image& p_image, const Color& p_color, bool p_scale); + + /* TESTING */ + + virtual void set_boot_image(const Image& p_image, const Color& p_color,bool p_scale); virtual void set_default_clear_color(const Color& p_color); - VisualServerRaster(Rasterizer *p_rasterizer); + virtual bool has_feature(Features p_feature) const; + + + VisualServerRaster(); ~VisualServerRaster(); }; diff --git a/servers/visual/visual_server_viewport.cpp b/servers/visual/visual_server_viewport.cpp new file mode 100644 index 00000000000..b30a57a5b84 --- /dev/null +++ b/servers/visual/visual_server_viewport.cpp @@ -0,0 +1,489 @@ +#include "visual_server_viewport.h" +#include "visual_server_global.h" +#include "visual_server_canvas.h" + +void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { + + /* Camera should always be BEFORE any other 3D */ +#if 0 + bool scenario_draw_canvas_bg=false; + int scenario_canvas_max_layer=0; + + if (!p_viewport->hide_canvas && !p_viewport->disable_environment && scenario_owner.owns(p_viewport->scenario)) { + + Scenario *scenario=scenario_owner.get(p_viewport->scenario); + if (scenario->environment.is_valid()) { + if (rasterizer->is_environment(scenario->environment)) { + scenario_draw_canvas_bg=rasterizer->environment_get_background(scenario->environment)==VS::ENV_BG_CANVAS; + scenario_canvas_max_layer=rasterizer->environment_get_background_param(scenario->environment,VS::ENV_BG_PARAM_CANVAS_MAX_LAYER); + } + } + } + + bool can_draw_3d=!p_viewport->hide_scenario && camera_owner.owns(p_viewport->camera) && scenario_owner.owns(p_viewport->scenario); + + + if (scenario_draw_canvas_bg) { + + rasterizer->begin_canvas_bg(); + } + + if (!scenario_draw_canvas_bg && can_draw_3d) { + + _draw_viewport_camera(p_viewport,false); + + } else if (true /*|| !p_viewport->canvas_list.empty()*/){ + + //clear the viewport black because of no camera? i seriously should.. + if (p_viewport->render_target_clear_on_new_frame || p_viewport->render_target_clear) { + if (p_viewport->transparent_bg) { + rasterizer->clear_viewport(Color(0,0,0,0)); + } + else { + Color cc=clear_color; + if (scenario_draw_canvas_bg) + cc.a=0; + rasterizer->clear_viewport(cc); + } + p_viewport->render_target_clear=false; + } + } +#endif + + VSG::rasterizer->clear_render_target(Color(0.5,0.5,0.5,1.0)); + + if (!p_viewport->hide_canvas) { + int i=0; + + Map canvas_map; + + Rect2 clip_rect(0,0,p_viewport->size.x,p_viewport->size.y); + RasterizerCanvas::Light *lights=NULL; + RasterizerCanvas::Light *lights_with_shadow=NULL; + RasterizerCanvas::Light *lights_with_mask=NULL; + Rect2 shadow_rect; + + int light_count=0; + + for (Map::Element *E=p_viewport->canvas_map.front();E;E=E->next()) { + + Matrix32 xf = p_viewport->global_transform * E->get().transform; + + VisualServerCanvas::Canvas *canvas = static_cast(E->get().canvas); + + //find lights in canvas + + + for(Set::Element *F=canvas->lights.front();F;F=F->next()) { + + + RasterizerCanvas::Light* cl=F->get(); + if (cl->enabled && cl->texture.is_valid()) { + //not super efficient.. + Size2 tsize(VSG::storage->texture_get_width(cl->texture),VSG::storage->texture_get_height(cl->texture)); + tsize*=cl->scale; + + Vector2 offset=tsize/2.0; + cl->rect_cache=Rect2(-offset+cl->texture_offset,tsize); + cl->xform_cache=xf * cl->xform; + + + if (clip_rect.intersects_transformed(cl->xform_cache,cl->rect_cache)) { + + cl->filter_next_ptr=lights; + lights=cl; + cl->texture_cache=NULL; + Matrix32 scale; + scale.scale(cl->rect_cache.size); + scale.elements[2]=cl->rect_cache.pos; + cl->light_shader_xform = (cl->xform_cache * scale).affine_inverse(); + cl->light_shader_pos=cl->xform_cache[2]; + if (cl->shadow_buffer.is_valid()) { + + cl->shadows_next_ptr=lights_with_shadow; + if (lights_with_shadow==NULL) { + shadow_rect = cl->xform_cache.xform(cl->rect_cache); + } else { + shadow_rect=shadow_rect.merge( cl->xform_cache.xform(cl->rect_cache) ); + } + lights_with_shadow=cl; + cl->radius_cache=cl->rect_cache.size.length(); + + } + if (cl->mode==VS::CANVAS_LIGHT_MODE_MASK) { + cl->mask_next_ptr=lights_with_mask; + lights_with_mask=cl; + } + + light_count++; + } + + VSG::canvas_render->light_internal_update(cl->light_internal,cl); + + } + } + + //print_line("lights: "+itos(light_count)); + canvas_map[ Viewport::CanvasKey( E->key(), E->get().layer) ]=&E->get(); + + } + + if (lights_with_shadow) { + //update shadows if any + + RasterizerCanvas::LightOccluderInstance * occluders=NULL; + + //make list of occluders + for (Map::Element *E=p_viewport->canvas_map.front();E;E=E->next()) { + + VisualServerCanvas::Canvas *canvas = static_cast(E->get().canvas); + Matrix32 xf = p_viewport->global_transform * E->get().transform; + + + for(Set::Element *F=canvas->occluders.front();F;F=F->next()) { + + if (!F->get()->enabled) + continue; + F->get()->xform_cache = xf * F->get()->xform; + if (shadow_rect.intersects_transformed(F->get()->xform_cache,F->get()->aabb_cache)) { + + F->get()->next=occluders; + occluders=F->get(); + + } + } + } + //update the light shadowmaps with them + RasterizerCanvas::Light *light=lights_with_shadow; + while(light) { + + VSG::canvas_render->canvas_light_shadow_buffer_update(light->shadow_buffer,light->xform_cache.affine_inverse(),light->item_mask,light->radius_cache/1000.0,light->radius_cache*1.1,occluders,&light->shadow_matrix_cache); + light=light->shadows_next_ptr; + } + + VSG::rasterizer->restore_render_target(); + // VSG::canvas_render->reset_canvas(); + } + + + +#if 0 + if (scenario_draw_canvas_bg && canvas_map.front() && canvas_map.front()->key().layer>scenario_canvas_max_layer) { + + _draw_viewport_camera(p_viewport,!can_draw_3d); + scenario_draw_canvas_bg=false; + + } +#endif + for (Map::Element *E=canvas_map.front();E;E=E->next()) { + + VisualServerCanvas::Canvas *canvas = static_cast(E->get()->canvas); + + // print_line("canvas "+itos(i)+" size: "+itos(I->get()->canvas->child_items.size())); + //print_line("GT "+p_viewport->global_transform+". CT: "+E->get()->transform); + Matrix32 xform = p_viewport->global_transform * E->get()->transform; + + RasterizerCanvas::Light *canvas_lights=NULL; + + RasterizerCanvas::Light *ptr=lights; + while(ptr) { + if (E->get()->layer>=ptr->layer_min && E->get()->layer<=ptr->layer_max) { + ptr->next_ptr=canvas_lights; + canvas_lights=ptr; + } + ptr=ptr->filter_next_ptr; + } + + VSG::canvas->render_canvas( canvas,xform,canvas_lights,lights_with_mask,clip_rect ); + i++; +#if 0 + if (scenario_draw_canvas_bg && E->key().layer>=scenario_canvas_max_layer) { + _draw_viewport_camera(p_viewport,!can_draw_3d); + scenario_draw_canvas_bg=false; + } +#endif + + + } +#if 0 + if (scenario_draw_canvas_bg) { + _draw_viewport_camera(p_viewport,!can_draw_3d); + scenario_draw_canvas_bg=false; + } +#endif + + //VSG::canvas_render->canvas_debug_viewport_shadows(lights_with_shadow); + } + + + +} + +void VisualServerViewport::draw_viewports() { + + //sort viewports + + + //draw viewports + + + for(int i=0;iupdate_mode==VS::VIEWPORT_UPDATE_DISABLED) + continue; + + ERR_CONTINUE( !vp->render_target.is_valid() ); + + VSG::rasterizer->set_current_render_target(vp->render_target); + _draw_viewport(vp); + + if (vp->viewport_to_screen_rect!=Rect2()) { + //copy to screen if set as such + VSG::rasterizer->set_current_render_target(RID()); + VSG::rasterizer->blit_render_target_to_screen(vp->render_target,vp->viewport_to_screen_rect,vp->viewport_to_screen); + } + + if (vp->update_mode==VS::VIEWPORT_UPDATE_ONCE) { + vp->update_mode=VS::VIEWPORT_UPDATE_DISABLED; + } + } +} + + +RID VisualServerViewport::viewport_create() { + + Viewport * viewport = memnew( Viewport ); + + RID rid = viewport_owner.make_rid(viewport); + + viewport->self=rid; + viewport->hide_scenario=false; + viewport->hide_canvas=false; + viewport->render_target=VSG::storage->render_target_create(); + + return rid; + +} + +void VisualServerViewport::viewport_set_size(RID p_viewport,int p_width,int p_height){ + + ERR_FAIL_COND(p_width<0 && p_height<0); + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + + + viewport->size=Size2(p_width,p_height); + VSG::storage->render_target_set_size(viewport->render_target,p_width,p_height); + + +} + +void VisualServerViewport::viewport_set_active(RID p_viewport,bool p_active) { + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + if (p_active) { + ERR_FAIL_COND(active_viewports.find(viewport)!=-1);//already active + active_viewports.push_back(viewport); + } else { + active_viewports.erase(viewport); + } + + +} + +void VisualServerViewport::viewport_set_clear_mode(RID p_viewport,VS::ViewportClearMode p_clear_mode) { + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->clear_mode=p_clear_mode; + +} + + +void VisualServerViewport::viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect,int p_screen){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->viewport_to_screen_rect=p_rect; + viewport->viewport_to_screen=p_screen; +} +void VisualServerViewport::viewport_detach(RID p_viewport){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->viewport_to_screen_rect=Rect2(); + viewport->viewport_to_screen=0; + +} + +void VisualServerViewport::viewport_set_update_mode(RID p_viewport,VS::ViewportUpdateMode p_mode){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->update_mode=p_mode; + +} +void VisualServerViewport::viewport_set_vflip(RID p_viewport,bool p_enable){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + VSG::storage->render_target_set_flag(viewport->render_target,RasterizerStorage::RENDER_TARGET_VFLIP,p_enable); + +} + +RID VisualServerViewport::viewport_get_texture(RID p_viewport) const{ + + const Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND_V(!viewport,RID()); + + return VSG::storage->render_target_get_texture(viewport->render_target); + +} +Image VisualServerViewport::viewport_capture(RID p_viewport) const{ + + const Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND_V(!viewport,Image()); + return VSG::storage->render_target_get_image(viewport->render_target); + +} + +void VisualServerViewport::viewport_set_hide_scenario(RID p_viewport,bool p_hide){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->hide_scenario=p_hide; +} +void VisualServerViewport::viewport_set_hide_canvas(RID p_viewport,bool p_hide){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->hide_canvas=p_hide; +} +void VisualServerViewport::viewport_set_disable_environment(RID p_viewport,bool p_disable){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + + viewport->disable_environment=p_disable; +} + +void VisualServerViewport::viewport_attach_camera(RID p_viewport,RID p_camera){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->camera=p_camera; +} +void VisualServerViewport::viewport_set_scenario(RID p_viewport,RID p_scenario){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->scenario=p_scenario; +} +void VisualServerViewport::viewport_attach_canvas(RID p_viewport,RID p_canvas){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + ERR_FAIL_COND(viewport->canvas_map.has(p_canvas)); + VisualServerCanvas::Canvas *canvas = VSG::canvas->canvas_owner.getornull(p_canvas); + ERR_FAIL_COND(!canvas); + + canvas->viewports.insert(p_viewport); + viewport->canvas_map[p_canvas]=Viewport::CanvasData(); + viewport->canvas_map[p_canvas].layer=0; + viewport->canvas_map[p_canvas].canvas=canvas; + +} + +void VisualServerViewport::viewport_remove_canvas(RID p_viewport,RID p_canvas){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + VisualServerCanvas::Canvas *canvas = VSG::canvas->canvas_owner.getornull(p_canvas); + ERR_FAIL_COND(!canvas); + + viewport->canvas_map.erase(p_canvas); + canvas->viewports.erase(p_viewport); + +} +void VisualServerViewport::viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + ERR_FAIL_COND(!viewport->canvas_map.has(p_canvas)); + viewport->canvas_map[p_canvas].transform=p_offset; + +} +void VisualServerViewport::viewport_set_transparent_background(RID p_viewport,bool p_enabled){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + VSG::storage->render_target_set_flag(viewport->render_target,RasterizerStorage::RENDER_TARGET_TRANSPARENT,p_enabled); + +} + +void VisualServerViewport::viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + viewport->global_transform=p_transform; + +} +void VisualServerViewport::viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + ERR_FAIL_COND(!viewport->canvas_map.has(p_canvas)); + viewport->canvas_map[p_canvas].layer=p_layer; + +} + +bool VisualServerViewport::free(RID p_rid) { + + Viewport * viewport = viewport_owner.getornull(p_rid); + if (!viewport) + return false; + + + VSG::storage->free( viewport->render_target ); + + while(viewport->canvas_map.front()) { + viewport_remove_canvas(p_rid,viewport->canvas_map.front()->key()); + } + + viewport_set_scenario(p_rid,RID()); + active_viewports.erase(viewport); + + viewport_owner.free(p_rid); + memdelete(viewport); + + + return true; + +} + +VisualServerViewport::VisualServerViewport() +{ + +} diff --git a/servers/visual/visual_server_viewport.h b/servers/visual/visual_server_viewport.h new file mode 100644 index 00000000000..e5c888fbcb9 --- /dev/null +++ b/servers/visual/visual_server_viewport.h @@ -0,0 +1,118 @@ +#ifndef VISUALSERVERVIEWPORT_H +#define VISUALSERVERVIEWPORT_H + +#include "servers/visual_server.h" +#include "rasterizer.h" +#include "self_list.h" + +class VisualServerViewport { +public: + + struct CanvasBase : public RID_Data { + + + }; + + struct Viewport : public RID_Data { + + RID self; + RID parent; + + Size2i size; + RID camera; + RID scenario; + + VS::ViewportUpdateMode update_mode; + RID render_target; + RID render_target_texture; + + int viewport_to_screen; + Rect2 viewport_to_screen_rect; + + bool hide_scenario; + bool hide_canvas; + bool disable_environment; + + Image capture; + + VS::ViewportClearMode clear_mode; + + bool rendered_in_prev_frame; + + struct CanvasKey { + + int layer; + RID canvas; + bool operator<(const CanvasKey& p_canvas) const { if (layer==p_canvas.layer) return canvas < p_canvas.canvas; return layer canvas_map; + + Viewport() { + update_mode=VS::VIEWPORT_UPDATE_WHEN_VISIBLE; + clear_mode=VS::VIEWPORT_CLEAR_ALWAYS; + rendered_in_prev_frame=false; + disable_environment=false; + viewport_to_screen=0; + + } + }; + + mutable RID_Owner viewport_owner; + Vector active_viewports; +private: + void _draw_viewport(Viewport *p_viewport); +public: + + + RID viewport_create(); + + void viewport_set_size(RID p_viewport,int p_width,int p_height); + + void viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect=Rect2(),int p_screen=0); + void viewport_detach(RID p_viewport); + + void viewport_set_active(RID p_viewport,bool p_active); + + void viewport_set_update_mode(RID p_viewport,VS::ViewportUpdateMode p_mode); + void viewport_set_vflip(RID p_viewport,bool p_enable); + + + void viewport_set_clear_mode(RID p_viewport,VS::ViewportClearMode p_clear_mode); + + RID viewport_get_texture(RID p_viewport) const; + Image viewport_capture(RID p_viewport) const; + + void viewport_set_hide_scenario(RID p_viewport,bool p_hide); + void viewport_set_hide_canvas(RID p_viewport,bool p_hide); + void viewport_set_disable_environment(RID p_viewport,bool p_disable); + + void viewport_attach_camera(RID p_viewport,RID p_camera); + void viewport_set_scenario(RID p_viewport,RID p_scenario); + void viewport_attach_canvas(RID p_viewport,RID p_canvas); + void viewport_remove_canvas(RID p_viewport,RID p_canvas); + void viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset); + void viewport_set_transparent_background(RID p_viewport,bool p_enabled); + + void viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform); + void viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer); + + void draw_viewports(); + + bool free(RID p_rid); + + VisualServerViewport(); +}; + +#endif // VISUALSERVERVIEWPORT_H diff --git a/servers/visual/visual_server_wrap_mt.cpp b/servers/visual/visual_server_wrap_mt.cpp deleted file mode 100644 index 5ea41453421..00000000000 --- a/servers/visual/visual_server_wrap_mt.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/*************************************************************************/ -/* visual_server_wrap_mt.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "visual_server_wrap_mt.h" -#include "os/os.h" -#include "globals.h" -void VisualServerWrapMT::thread_exit() { - - exit=true; -} - -void VisualServerWrapMT::thread_draw() { - - - draw_mutex->lock(); - - draw_pending--; - bool draw=(draw_pending==0);// only draw when no more flushes are pending - - draw_mutex->unlock(); - - if (draw) { - - visual_server->draw(); - } - -} - -void VisualServerWrapMT::thread_flush() { - - - draw_mutex->lock(); - - draw_pending--; - - draw_mutex->unlock(); - -} - - - -void VisualServerWrapMT::_thread_callback(void *_instance) { - - VisualServerWrapMT *vsmt = reinterpret_cast(_instance); - - - vsmt->thread_loop(); -} - -void VisualServerWrapMT::thread_loop() { - - server_thread=Thread::get_caller_ID(); - - OS::get_singleton()->make_rendering_thread(); - - visual_server->init(); - - exit=false; - draw_thread_up=true; - while(!exit) { - // flush commands one by one, until exit is requested - command_queue.wait_and_flush_one(); - } - - command_queue.flush_all(); // flush all - - visual_server->finish(); - -} - - -/* EVENT QUEUING */ - -void VisualServerWrapMT::sync() { - - if (create_thread) { - - /* TODO: sync with the thread */ - - /* - ERR_FAIL_COND(!draw_mutex); - draw_mutex->lock(); - draw_pending++; //cambiar por un saferefcount - draw_mutex->unlock(); - */ - //command_queue.push( this, &VisualServerWrapMT::thread_flush); - } else { - - command_queue.flush_all(); //flush all pending from other threads - } - -} - -void VisualServerWrapMT::draw() { - - - if (create_thread) { - - /* TODO: Make it draw - ERR_FAIL_COND(!draw_mutex); - draw_mutex->lock(); - draw_pending++; //cambiar por un saferefcount - draw_mutex->unlock(); - - command_queue.push( this, &VisualServerWrapMT::thread_draw); - */ - } else { - - visual_server->draw(); - } -} - - -void VisualServerWrapMT::init() { - - if (create_thread) { - - draw_mutex = Mutex::create(); - print_line("CREATING RENDER THREAD"); - OS::get_singleton()->release_rendering_thread(); - if (create_thread) { - thread = Thread::create( _thread_callback, this ); - print_line("STARTING RENDER THREAD"); - } - while(!draw_thread_up) { - OS::get_singleton()->delay_usec(1000); - } - print_line("DONE RENDER THREAD"); - } else { - - visual_server->init(); - } - -} - -void VisualServerWrapMT::finish() { - - - if (thread) { - - command_queue.push( this, &VisualServerWrapMT::thread_exit); - Thread::wait_to_finish( thread ); - memdelete(thread); - - - texture_free_cached_ids(); - mesh_free_cached_ids(); - - thread=NULL; - } else { - visual_server->finish(); - } - - if (draw_mutex) - memdelete(draw_mutex); - -} - - -VisualServerWrapMT::VisualServerWrapMT(VisualServer* p_contained,bool p_create_thread) : command_queue(p_create_thread) { - - visual_server=p_contained; - create_thread=p_create_thread; - thread=NULL; - draw_mutex=NULL; - draw_pending=0; - draw_thread_up=false; - alloc_mutex=Mutex::create(); - texture_pool_max_size=GLOBAL_DEF("render/thread_textures_prealloc",5); - mesh_pool_max_size=GLOBAL_DEF("core/rid_pool_prealloc",20); - if (!p_create_thread) { - server_thread=Thread::get_caller_ID(); - } else { - server_thread=0; - } -} - - -VisualServerWrapMT::~VisualServerWrapMT() { - - memdelete(visual_server); - memdelete(alloc_mutex); - //finish(); - -} - - diff --git a/servers/visual/visual_server_wrap_mt.h b/servers/visual/visual_server_wrap_mt.h deleted file mode 100644 index b4e374dd6fc..00000000000 --- a/servers/visual/visual_server_wrap_mt.h +++ /dev/null @@ -1,739 +0,0 @@ -/*************************************************************************/ -/* visual_server_wrap_mt.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef VISUAL_SERVER_WRAP_MT_H -#define VISUAL_SERVER_WRAP_MT_H - - -#include "servers/visual_server.h" -#include "command_queue_mt.h" -#include "os/thread.h" - -/** - @author Juan Linietsky -*/ -class VisualServerWrapMT : public VisualServer { - - // the real visual server - mutable VisualServer *visual_server; - - mutable CommandQueueMT command_queue; - - static void _thread_callback(void *_instance); - void thread_loop(); - - Thread::ID server_thread; - volatile bool exit; - Thread *thread; - volatile bool draw_thread_up; - bool create_thread; - - Mutex *draw_mutex; - int draw_pending; - void thread_draw(); - void thread_flush(); - - void thread_exit(); - - Mutex*alloc_mutex; - - - int texture_pool_max_size; - List texture_id_pool; - - int mesh_pool_max_size; - List mesh_id_pool; - -//#define DEBUG_SYNC - -#ifdef DEBUG_SYNC -#define SYNC_DEBUG print_line("sync on: "+String(__FUNCTION__)); -#else -#define SYNC_DEBUG -#endif - -public: - -#define ServerName VisualServer -#define ServerNameWrapMT VisualServerWrapMT -#define server_name visual_server -#include "servers/server_wrap_mt_common.h" - - //FUNC0R(RID,texture_create); - FUNCRID(texture); - FUNC5(texture_allocate,RID,int,int,Image::Format,uint32_t); - FUNC3(texture_set_data,RID,const Image&,CubeMapSide); - FUNC2RC(Image,texture_get_data,RID,CubeMapSide); - FUNC2(texture_set_flags,RID,uint32_t); - FUNC1RC(Image::Format,texture_get_format,RID); - FUNC1RC(uint32_t,texture_get_flags,RID); - FUNC1RC(uint32_t,texture_get_width,RID); - FUNC1RC(uint32_t,texture_get_height,RID); - FUNC3(texture_set_size_override,RID,int,int); - FUNC1RC(bool,texture_can_stream,RID); - FUNC3C(texture_set_reload_hook,RID,ObjectID,const StringName&); - - FUNC2(texture_set_path,RID,const String&); - FUNC1RC(String,texture_get_path,RID); - - FUNC1(texture_set_shrink_all_x2_on_set_data,bool); - - virtual void texture_debug_usage(List *r_info) { - //pass directly, should lock the server anyway - visual_server->texture_debug_usage(r_info); - } - - - /* SHADER API */ - - FUNC1R(RID,shader_create,ShaderMode); - FUNC2(shader_set_mode,RID,ShaderMode); - FUNC1RC(ShaderMode,shader_get_mode,RID); - FUNC7(shader_set_code,RID,const String&,const String&,const String&,int,int,int); - FUNC1RC(String,shader_get_vertex_code,RID); - FUNC1RC(String,shader_get_fragment_code,RID); - FUNC1RC(String,shader_get_light_code,RID); - FUNC2SC(shader_get_param_list,RID,List*); - - FUNC3(shader_set_default_texture_param,RID,const StringName&,RID); - FUNC2RC(RID,shader_get_default_texture_param,RID,const StringName&); - - - /*virtual void shader_get_param_list(RID p_shader, List *p_param_list) { - if (Thread::get_caller_ID()!=server_thread) { - command_queue.push_and_sync( visual_server, &VisualServer::shader_get_param_list,p_shader,p_param_list); - } else { - visual_server->m_type(p1, p2, p3, p4, p5); - } - }*/ - -// virtual void shader_get_param_list(RID p_shader, List *p_param_list); - - - /* COMMON MATERIAL API */ - - FUNC0R(RID,material_create); - FUNC2(material_set_shader,RID,RID); - FUNC1RC(RID,material_get_shader,RID); - - FUNC3(material_set_param,RID,const StringName&,const Variant&); - FUNC2RC(Variant,material_get_param,RID,const StringName&); - - FUNC3(material_set_flag,RID,MaterialFlag,bool); - FUNC2RC(bool,material_get_flag,RID,MaterialFlag); - - FUNC2(material_set_depth_draw_mode,RID,MaterialDepthDrawMode); - FUNC1RC(MaterialDepthDrawMode,material_get_depth_draw_mode,RID); - - FUNC2(material_set_blend_mode,RID,MaterialBlendMode); - FUNC1RC(MaterialBlendMode,material_get_blend_mode,RID); - - FUNC2(material_set_line_width,RID,float); - FUNC1RC(float,material_get_line_width,RID); - - /* FIXED MATERIAL */ - - - FUNC0R(RID,fixed_material_create); - - FUNC3(fixed_material_set_flag,RID, FixedMaterialFlags , bool ); - FUNC2RC(bool, fixed_material_get_flag,RID, FixedMaterialFlags); - - FUNC3(fixed_material_set_param,RID, FixedMaterialParam, const Variant& ); - FUNC2RC(Variant, fixed_material_get_param,RID ,FixedMaterialParam); - - FUNC3(fixed_material_set_texture,RID ,FixedMaterialParam, RID ); - FUNC2RC(RID, fixed_material_get_texture,RID,FixedMaterialParam); - - - - FUNC3(fixed_material_set_texcoord_mode,RID,FixedMaterialParam, FixedMaterialTexCoordMode ); - FUNC2RC(FixedMaterialTexCoordMode, fixed_material_get_texcoord_mode,RID,FixedMaterialParam); - - FUNC2(fixed_material_set_light_shader,RID,FixedMaterialLightShader); - FUNC1RC(FixedMaterialLightShader, fixed_material_get_light_shader,RID); - - FUNC2(fixed_material_set_uv_transform,RID,const Transform&); - FUNC1RC(Transform, fixed_material_get_uv_transform,RID); - - FUNC2(fixed_material_set_point_size,RID ,float); - FUNC1RC(float,fixed_material_get_point_size,RID); - - /* SURFACE API */ - FUNCRID(mesh); - - FUNC2(mesh_set_morph_target_count,RID,int); - FUNC1RC(int,mesh_get_morph_target_count,RID); - - - FUNC2(mesh_set_morph_target_mode,RID,MorphTargetMode); - FUNC1RC(MorphTargetMode,mesh_get_morph_target_mode,RID); - - FUNC2(mesh_add_custom_surface,RID,const Variant&); //this is used by each platform in a different way - - FUNC5(mesh_add_surface,RID,PrimitiveType,const Array&,const Array&,bool); - FUNC2RC(Array,mesh_get_surface_arrays,RID,int); - FUNC2RC(Array,mesh_get_surface_morph_arrays,RID,int); - - FUNC4(mesh_surface_set_material,RID, int, RID,bool); - FUNC2RC(RID,mesh_surface_get_material,RID, int); - - FUNC2RC(int,mesh_surface_get_array_len,RID, int); - FUNC2RC(int,mesh_surface_get_array_index_len,RID, int); - FUNC2RC(uint32_t,mesh_surface_get_format,RID, int); - FUNC2RC(PrimitiveType,mesh_surface_get_primitive_type,RID, int); - - FUNC2(mesh_remove_surface,RID,int); - FUNC1RC(int,mesh_get_surface_count,RID); - FUNC1(mesh_clear,RID); - - - FUNC2(mesh_set_custom_aabb,RID,const AABB&); - FUNC1RC(AABB,mesh_get_custom_aabb,RID); - - - /* MULTIMESH API */ - - FUNC0R(RID,multimesh_create); - FUNC2(multimesh_set_instance_count,RID,int); - FUNC1RC(int,multimesh_get_instance_count,RID); - - FUNC2(multimesh_set_mesh,RID,RID); - FUNC2(multimesh_set_aabb,RID,const AABB&); - FUNC3(multimesh_instance_set_transform,RID,int,const Transform&); - FUNC3(multimesh_instance_set_color,RID,int,const Color&); - - FUNC1RC(RID,multimesh_get_mesh,RID); - FUNC2RC(AABB,multimesh_get_aabb,RID,const AABB&); - FUNC2RC(Transform,multimesh_instance_get_transform,RID,int); - FUNC2RC(Color,multimesh_instance_get_color,RID,int); - - FUNC2(multimesh_set_visible_instances,RID,int); - FUNC1RC(int,multimesh_get_visible_instances,RID); - - /* IMMEDIATE API */ - - - FUNC0R(RID,immediate_create); - FUNC3(immediate_begin,RID,PrimitiveType,RID); - FUNC2(immediate_vertex,RID,const Vector3&); - FUNC2(immediate_normal,RID,const Vector3&); - FUNC2(immediate_tangent,RID,const Plane&); - FUNC2(immediate_color,RID,const Color&); - FUNC2(immediate_uv,RID,const Vector2&); - FUNC2(immediate_uv2,RID,const Vector2&); - FUNC1(immediate_end,RID); - FUNC1(immediate_clear,RID); - FUNC2(immediate_set_material,RID,RID); - FUNC1RC(RID,immediate_get_material,RID); - - - /* PARTICLES API */ - - FUNC0R(RID,particles_create); - - FUNC2(particles_set_amount,RID, int ); - FUNC1RC(int,particles_get_amount,RID); - - FUNC2(particles_set_emitting,RID, bool ); - FUNC1RC(bool,particles_is_emitting,RID); - - FUNC2(particles_set_visibility_aabb,RID, const AABB&); - FUNC1RC(AABB,particles_get_visibility_aabb,RID); - - FUNC2(particles_set_emission_half_extents,RID, const Vector3&); - FUNC1RC(Vector3,particles_get_emission_half_extents,RID); - - FUNC2(particles_set_emission_base_velocity,RID, const Vector3&); - FUNC1RC(Vector3,particles_get_emission_base_velocity,RID); - - FUNC2(particles_set_emission_points,RID, const DVector& ); - FUNC1RC(DVector,particles_get_emission_points,RID); - - FUNC2(particles_set_gravity_normal,RID, const Vector3& ); - FUNC1RC(Vector3,particles_get_gravity_normal,RID); - - FUNC3(particles_set_variable,RID, ParticleVariable ,float); - FUNC2RC(float,particles_get_variable,RID, ParticleVariable ); - - FUNC3(particles_set_randomness,RID, ParticleVariable ,float); - FUNC2RC(float,particles_get_randomness,RID, ParticleVariable ); - - FUNC3(particles_set_color_phase_pos,RID, int , float); - FUNC2RC(float,particles_get_color_phase_pos,RID, int ); - - FUNC2(particles_set_color_phases,RID, int ); - FUNC1RC(int,particles_get_color_phases,RID); - - FUNC3(particles_set_color_phase_color,RID, int , const Color& ); - FUNC2RC(Color,particles_get_color_phase_color,RID, int ); - - FUNC2(particles_set_attractors,RID, int); - FUNC1RC(int,particles_get_attractors,RID); - - FUNC3(particles_set_attractor_pos,RID, int, const Vector3&); - FUNC2RC(Vector3,particles_get_attractor_pos,RID,int); - - FUNC3(particles_set_attractor_strength,RID, int, float); - FUNC2RC(float,particles_get_attractor_strength,RID,int); - - FUNC3(particles_set_material,RID, RID,bool); - FUNC1RC(RID,particles_get_material,RID); - - FUNC2(particles_set_height_from_velocity,RID, bool); - FUNC1RC(bool,particles_has_height_from_velocity,RID); - - FUNC2(particles_set_use_local_coordinates,RID, bool); - FUNC1RC(bool,particles_is_using_local_coordinates,RID); - - - /* Light API */ - - FUNC1R(RID,light_create,LightType); - FUNC1RC(LightType,light_get_type,RID); - - FUNC3(light_set_color,RID,LightColor , const Color& ); - FUNC2RC(Color,light_get_color,RID,LightColor ); - - - FUNC2(light_set_shadow,RID,bool ); - FUNC1RC(bool,light_has_shadow,RID); - - FUNC2(light_set_volumetric,RID,bool ); - FUNC1RC(bool,light_is_volumetric,RID); - - FUNC2(light_set_projector,RID,RID ); - FUNC1RC(RID,light_get_projector,RID); - - FUNC3(light_set_param,RID, LightParam , float ); - FUNC2RC(float,light_get_param,RID, LightParam ); - - FUNC2(light_set_operator,RID,LightOp); - FUNC1RC(LightOp,light_get_operator,RID); - - FUNC2(light_omni_set_shadow_mode,RID,LightOmniShadowMode); - FUNC1RC(LightOmniShadowMode,light_omni_get_shadow_mode,RID); - - FUNC2(light_directional_set_shadow_mode,RID,LightDirectionalShadowMode); - FUNC1RC(LightDirectionalShadowMode,light_directional_get_shadow_mode,RID); - FUNC3(light_directional_set_shadow_param,RID,LightDirectionalShadowParam, float ); - FUNC2RC(float,light_directional_get_shadow_param,RID,LightDirectionalShadowParam ); - - - /* SKELETON API */ - - FUNC0R(RID,skeleton_create); - FUNC2(skeleton_resize,RID,int ); - FUNC1RC(int,skeleton_get_bone_count,RID) ; - FUNC3(skeleton_bone_set_transform,RID,int, const Transform&); - FUNC2R(Transform,skeleton_bone_get_transform,RID,int ); - - /* ROOM API */ - - FUNC0R(RID,room_create); - FUNC2(room_set_bounds,RID, const BSP_Tree&); - FUNC1RC(BSP_Tree,room_get_bounds,RID); - - /* PORTAL API */ - - FUNC0R(RID,portal_create); - FUNC2(portal_set_shape,RID,const Vector&); - FUNC1RC(Vector,portal_get_shape,RID); - FUNC2(portal_set_enabled,RID, bool); - FUNC1RC(bool,portal_is_enabled,RID); - FUNC2(portal_set_disable_distance,RID, float); - FUNC1RC(float,portal_get_disable_distance,RID); - FUNC2(portal_set_disabled_color,RID, const Color&); - FUNC1RC(Color,portal_get_disabled_color,RID); - FUNC2(portal_set_connect_range,RID, float); - FUNC1RC(float,portal_get_connect_range,RID); - - - FUNC0R(RID,baked_light_create); - FUNC2(baked_light_set_mode,RID,BakedLightMode); - FUNC1RC(BakedLightMode,baked_light_get_mode,RID); - - FUNC2(baked_light_set_octree,RID,DVector); - FUNC1RC(DVector,baked_light_get_octree,RID); - - FUNC2(baked_light_set_light,RID,DVector); - FUNC1RC(DVector,baked_light_get_light,RID); - - FUNC2(baked_light_set_sampler_octree,RID,const DVector&); - FUNC1RC(DVector,baked_light_get_sampler_octree,RID); - - FUNC2(baked_light_set_lightmap_multiplier,RID,float); - FUNC1RC(float,baked_light_get_lightmap_multiplier,RID); - - FUNC3(baked_light_add_lightmap,RID,RID,int); - FUNC1(baked_light_clear_lightmaps,RID); - - FUNC2(baked_light_set_realtime_color_enabled, RID, const bool); - FUNC1RC(bool, baked_light_get_realtime_color_enabled, RID); - - FUNC2(baked_light_set_realtime_color, RID, const Color&); - FUNC1RC(Color, baked_light_get_realtime_color, RID); - - FUNC2(baked_light_set_realtime_energy, RID, const float); - FUNC1RC(float, baked_light_get_realtime_energy, RID); - - FUNC0R(RID,baked_light_sampler_create); - - FUNC3(baked_light_sampler_set_param,RID, BakedLightSamplerParam , float ); - FUNC2RC(float,baked_light_sampler_get_param,RID, BakedLightSamplerParam ); - - FUNC2(baked_light_sampler_set_resolution,RID,int); - FUNC1RC(int,baked_light_sampler_get_resolution,RID); - - /* CAMERA API */ - - FUNC0R(RID,camera_create); - FUNC4(camera_set_perspective,RID,float , float , float ); - FUNC4(camera_set_orthogonal,RID,float, float , float ); - FUNC2(camera_set_transform,RID,const Transform& ); - - FUNC2(camera_set_visible_layers,RID,uint32_t); - FUNC1RC(uint32_t,camera_get_visible_layers,RID); - - FUNC2(camera_set_environment,RID,RID); - FUNC1RC(RID,camera_get_environment,RID); - - - FUNC2(camera_set_use_vertical_aspect,RID,bool); - FUNC2RC(bool,camera_is_using_vertical_aspect,RID,bool); - - - /* VIEWPORT API */ - - FUNC0R(RID,viewport_create); - - FUNC2(viewport_attach_to_screen,RID,int ); - FUNC1(viewport_detach,RID); - - FUNC2(viewport_set_as_render_target,RID,bool); - FUNC2(viewport_set_render_target_update_mode,RID,RenderTargetUpdateMode); - FUNC1RC(RenderTargetUpdateMode,viewport_get_render_target_update_mode,RID); - FUNC1RC(RID,viewport_get_render_target_texture,RID); - - FUNC2(viewport_set_render_target_vflip,RID,bool); - FUNC1RC(bool,viewport_get_render_target_vflip,RID); - FUNC2(viewport_set_render_target_to_screen_rect,RID,const Rect2&); - - FUNC2(viewport_set_render_target_clear_on_new_frame,RID,bool); - FUNC1RC(bool,viewport_get_render_target_clear_on_new_frame,RID); - FUNC1(viewport_render_target_clear,RID); - - FUNC1(viewport_queue_screen_capture,RID); - FUNC1RC(Image,viewport_get_screen_capture,RID); - - FUNC2(viewport_set_rect,RID,const ViewportRect&); - FUNC1RC(ViewportRect,viewport_get_rect,RID); - - FUNC2(viewport_set_hide_scenario,RID,bool ); - FUNC2(viewport_set_hide_canvas,RID,bool ); - FUNC2(viewport_attach_camera,RID,RID ); - FUNC2(viewport_set_scenario,RID,RID ); - FUNC2(viewport_set_disable_environment,RID,bool ); - - FUNC1RC(RID,viewport_get_attached_camera,RID); - FUNC1RC(RID,viewport_get_scenario,RID ); - FUNC2(viewport_attach_canvas,RID,RID); - FUNC2(viewport_remove_canvas,RID,RID); - FUNC3(viewport_set_canvas_transform,RID,RID,const Matrix32&); - FUNC2RC(Matrix32,viewport_get_canvas_transform,RID,RID); - FUNC2(viewport_set_global_canvas_transform,RID,const Matrix32&); - FUNC1RC(Matrix32,viewport_get_global_canvas_transform,RID); - FUNC3(viewport_set_canvas_layer,RID,RID ,int); - FUNC2(viewport_set_transparent_background,RID,bool); - FUNC1RC(bool,viewport_has_transparent_background,RID); - - - /* ENVIRONMENT API */ - - FUNC0R(RID,environment_create); - - FUNC2(environment_set_background,RID,EnvironmentBG); - FUNC1RC(EnvironmentBG,environment_get_background,RID); - - FUNC3(environment_set_background_param,RID,EnvironmentBGParam, const Variant&); - FUNC2RC(Variant,environment_get_background_param,RID,EnvironmentBGParam ); - - FUNC3(environment_set_enable_fx,RID,EnvironmentFx,bool); - FUNC2RC(bool,environment_is_fx_enabled,RID,EnvironmentFx); - - - FUNC3(environment_fx_set_param,RID,EnvironmentFxParam,const Variant&); - FUNC2RC(Variant,environment_fx_get_param,RID,EnvironmentFxParam); - - - /* SCENARIO API */ - - FUNC0R(RID,scenario_create); - - FUNC2(scenario_set_debug,RID,ScenarioDebugMode); - FUNC2(scenario_set_environment,RID, RID); - FUNC2RC(RID,scenario_get_environment,RID, RID); - FUNC2(scenario_set_fallback_environment,RID, RID); - - - /* INSTANCING API */ - - FUNC0R(RID,instance_create); - - FUNC2(instance_set_base,RID, RID); - FUNC1RC(RID,instance_get_base,RID); - - FUNC2(instance_set_scenario,RID, RID); - FUNC1RC(RID,instance_get_scenario,RID); - - FUNC2(instance_set_layer_mask,RID, uint32_t); - FUNC1RC(uint32_t,instance_get_layer_mask,RID); - - FUNC1RC(AABB,instance_get_base_aabb,RID); - - FUNC2(instance_attach_object_instance_ID,RID,uint32_t); - FUNC1RC(uint32_t,instance_get_object_instance_ID,RID); - - FUNC2(instance_attach_skeleton,RID,RID); - FUNC1RC(RID,instance_get_skeleton,RID); - - FUNC3(instance_set_morph_target_weight,RID,int, float); - FUNC2RC(float,instance_get_morph_target_weight,RID,int); - - FUNC3(instance_set_surface_material,RID,int, RID); - - FUNC2(instance_set_transform,RID, const Transform&); - FUNC1RC(Transform,instance_get_transform,RID); - - FUNC2(instance_set_exterior,RID, bool ); - FUNC1RC(bool,instance_is_exterior,RID); - - FUNC2(instance_set_room,RID, RID ); - FUNC1RC(RID,instance_get_room,RID ) ; - - FUNC2(instance_set_extra_visibility_margin,RID, real_t ); - FUNC1RC(real_t,instance_get_extra_visibility_margin,RID ); - - FUNC2RC(Vector,instances_cull_aabb,const AABB& , RID ); - FUNC3RC(Vector,instances_cull_ray,const Vector3& ,const Vector3&, RID ); - FUNC2RC(Vector,instances_cull_convex,const Vector& , RID ); - - FUNC3(instance_geometry_set_flag,RID,InstanceFlags ,bool ); - FUNC2RC(bool,instance_geometry_get_flag,RID,InstanceFlags ); - - FUNC2(instance_geometry_set_cast_shadows_setting, RID, ShadowCastingSetting); - FUNC1RC(ShadowCastingSetting, instance_geometry_get_cast_shadows_setting, RID); - - FUNC2(instance_geometry_set_material_override,RID, RID ); - FUNC1RC(RID,instance_geometry_get_material_override,RID); - - FUNC3(instance_geometry_set_draw_range,RID,float ,float); - FUNC1RC(float,instance_geometry_get_draw_range_max,RID); - FUNC1RC(float,instance_geometry_get_draw_range_min,RID); - - FUNC2(instance_geometry_set_baked_light,RID, RID ); - FUNC1RC(RID,instance_geometry_get_baked_light,RID); - - FUNC2(instance_geometry_set_baked_light_sampler,RID, RID ); - FUNC1RC(RID,instance_geometry_get_baked_light_sampler,RID); - - FUNC2(instance_geometry_set_baked_light_texture_index,RID, int); - FUNC1RC(int,instance_geometry_get_baked_light_texture_index,RID); - - FUNC2(instance_light_set_enabled,RID,bool); - FUNC1RC(bool,instance_light_is_enabled,RID); - - /* CANVAS (2D) */ - - FUNC0R(RID,canvas_create); - FUNC3(canvas_set_item_mirroring,RID,RID,const Point2&); - FUNC2RC(Point2,canvas_get_item_mirroring,RID,RID); - FUNC2(canvas_set_modulate,RID,const Color&); - - - FUNC0R(RID,canvas_item_create); - - FUNC2(canvas_item_set_parent,RID,RID ); - FUNC1RC(RID,canvas_item_get_parent,RID); - - FUNC2(canvas_item_set_visible,RID,bool ); - FUNC1RC(bool,canvas_item_is_visible,RID); - - FUNC2(canvas_item_set_blend_mode,RID,MaterialBlendMode ); - FUNC2(canvas_item_set_light_mask,RID,int ); - - //FUNC(canvas_item_set_rect,RID, const Rect2& p_rect); - FUNC2(canvas_item_set_transform,RID, const Matrix32& ); - FUNC2(canvas_item_set_clip,RID, bool ); - FUNC2(canvas_item_set_distance_field_mode,RID, bool ); - FUNC3(canvas_item_set_custom_rect,RID, bool ,const Rect2&); - FUNC2(canvas_item_set_opacity,RID, float ); - FUNC2RC(float,canvas_item_get_opacity,RID, float ); - FUNC2(canvas_item_set_on_top,RID, bool ); - FUNC1RC(bool,canvas_item_is_on_top,RID); - - FUNC2(canvas_item_set_self_opacity,RID, float ); - FUNC2RC(float,canvas_item_get_self_opacity,RID, float ); - - FUNC2(canvas_item_attach_viewport,RID, RID ); - - FUNC6(canvas_item_add_line,RID, const Point2& , const Point2& ,const Color& ,float,bool); - FUNC3(canvas_item_add_rect,RID, const Rect2& , const Color& ); - FUNC4(canvas_item_add_circle,RID, const Point2& , float ,const Color& ); - FUNC6(canvas_item_add_texture_rect,RID, const Rect2& , RID ,bool ,const Color&,bool ); - FUNC6(canvas_item_add_texture_rect_region,RID, const Rect2& , RID ,const Rect2& ,const Color&,bool ); - FUNC8(canvas_item_add_style_box,RID, const Rect2& , const Rect2&, RID ,const Vector2& ,const Vector2&, bool ,const Color& ); - FUNC6(canvas_item_add_primitive,RID, const Vector& , const Vector& ,const Vector& , RID ,float ); - FUNC5(canvas_item_add_polygon,RID, const Vector& , const Vector& ,const Vector& , RID ); - FUNC7(canvas_item_add_triangle_array,RID, const Vector& , const Vector& , const Vector& ,const Vector& , RID , int ); - FUNC7(canvas_item_add_triangle_array_ptr,RID, int , const int* , const Point2* , const Color* ,const Point2* , RID ); - - - FUNC2(canvas_item_add_set_transform,RID,const Matrix32& ); - FUNC2(canvas_item_add_set_blend_mode,RID, MaterialBlendMode ); - FUNC2(canvas_item_add_clip_ignore,RID, bool ); - - FUNC2(canvas_item_set_sort_children_by_y,RID,bool); - FUNC2(canvas_item_set_z,RID,int); - FUNC2(canvas_item_set_z_as_relative_to_parent,RID,bool); - FUNC3(canvas_item_set_copy_to_backbuffer,RID,bool,const Rect2&); - - - FUNC2(canvas_item_set_material,RID, RID ); - - FUNC2(canvas_item_set_use_parent_material,RID, bool ); - - FUNC1(canvas_item_clear,RID); - FUNC1(canvas_item_raise,RID); - - /* CANVAS LIGHT */ - FUNC0R(RID,canvas_light_create); - FUNC2(canvas_light_attach_to_canvas,RID,RID); - FUNC2(canvas_light_set_enabled,RID,bool); - FUNC2(canvas_light_set_transform,RID,const Matrix32&); - FUNC2(canvas_light_set_scale,RID,float); - FUNC2(canvas_light_set_texture,RID,RID); - FUNC2(canvas_light_set_texture_offset,RID,const Vector2&); - FUNC2(canvas_light_set_color,RID,const Color&); - FUNC2(canvas_light_set_height,RID,float); - FUNC2(canvas_light_set_energy,RID,float); - FUNC3(canvas_light_set_layer_range,RID,int,int); - FUNC3(canvas_light_set_z_range,RID,int,int); - FUNC2(canvas_light_set_item_mask,RID,int); - FUNC2(canvas_light_set_item_shadow_mask,RID,int); - - FUNC2(canvas_light_set_mode,RID,CanvasLightMode); - FUNC2(canvas_light_set_shadow_enabled,RID,bool); - FUNC2(canvas_light_set_shadow_buffer_size,RID,int); - FUNC2(canvas_light_set_shadow_esm_multiplier,RID,float); - FUNC2(canvas_light_set_shadow_color,RID,const Color&); - - - - /* CANVAS OCCLUDER */ - - FUNC0R(RID,canvas_light_occluder_create); - FUNC2(canvas_light_occluder_attach_to_canvas,RID,RID); - FUNC2(canvas_light_occluder_set_enabled,RID,bool); - FUNC2(canvas_light_occluder_set_polygon,RID,RID); - FUNC2(canvas_light_occluder_set_transform,RID,const Matrix32&); - FUNC2(canvas_light_occluder_set_light_mask,RID,int); - - - FUNC0R(RID,canvas_occluder_polygon_create); - FUNC3(canvas_occluder_polygon_set_shape,RID,const DVector&,bool); - FUNC2(canvas_occluder_polygon_set_shape_as_lines,RID,const DVector&); - FUNC2(canvas_occluder_polygon_set_cull_mode,RID,CanvasOccluderPolygonCullMode); - - /* CANVAS MATERIAL */ - - FUNC0R(RID,canvas_item_material_create); - FUNC2(canvas_item_material_set_shader,RID,RID); - FUNC3(canvas_item_material_set_shader_param,RID,const StringName&,const Variant&); - FUNC2RC(Variant,canvas_item_material_get_shader_param,RID,const StringName&); - FUNC2(canvas_item_material_set_shading_mode,RID,CanvasItemShadingMode); - - /* CURSOR */ - FUNC2(cursor_set_rotation,float , int ); // radians - FUNC4(cursor_set_texture,RID , const Point2 &, int, const Rect2 &); - FUNC2(cursor_set_visible,bool , int ); - FUNC2(cursor_set_pos,const Point2& , int ); - - /* BLACK BARS */ - - FUNC4(black_bars_set_margins,int , int , int , int ); - FUNC4(black_bars_set_images,RID , RID , RID , RID ); - - /* FREE */ - - FUNC1(free,RID); - - /* CUSTOM SHADE MODEL */ - - FUNC2(custom_shade_model_set_shader,int , RID ); - FUNC1RC(RID,custom_shade_model_get_shader,int ); - FUNC2(custom_shade_model_set_name,int , const String& ); - FUNC1RC(String,custom_shade_model_get_name,int ); - FUNC2(custom_shade_model_set_param_info,int , const List& ); - FUNC2SC(custom_shade_model_get_param_info,int , List* ); - - /* EVENT QUEUING */ - - - virtual void init(); - virtual void finish(); - virtual void draw(); - virtual void sync(); - FUNC0RC(bool,has_changed); - - /* RENDER INFO */ - - FUNC1R(int,get_render_info,RenderInfo ); - virtual bool has_feature(Features p_feature) const { return visual_server->has_feature(p_feature); } - - FUNC3(set_boot_image,const Image& , const Color&,bool ); - FUNC1(set_default_clear_color,const Color& ); - - FUNC0R(RID,get_test_cube ); - - - VisualServerWrapMT(VisualServer* p_contained,bool p_create_thread); - ~VisualServerWrapMT(); - -#undef ServerName -#undef ServerNameWrapMT -#undef server_name - -}; - -#ifdef DEBUG_SYNC -#undef DEBUG_SYNC -#endif -#undef SYNC_DEBUG - -#endif diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index f69580254cd..175dfd47d1d 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -39,16 +39,6 @@ VisualServer *VisualServer::get_singleton() { } -void VisualServer::set_mipmap_policy(MipMapPolicy p_policy) { - - mm_policy=p_policy; -} - -VisualServer::MipMapPolicy VisualServer::get_mipmap_policy() const { - - return (VisualServer::MipMapPolicy)mm_policy; -} - DVector VisualServer::_shader_get_param_list(RID p_shader) const { @@ -93,38 +83,48 @@ RID VisualServer::texture_create_from_image(const Image& p_image,uint32_t p_flag RID VisualServer::get_test_texture() { - if (test_texture) { + if (test_texture.is_valid()) { return test_texture; }; #define TEST_TEXTURE_SIZE 256 - Image data(TEST_TEXTURE_SIZE,TEST_TEXTURE_SIZE,0,Image::FORMAT_RGB); - for (int x=0;x test_data; + test_data.resize(TEST_TEXTURE_SIZE*TEST_TEXTURE_SIZE*3); - for (int y=0;y::Write w=test_data.write(); - Color c; - int r=255-(x+y)/2; + for (int x=0;x()),DEFVAL(RID())); - ObjectTypeDB::bind_method(_MD("canvas_item_add_circle"),&VisualServer::canvas_item_add_circle); - - ObjectTypeDB::bind_method(_MD("viewport_set_canvas_transform"),&VisualServer::viewport_set_canvas_transform); - - ObjectTypeDB::bind_method(_MD("canvas_item_clear"),&VisualServer::canvas_item_clear); - ObjectTypeDB::bind_method(_MD("canvas_item_raise"),&VisualServer::canvas_item_raise); - - - ObjectTypeDB::bind_method(_MD("cursor_set_rotation"),&VisualServer::cursor_set_rotation); - ObjectTypeDB::bind_method(_MD("cursor_set_texture"),&VisualServer::cursor_set_texture); - ObjectTypeDB::bind_method(_MD("cursor_set_visible"),&VisualServer::cursor_set_visible); - ObjectTypeDB::bind_method(_MD("cursor_set_pos"),&VisualServer::cursor_set_pos); - - ObjectTypeDB::bind_method(_MD("black_bars_set_margins","left","top","right","bottom"),&VisualServer::black_bars_set_margins); - ObjectTypeDB::bind_method(_MD("black_bars_set_images","left","top","right","bottom"),&VisualServer::black_bars_set_images); - - ObjectTypeDB::bind_method(_MD("make_sphere_mesh"),&VisualServer::make_sphere_mesh); - ObjectTypeDB::bind_method(_MD("mesh_add_surface_from_planes"),&VisualServer::mesh_add_surface_from_planes); - - ObjectTypeDB::bind_method(_MD("draw"),&VisualServer::draw); - ObjectTypeDB::bind_method(_MD("sync"),&VisualServer::sync); - ObjectTypeDB::bind_method(_MD("free_rid"),&VisualServer::free); - - ObjectTypeDB::bind_method(_MD("set_default_clear_color"),&VisualServer::set_default_clear_color); - - ObjectTypeDB::bind_method(_MD("get_render_info"),&VisualServer::get_render_info); - - BIND_CONSTANT( NO_INDEX_ARRAY ); - BIND_CONSTANT( CUSTOM_ARRAY_SIZE ); - BIND_CONSTANT( ARRAY_WEIGHTS_SIZE ); - BIND_CONSTANT( MAX_PARTICLE_COLOR_PHASES ); - BIND_CONSTANT( MAX_PARTICLE_ATTRACTORS ); - BIND_CONSTANT( MAX_CURSORS ); - - BIND_CONSTANT( TEXTURE_FLAG_MIPMAPS ); - BIND_CONSTANT( TEXTURE_FLAG_REPEAT ); - BIND_CONSTANT( TEXTURE_FLAG_FILTER ); - BIND_CONSTANT( TEXTURE_FLAG_CUBEMAP ); - BIND_CONSTANT( TEXTURE_FLAGS_DEFAULT ); - - BIND_CONSTANT( CUBEMAP_LEFT ); - BIND_CONSTANT( CUBEMAP_RIGHT ); - BIND_CONSTANT( CUBEMAP_BOTTOM ); - BIND_CONSTANT( CUBEMAP_TOP ); - BIND_CONSTANT( CUBEMAP_FRONT ); - BIND_CONSTANT( CUBEMAP_BACK ); - - BIND_CONSTANT( SHADER_MATERIAL ); ///< param 0: name - BIND_CONSTANT( SHADER_POST_PROCESS ); ///< param 0: name - - BIND_CONSTANT( MATERIAL_FLAG_VISIBLE ); - BIND_CONSTANT( MATERIAL_FLAG_DOUBLE_SIDED ); - BIND_CONSTANT( MATERIAL_FLAG_INVERT_FACES ); - BIND_CONSTANT( MATERIAL_FLAG_UNSHADED ); - BIND_CONSTANT( MATERIAL_FLAG_ONTOP ); - BIND_CONSTANT( MATERIAL_FLAG_MAX ); - - BIND_CONSTANT( MATERIAL_BLEND_MODE_MIX ); - BIND_CONSTANT( MATERIAL_BLEND_MODE_ADD ); - BIND_CONSTANT( MATERIAL_BLEND_MODE_SUB ); - BIND_CONSTANT( MATERIAL_BLEND_MODE_MUL ); - - BIND_CONSTANT( FIXED_MATERIAL_PARAM_DIFFUSE ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_DETAIL ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_SPECULAR ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_EMISSION ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_SPECULAR_EXP ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_GLOW ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_NORMAL ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_SHADE_PARAM ); - BIND_CONSTANT( FIXED_MATERIAL_PARAM_MAX ); - - - - BIND_CONSTANT( FIXED_MATERIAL_TEXCOORD_SPHERE ); - BIND_CONSTANT( FIXED_MATERIAL_TEXCOORD_UV ); - BIND_CONSTANT( FIXED_MATERIAL_TEXCOORD_UV_TRANSFORM ); - BIND_CONSTANT( FIXED_MATERIAL_TEXCOORD_UV2 ); - - - BIND_CONSTANT( ARRAY_VERTEX ); - BIND_CONSTANT( ARRAY_NORMAL ); - BIND_CONSTANT( ARRAY_TANGENT ); - BIND_CONSTANT( ARRAY_COLOR ); - BIND_CONSTANT( ARRAY_TEX_UV ); - BIND_CONSTANT( ARRAY_BONES ); - BIND_CONSTANT( ARRAY_WEIGHTS ); - BIND_CONSTANT( ARRAY_INDEX ); - BIND_CONSTANT( ARRAY_MAX ); - - BIND_CONSTANT( ARRAY_FORMAT_VERTEX ); - BIND_CONSTANT( ARRAY_FORMAT_NORMAL ); - BIND_CONSTANT( ARRAY_FORMAT_TANGENT ); - BIND_CONSTANT( ARRAY_FORMAT_COLOR ); - BIND_CONSTANT( ARRAY_FORMAT_TEX_UV ); - BIND_CONSTANT( ARRAY_FORMAT_BONES ); - BIND_CONSTANT( ARRAY_FORMAT_WEIGHTS ); - BIND_CONSTANT( ARRAY_FORMAT_INDEX ); - - BIND_CONSTANT( PRIMITIVE_POINTS ); - BIND_CONSTANT( PRIMITIVE_LINES ); - BIND_CONSTANT( PRIMITIVE_LINE_STRIP ); - BIND_CONSTANT( PRIMITIVE_LINE_LOOP ); - BIND_CONSTANT( PRIMITIVE_TRIANGLES ); - BIND_CONSTANT( PRIMITIVE_TRIANGLE_STRIP ); - BIND_CONSTANT( PRIMITIVE_TRIANGLE_FAN ); - BIND_CONSTANT( PRIMITIVE_MAX ); - - BIND_CONSTANT( PARTICLE_LIFETIME ); - BIND_CONSTANT( PARTICLE_SPREAD ); - BIND_CONSTANT( PARTICLE_GRAVITY ); - BIND_CONSTANT( PARTICLE_LINEAR_VELOCITY ); - BIND_CONSTANT( PARTICLE_ANGULAR_VELOCITY ); - BIND_CONSTANT( PARTICLE_LINEAR_ACCELERATION ); - BIND_CONSTANT( PARTICLE_RADIAL_ACCELERATION ); - BIND_CONSTANT( PARTICLE_TANGENTIAL_ACCELERATION ); - BIND_CONSTANT( PARTICLE_INITIAL_SIZE ); - BIND_CONSTANT( PARTICLE_FINAL_SIZE ); - BIND_CONSTANT( PARTICLE_INITIAL_ANGLE ); - BIND_CONSTANT( PARTICLE_HEIGHT ); - BIND_CONSTANT( PARTICLE_HEIGHT_SPEED_SCALE ); - BIND_CONSTANT( PARTICLE_VAR_MAX ); - - BIND_CONSTANT( LIGHT_DIRECTIONAL ); - BIND_CONSTANT( LIGHT_OMNI ); - BIND_CONSTANT( LIGHT_SPOT ); - - - BIND_CONSTANT( LIGHT_COLOR_DIFFUSE ); - BIND_CONSTANT( LIGHT_COLOR_SPECULAR ); - - BIND_CONSTANT( LIGHT_PARAM_SPOT_ATTENUATION ); - BIND_CONSTANT( LIGHT_PARAM_SPOT_ANGLE ); - BIND_CONSTANT( LIGHT_PARAM_RADIUS ); - BIND_CONSTANT( LIGHT_PARAM_ENERGY ); - BIND_CONSTANT( LIGHT_PARAM_ATTENUATION ); - BIND_CONSTANT( LIGHT_PARAM_MAX ); - - BIND_CONSTANT( SCENARIO_DEBUG_DISABLED ); - BIND_CONSTANT( SCENARIO_DEBUG_WIREFRAME ); - BIND_CONSTANT( SCENARIO_DEBUG_OVERDRAW ); - - BIND_CONSTANT( INSTANCE_MESH ); - BIND_CONSTANT( INSTANCE_MULTIMESH ); - - BIND_CONSTANT( INSTANCE_PARTICLES ); - BIND_CONSTANT( INSTANCE_LIGHT ); - BIND_CONSTANT( INSTANCE_ROOM ); - BIND_CONSTANT( INSTANCE_PORTAL ); - BIND_CONSTANT( INSTANCE_GEOMETRY_MASK ); - - - BIND_CONSTANT( INFO_OBJECTS_IN_FRAME ); - BIND_CONSTANT( INFO_VERTICES_IN_FRAME ); - BIND_CONSTANT( INFO_MATERIAL_CHANGES_IN_FRAME ); - BIND_CONSTANT( INFO_SHADER_CHANGES_IN_FRAME ); - BIND_CONSTANT( INFO_SURFACE_CHANGES_IN_FRAME ); - BIND_CONSTANT( INFO_DRAW_CALLS_IN_FRAME ); - BIND_CONSTANT( INFO_USAGE_VIDEO_MEM_TOTAL ); - BIND_CONSTANT( INFO_VIDEO_MEM_USED ); - BIND_CONSTANT( INFO_TEXTURE_MEM_USED ); - BIND_CONSTANT( INFO_VERTEX_MEM_USED ); - } void VisualServer::_canvas_item_add_style_box(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector& p_margins, const Color& p_modulate) { ERR_FAIL_COND(p_margins.size()!=4); - canvas_item_add_style_box(p_item,p_rect,p_source,p_texture,Vector2(p_margins[0],p_margins[1]),Vector2(p_margins[2],p_margins[3]),true,p_modulate); + //canvas_item_add_style_box(p_item,p_rect,p_source,p_texture,Vector2(p_margins[0],p_margins[1]),Vector2(p_margins[2],p_margins[3]),true,p_modulate); } void VisualServer::_camera_set_orthogonal(RID p_camera,float p_size,float p_z_near,float p_z_far) { @@ -716,21 +381,6 @@ void VisualServer::_camera_set_orthogonal(RID p_camera,float p_size,float p_z_ne camera_set_orthogonal(p_camera,p_size,p_z_near,p_z_far); } -void VisualServer::_viewport_set_rect(RID p_viewport,const Rect2& p_rect) { - - ViewportRect r; - r.x=p_rect.pos.x; - r.y=p_rect.pos.y; - r.width=p_rect.size.x; - r.height=p_rect.size.y; - viewport_set_rect(p_viewport,r); -} -Rect2 VisualServer::_viewport_get_rect(RID p_viewport) const { - - ViewportRect r=viewport_get_rect(p_viewport); - return Rect2(r.x,r.y,r.width,r.height); -} - @@ -761,7 +411,7 @@ void VisualServer::mesh_add_surface_from_mesh_data( RID p_mesh, const Geometry:: d.resize(VS::ARRAY_MAX); d[ARRAY_VERTEX]=vertices; d[ARRAY_NORMAL]=normals; - mesh_add_surface(p_mesh,PRIMITIVE_TRIANGLES, d); + mesh_add_surface_from_arrays(p_mesh,PRIMITIVE_TRIANGLES, d); #else @@ -810,9 +460,6 @@ VisualServer::VisualServer() { // ERR_FAIL_COND(singleton); singleton=this; - mm_policy=GLOBAL_DEF("render/mipmap_policy",0); - if (mm_policy<0 || mm_policy>2) - mm_policy=0; } diff --git a/servers/visual_server.h b/servers/visual_server.h index 2f3d8371f6a..918b0b102ec 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -50,8 +50,6 @@ class VisualServer : public Object { DVector _shader_get_param_list(RID p_shader) const; void _camera_set_orthogonal(RID p_camera,float p_size,float p_z_near,float p_z_far); - void _viewport_set_rect(RID p_viewport,const Rect2& p_rect); - Rect2 _viewport_get_rect(RID p_viewport) const; void _canvas_item_add_style_box(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector& p_margins, const Color& p_modulate=Color(1,1,1)); protected: RID _make_test_cube(); @@ -68,26 +66,12 @@ public: static VisualServer *get_singleton(); static VisualServer *create(); - enum MipMapPolicy { - - MIPMAPS_ENABLED, - MIPMAPS_ENABLED_FOR_PO2, - MIPMAPS_DISABLED - }; - - - virtual void set_mipmap_policy(MipMapPolicy p_policy); - virtual MipMapPolicy get_mipmap_policy() const; - enum { NO_INDEX_ARRAY=-1, - CUSTOM_ARRAY_SIZE=8, ARRAY_WEIGHTS_SIZE=4, - MAX_PARTICLE_COLOR_PHASES=4, - MAX_PARTICLE_ATTRACTORS=4, CANVAS_ITEM_Z_MIN=-4096, CANVAS_ITEM_Z_MAX=4096, @@ -106,7 +90,7 @@ public: TEXTURE_FLAG_CONVERT_TO_LINEAR=16, TEXTURE_FLAG_MIRRORED_REPEAT=32, /// Repeat texture, with alternate sections mirrored TEXTURE_FLAG_CUBEMAP=2048, - TEXTURE_FLAG_VIDEO_SURFACE=4096, + TEXTURE_FLAG_USED_FOR_STREAMING=4096, TEXTURE_FLAGS_DEFAULT=TEXTURE_FLAG_REPEAT|TEXTURE_FLAG_MIPMAPS|TEXTURE_FLAG_FILTER }; @@ -132,8 +116,6 @@ public: virtual uint32_t texture_get_width(RID p_texture) const=0; virtual uint32_t texture_get_height(RID p_texture) const=0; virtual void texture_set_size_override(RID p_texture,int p_width, int p_height)=0; - virtual bool texture_can_stream(RID p_texture) const=0; - virtual void texture_set_reload_hook(RID p_texture,ObjectID p_owner,const StringName& p_function) const=0; virtual void texture_set_path(RID p_texture,const String& p_path)=0; virtual String texture_get_path(RID p_texture) const=0; @@ -155,21 +137,19 @@ public: enum ShaderMode { - SHADER_MATERIAL, + SHADER_SPATIAL, SHADER_CANVAS_ITEM, - SHADER_POST_PROCESS, + SHADER_LIGHT, }; - virtual RID shader_create(ShaderMode p_mode=SHADER_MATERIAL)=0; + virtual RID shader_create(ShaderMode p_mode=SHADER_SPATIAL)=0; virtual void shader_set_mode(RID p_shader,ShaderMode p_mode)=0; virtual ShaderMode shader_get_mode(RID p_shader) const=0; - virtual void shader_set_code(RID p_shader, const String& p_vertex, const String& p_fragment,const String& p_light, int p_vertex_ofs=0,int p_fragment_ofs=0,int p_light_ofs=0)=0; - virtual String shader_get_fragment_code(RID p_shader) const=0; - virtual String shader_get_vertex_code(RID p_shader) const=0; - virtual String shader_get_light_code(RID p_shader) const=0; + virtual void shader_set_code(RID p_shader, const String& p_code)=0; + virtual String shader_get_code(RID p_shader) const=0; virtual void shader_get_param_list(RID p_shader, List *p_param_list) const=0; virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture)=0; @@ -186,114 +166,6 @@ public: virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value)=0; virtual Variant material_get_param(RID p_material, const StringName& p_param) const=0; - enum MaterialFlag { - MATERIAL_FLAG_VISIBLE, - MATERIAL_FLAG_DOUBLE_SIDED, - MATERIAL_FLAG_INVERT_FACES, ///< Invert front/back of the object - MATERIAL_FLAG_UNSHADED, - MATERIAL_FLAG_ONTOP, - MATERIAL_FLAG_LIGHTMAP_ON_UV2, - MATERIAL_FLAG_COLOR_ARRAY_SRGB, - MATERIAL_FLAG_MAX, - }; - - virtual void material_set_flag(RID p_material, MaterialFlag p_flag,bool p_enabled)=0; - virtual bool material_get_flag(RID p_material,MaterialFlag p_flag) const=0; - - enum MaterialDepthDrawMode { - MATERIAL_DEPTH_DRAW_ALWAYS, - MATERIAL_DEPTH_DRAW_OPAQUE_ONLY, - MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA, - MATERIAL_DEPTH_DRAW_NEVER - }; - - virtual void material_set_depth_draw_mode(RID p_material, MaterialDepthDrawMode p_mode)=0; - virtual MaterialDepthDrawMode material_get_depth_draw_mode(RID p_material) const=0; - - enum MaterialBlendMode { - MATERIAL_BLEND_MODE_MIX, //default - MATERIAL_BLEND_MODE_ADD, - MATERIAL_BLEND_MODE_SUB, - MATERIAL_BLEND_MODE_MUL, - MATERIAL_BLEND_MODE_PREMULT_ALPHA - }; - - - virtual void material_set_blend_mode(RID p_material,MaterialBlendMode p_mode)=0; - virtual MaterialBlendMode material_get_blend_mode(RID p_material) const=0; - - virtual void material_set_line_width(RID p_material,float p_line_width)=0; - virtual float material_get_line_width(RID p_material) const=0; - - - //fixed material api - - virtual RID fixed_material_create()=0; - - enum FixedMaterialParam { - - FIXED_MATERIAL_PARAM_DIFFUSE, - FIXED_MATERIAL_PARAM_DETAIL, - FIXED_MATERIAL_PARAM_SPECULAR, - FIXED_MATERIAL_PARAM_EMISSION, - FIXED_MATERIAL_PARAM_SPECULAR_EXP, - FIXED_MATERIAL_PARAM_GLOW, - FIXED_MATERIAL_PARAM_NORMAL, - FIXED_MATERIAL_PARAM_SHADE_PARAM, - FIXED_MATERIAL_PARAM_MAX - }; - - enum FixedMaterialTexCoordMode { - - FIXED_MATERIAL_TEXCOORD_UV, - FIXED_MATERIAL_TEXCOORD_UV_TRANSFORM, - FIXED_MATERIAL_TEXCOORD_UV2, - FIXED_MATERIAL_TEXCOORD_SPHERE - }; - - enum FixedMaterialFlags { - - FIXED_MATERIAL_FLAG_USE_ALPHA, - FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY, - FIXED_MATERIAL_FLAG_USE_POINT_SIZE, - FIXED_MATERIAL_FLAG_DISCARD_ALPHA, - FIXED_MATERIAL_FLAG_USE_XY_NORMALMAP, - FIXED_MATERIAL_FLAG_MAX, - }; - - - virtual void fixed_material_set_flag(RID p_material, FixedMaterialFlags p_flag, bool p_enabled)=0; - virtual bool fixed_material_get_flag(RID p_material, FixedMaterialFlags p_flag) const=0; - - virtual void fixed_material_set_param(RID p_material, FixedMaterialParam p_parameter, const Variant& p_value)=0; - virtual Variant fixed_material_get_param(RID p_material,FixedMaterialParam p_parameter) const=0; - - virtual void fixed_material_set_texture(RID p_material,FixedMaterialParam p_parameter, RID p_texture)=0; - virtual RID fixed_material_get_texture(RID p_material,FixedMaterialParam p_parameter) const=0; - - - enum FixedMaterialLightShader { - - FIXED_MATERIAL_LIGHT_SHADER_LAMBERT, - FIXED_MATERIAL_LIGHT_SHADER_WRAP, - FIXED_MATERIAL_LIGHT_SHADER_VELVET, - FIXED_MATERIAL_LIGHT_SHADER_TOON, - - }; - - - virtual void fixed_material_set_light_shader(RID p_material,FixedMaterialLightShader p_shader)=0; - virtual FixedMaterialLightShader fixed_material_get_light_shader(RID p_material) const=0; - - virtual void fixed_material_set_texcoord_mode(RID p_material,FixedMaterialParam p_parameter, FixedMaterialTexCoordMode p_mode)=0; - virtual FixedMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,FixedMaterialParam p_parameter) const=0; - - virtual void fixed_material_set_uv_transform(RID p_material,const Transform& p_transform)=0; - virtual Transform fixed_material_get_uv_transform(RID p_material) const=0; - - virtual void fixed_material_set_point_size(RID p_material,float p_size)=0; - virtual float fixed_material_get_point_size(RID p_material) const=0; - /* MESH API */ enum ArrayType { @@ -321,8 +193,25 @@ public: ARRAY_FORMAT_BONES=1<& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >())=0; - virtual void mesh_add_custom_surface(RID p_mesh,const Variant& p_dat)=0; //this is used by each platform in a different way virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount)=0; virtual int mesh_get_morph_target_count(RID p_mesh) const=0; @@ -353,11 +240,16 @@ public: virtual void mesh_set_morph_target_mode(RID p_mesh,MorphTargetMode p_mode)=0; virtual MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const=0; - virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material,bool p_owned=false)=0; + virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material)=0; virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const=0; virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const=0; virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const=0; + + virtual DVector mesh_surface_get_array(RID p_mesh, int p_surface) const=0; + virtual DVector mesh_surface_get_index_array(RID p_mesh, int p_surface) const=0; + + virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const=0; virtual PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const=0; @@ -371,30 +263,47 @@ public: /* MULTIMESH API */ + virtual RID multimesh_create()=0; - virtual void multimesh_set_instance_count(RID p_multimesh,int p_count)=0; + enum MultimeshTransformFormat { + MULTIMESH_TRANSFORM_NONE, + MULTIMESH_TRANSFORM_2D, + MULTIMESH_TRANSFORM_3D, + }; + + enum MultimeshColorFormat { + MULTIMESH_COLOR_NONE, + MULTIMESH_COLOR_8BIT, + MULTIMESH_COLOR_FLOAT, + }; + + virtual void multimesh_allocate(RID p_multimesh,int p_instances,MultimeshTransformFormat p_transform_format,MultimeshColorFormat p_color_format,bool p_gen_aabb=true)=0; virtual int multimesh_get_instance_count(RID p_multimesh) const=0; virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh)=0; - virtual void multimesh_set_aabb(RID p_multimesh,const AABB& p_aabb)=0; + virtual void multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb)=0; virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform)=0; + virtual void multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform)=0; virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color)=0; virtual RID multimesh_get_mesh(RID p_multimesh) const=0; - virtual AABB multimesh_get_aabb(RID p_multimesh,const AABB& p_aabb) const=0;; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const=0; virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const=0; + virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const=0; virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const=0; virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible)=0; virtual int multimesh_get_visible_instances(RID p_multimesh) const=0; + /* IMMEDIATE API */ virtual RID immediate_create()=0; virtual void immediate_begin(RID p_immediate,PrimitiveType p_rimitive,RID p_texture=RID())=0; virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex)=0; + virtual void immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex)=0; virtual void immediate_normal(RID p_immediate,const Vector3& p_normal)=0; virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent)=0; virtual void immediate_color(RID p_immediate,const Color& p_color)=0; @@ -405,82 +314,15 @@ public: virtual void immediate_set_material(RID p_immediate,RID p_material)=0; virtual RID immediate_get_material(RID p_immediate) const=0; + /* SKELETON API */ - /* PARTICLES API */ - - virtual RID particles_create()=0; - - enum ParticleVariable { - PARTICLE_LIFETIME, - PARTICLE_SPREAD, - PARTICLE_GRAVITY, - PARTICLE_LINEAR_VELOCITY, - PARTICLE_ANGULAR_VELOCITY, - PARTICLE_LINEAR_ACCELERATION, - PARTICLE_RADIAL_ACCELERATION, - PARTICLE_TANGENTIAL_ACCELERATION, - PARTICLE_DAMPING, - PARTICLE_INITIAL_SIZE, - PARTICLE_FINAL_SIZE, - PARTICLE_INITIAL_ANGLE, - PARTICLE_HEIGHT, - PARTICLE_HEIGHT_SPEED_SCALE, - PARTICLE_VAR_MAX - }; - - virtual void particles_set_amount(RID p_particles, int p_amount)=0; - virtual int particles_get_amount(RID p_particles) const=0; - - virtual void particles_set_emitting(RID p_particles, bool p_emitting)=0; - virtual bool particles_is_emitting(RID p_particles) const=0; - - virtual void particles_set_visibility_aabb(RID p_particles, const AABB& p_visibility)=0; - virtual AABB particles_get_visibility_aabb(RID p_particles) const=0; - - virtual void particles_set_emission_half_extents(RID p_particles, const Vector3& p_half_extents)=0; - virtual Vector3 particles_get_emission_half_extents(RID p_particles) const=0; - - virtual void particles_set_emission_base_velocity(RID p_particles, const Vector3& p_base_velocity)=0; - virtual Vector3 particles_get_emission_base_velocity(RID p_particles) const=0; - - virtual void particles_set_emission_points(RID p_particles, const DVector& p_points)=0; - virtual DVector particles_get_emission_points(RID p_particles) const=0; - - virtual void particles_set_gravity_normal(RID p_particles, const Vector3& p_normal)=0; - virtual Vector3 particles_get_gravity_normal(RID p_particles) const=0; - - virtual void particles_set_variable(RID p_particles, ParticleVariable p_variable,float p_value)=0; - virtual float particles_get_variable(RID p_particles, ParticleVariable p_variable) const=0; - - virtual void particles_set_randomness(RID p_particles, ParticleVariable p_variable,float p_randomness)=0; - virtual float particles_get_randomness(RID p_particles, ParticleVariable p_variable) const=0; - - virtual void particles_set_color_phases(RID p_particles, int p_phases)=0; - virtual int particles_get_color_phases(RID p_particles) const=0; - - virtual void particles_set_color_phase_pos(RID p_particles, int p_phase, float p_pos)=0; - virtual float particles_get_color_phase_pos(RID p_particles, int p_phase) const=0; - - virtual void particles_set_color_phase_color(RID p_particles, int p_phase, const Color& p_color)=0; - virtual Color particles_get_color_phase_color(RID p_particles, int p_phase) const=0; - - virtual void particles_set_attractors(RID p_particles, int p_attractors)=0; - virtual int particles_get_attractors(RID p_particles) const=0; - - virtual void particles_set_attractor_pos(RID p_particles, int p_attractor, const Vector3& p_pos)=0; - virtual Vector3 particles_get_attractor_pos(RID p_particles,int p_attractor) const=0; - - virtual void particles_set_attractor_strength(RID p_particles, int p_attractor, float p_force)=0; - virtual float particles_get_attractor_strength(RID p_particles,int p_attractor) const=0; - - virtual void particles_set_material(RID p_particles, RID p_material,bool p_owned=false)=0; - virtual RID particles_get_material(RID p_particles) const=0; - - virtual void particles_set_height_from_velocity(RID p_particles, bool p_enable)=0; - virtual bool particles_has_height_from_velocity(RID p_particles) const=0; - - virtual void particles_set_use_local_coordinates(RID p_particles, bool p_enable)=0; - virtual bool particles_is_using_local_coordinates(RID p_particles) const=0; + virtual RID skeleton_create()=0; + virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false)=0; + virtual int skeleton_get_bone_count(RID p_skeleton) const=0; + virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform)=0; + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone)=0; + virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform)=0; + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone)=0; /* Light API */ @@ -490,62 +332,36 @@ public: LIGHT_SPOT }; - enum LightColor { - LIGHT_COLOR_DIFFUSE, - LIGHT_COLOR_SPECULAR - }; - enum LightParam { - LIGHT_PARAM_SPOT_ATTENUATION, - LIGHT_PARAM_SPOT_ANGLE, - LIGHT_PARAM_RADIUS, LIGHT_PARAM_ENERGY, - LIGHT_PARAM_ATTENUATION, - LIGHT_PARAM_SHADOW_DARKENING, - LIGHT_PARAM_SHADOW_Z_OFFSET, - LIGHT_PARAM_SHADOW_Z_SLOPE_SCALE, - LIGHT_PARAM_SHADOW_ESM_MULTIPLIER, - LIGHT_PARAM_SHADOW_BLUR_PASSES, + LIGHT_PARAM_SPECULAR, + LIGHT_PARAM_RANGE, + LIGHT_PARAM_SPOT_ANGLE, + LIGHT_PARAM_SHADOW_MAX_DISTANCE, + LIGHT_PARAM_SHADOW_DARKNESS, + LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET, + LIGHT_PARAM_SHADOW_SPLIT_2_OFFSET, + LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET, + LIGHT_PARAM_SHADOW_SPLIT_4_OFFSET, + LIGHT_PARAM_SHADOW_NORMAL_BIAS, + LIGHT_PARAM_SHADOW_BIAS_1, + LIGHT_PARAM_SHADOW_BIAS_2, + LIGHT_PARAM_SHADOW_BIAS_3, + LIGHT_PARAM_SHADOW_BIAS_4, LIGHT_PARAM_MAX }; virtual RID light_create(LightType p_type)=0; - virtual LightType light_get_type(RID p_light) const=0; - - virtual void light_set_color(RID p_light,LightColor p_type, const Color& p_color)=0; - virtual Color light_get_color(RID p_light,LightColor p_type) const=0; + virtual void light_set_color(RID p_light,const Color& p_color)=0; + virtual void light_set_param(RID p_light,LightParam p_param,float p_value)=0; virtual void light_set_shadow(RID p_light,bool p_enabled)=0; - virtual bool light_has_shadow(RID p_light) const=0; - - virtual void light_set_volumetric(RID p_light,bool p_enabled)=0; - virtual bool light_is_volumetric(RID p_light) const=0; - virtual void light_set_projector(RID p_light,RID p_texture)=0; - virtual RID light_get_projector(RID p_light) const=0; - - virtual void light_set_param(RID p_light, LightParam p_var, float p_value)=0; - virtual float light_get_param(RID p_light, LightParam p_var) const=0; - - enum LightOp { - - LIGHT_OPERATOR_ADD, - LIGHT_OPERATOR_SUB - }; - - virtual void light_set_operator(RID p_light,LightOp p_op)=0; - virtual LightOp light_get_operator(RID p_light) const=0; - - // omni light - enum LightOmniShadowMode { - LIGHT_OMNI_SHADOW_DEFAULT, - LIGHT_OMNI_SHADOW_DUAL_PARABOLOID, - LIGHT_OMNI_SHADOW_CUBEMAP - }; - - virtual void light_omni_set_shadow_mode(RID p_light,LightOmniShadowMode p_mode)=0; - virtual LightOmniShadowMode light_omni_get_shadow_mode(RID p_light) const=0; + virtual void light_set_attenuation_texure(RID p_light,RID p_texture)=0; + virtual void light_set_negative(RID p_light,bool p_enable)=0; + virtual void light_set_cull_mask(RID p_light,uint32_t p_mask)=0; + virtual void light_set_shader(RID p_light,RID p_shader)=0; // directional light enum LightDirectionalShadowMode { @@ -556,33 +372,27 @@ public: }; virtual void light_directional_set_shadow_mode(RID p_light,LightDirectionalShadowMode p_mode)=0; - virtual LightDirectionalShadowMode light_directional_get_shadow_mode(RID p_light) const=0; - enum LightDirectionalShadowParam { + /* PROBE API */ - LIGHT_DIRECTIONAL_SHADOW_PARAM_MAX_DISTANCE, - LIGHT_DIRECTIONAL_SHADOW_PARAM_PSSM_SPLIT_WEIGHT, - LIGHT_DIRECTIONAL_SHADOW_PARAM_PSSM_ZOFFSET_SCALE, - }; + virtual RID reflection_probe_create()=0; - virtual void light_directional_set_shadow_param(RID p_light,LightDirectionalShadowParam p_param, float p_value)=0; - virtual float light_directional_get_shadow_param(RID p_light,LightDirectionalShadowParam p_param) const=0; + virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity)=0; + virtual void reflection_probe_set_clip(RID p_probe, float p_near, float p_far)=0; + virtual void reflection_probe_set_min_blend_distance(RID p_probe, float p_distance)=0; + virtual void reflection_probe_set_extents(RID p_probe, const Vector3& p_extents)=0; + virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset)=0; + virtual void reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable)=0; + virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution)=0; + virtual void reflection_probe_set_hide_skybox(RID p_probe, bool p_hide)=0; + virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers)=0; - //@TODO fallof model and all that stuff - - /* SKELETON API */ - - virtual RID skeleton_create()=0; - virtual void skeleton_resize(RID p_skeleton,int p_bones)=0; - virtual int skeleton_get_bone_count(RID p_skeleton) const=0; - virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform)=0; - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone)=0; /* ROOM API */ virtual RID room_create()=0; - virtual void room_set_bounds(RID p_room, const BSP_Tree& p_bounds)=0; - virtual BSP_Tree room_get_bounds(RID p_room) const=0; + virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform)=0; + virtual void room_clear_bounds()=0; /* PORTAL API */ @@ -591,144 +401,55 @@ public: virtual RID portal_create()=0; virtual void portal_set_shape(RID p_portal, const Vector& p_shape)=0; - virtual Vector portal_get_shape(RID p_portal) const=0; virtual void portal_set_enabled(RID p_portal, bool p_enabled)=0; - virtual bool portal_is_enabled(RID p_portal) const=0; virtual void portal_set_disable_distance(RID p_portal, float p_distance)=0; - virtual float portal_get_disable_distance(RID p_portal) const=0; virtual void portal_set_disabled_color(RID p_portal, const Color& p_color)=0; - virtual Color portal_get_disabled_color(RID p_portal) const=0; - virtual void portal_set_connect_range(RID p_portal, float p_range) =0; - virtual float portal_get_connect_range(RID p_portal) const =0; - /* BAKED LIGHT API */ - virtual RID baked_light_create()=0; - enum BakedLightMode { - BAKED_LIGHT_OCTREE, - BAKED_LIGHT_LIGHTMAPS - }; - - virtual void baked_light_set_mode(RID p_baked_light,BakedLightMode p_mode)=0; - virtual BakedLightMode baked_light_get_mode(RID p_baked_light) const=0; - - virtual void baked_light_set_octree(RID p_baked_light,const DVector p_octree)=0; - virtual DVector baked_light_get_octree(RID p_baked_light) const=0; - - virtual void baked_light_set_light(RID p_baked_light,const DVector p_light)=0; - virtual DVector baked_light_get_light(RID p_baked_light) const=0; - - virtual void baked_light_set_sampler_octree(RID p_baked_light,const DVector &p_sampler)=0; - virtual DVector baked_light_get_sampler_octree(RID p_baked_light) const=0; - - virtual void baked_light_set_lightmap_multiplier(RID p_baked_light,float p_multiplier)=0; - virtual float baked_light_get_lightmap_multiplier(RID p_baked_light) const=0; - - virtual void baked_light_add_lightmap(RID p_baked_light,const RID p_texture,int p_id)=0; - virtual void baked_light_clear_lightmaps(RID p_baked_light)=0; - - virtual void baked_light_set_realtime_color_enabled(RID p_baked_light, const bool p_enabled)=0; - virtual bool baked_light_get_realtime_color_enabled(RID p_baked_light) const=0; - - virtual void baked_light_set_realtime_color(RID p_baked_light, const Color& p_color)=0; - virtual Color baked_light_get_realtime_color(RID p_baked_light) const=0; - - virtual void baked_light_set_realtime_energy(RID p_baked_light, const float p_energy) = 0; - virtual float baked_light_get_realtime_energy(RID p_baked_light) const = 0; - - /* BAKED LIGHT SAMPLER */ - - virtual RID baked_light_sampler_create()=0; - - enum BakedLightSamplerParam { - BAKED_LIGHT_SAMPLER_RADIUS, - BAKED_LIGHT_SAMPLER_STRENGTH, - BAKED_LIGHT_SAMPLER_ATTENUATION, - BAKED_LIGHT_SAMPLER_DETAIL_RATIO, - BAKED_LIGHT_SAMPLER_MAX - }; - - virtual void baked_light_sampler_set_param(RID p_baked_light_sampler,BakedLightSamplerParam p_param,float p_value)=0; - virtual float baked_light_sampler_get_param(RID p_baked_light_sampler,BakedLightSamplerParam p_param) const=0; - - virtual void baked_light_sampler_set_resolution(RID p_baked_light_sampler,int p_resolution)=0; - virtual int baked_light_sampler_get_resolution(RID p_baked_light_sampler) const=0; - /* CAMERA API */ virtual RID camera_create()=0; virtual void camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far)=0; virtual void camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far)=0; virtual void camera_set_transform(RID p_camera,const Transform& p_transform)=0; - - virtual void camera_set_visible_layers(RID p_camera,uint32_t p_layers)=0; - virtual uint32_t camera_get_visible_layers(RID p_camera) const=0; - + virtual void camera_set_cull_mask(RID p_camera,uint32_t p_layers)=0; virtual void camera_set_environment(RID p_camera,RID p_env)=0; - virtual RID camera_get_environment(RID p_camera) const=0; - virtual void camera_set_use_vertical_aspect(RID p_camera,bool p_enable)=0; - virtual bool camera_is_using_vertical_aspect(RID p_camera,bool p_enable) const=0; - -/* - virtual void camera_add_layer(RID p_camera); - virtual void camera_layer_move_up(RID p_camera,int p_layer); - virtual void camera_layer_move_down(RID p_camera,int p_layer); - virtual void camera_layer_set_mask(RID p_camera,int p_layer,int p_mask); - virtual int camera_layer_get_mask(RID p_camera,int p_layer) const; - - enum CameraLayerFlag { - - FLAG_CLEAR_DEPTH, - FLAG_CLEAR_COLOR, - FLAG_IGNORE_FOG, - }; - virtual void camera_layer_set_flag(RID p_camera,int p_layer,bool p_enable); - virtual bool camera_layer_get_flag(RID p_camera,int p_layer) const; - -*/ - /* VIEWPORT API */ + /* VIEWPORT TARGET API */ virtual RID viewport_create()=0; - virtual void viewport_attach_to_screen(RID p_viewport,int p_screen=0)=0; + virtual void viewport_set_size(RID p_viewport,int p_width,int p_height)=0; + virtual void viewport_set_active(RID p_viewport,bool p_active)=0; + + virtual void viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect=Rect2(),int p_screen=0)=0; virtual void viewport_detach(RID p_viewport)=0; - virtual void viewport_set_render_target_to_screen_rect(RID p_viewport,const Rect2& p_rect)=0; - enum RenderTargetUpdateMode { - RENDER_TARGET_UPDATE_DISABLED, - RENDER_TARGET_UPDATE_ONCE, //then goes to disabled - RENDER_TARGET_UPDATE_WHEN_VISIBLE, // default - RENDER_TARGET_UPDATE_ALWAYS + enum ViewportUpdateMode { + VIEWPORT_UPDATE_DISABLED, + VIEWPORT_UPDATE_ONCE, //then goes to disabled, must be manually updated + VIEWPORT_UPDATE_WHEN_VISIBLE, // default + VIEWPORT_UPDATE_ALWAYS }; - virtual void viewport_set_as_render_target(RID p_viewport,bool p_enable)=0; - virtual void viewport_set_render_target_update_mode(RID p_viewport,RenderTargetUpdateMode p_mode)=0; - virtual RenderTargetUpdateMode viewport_get_render_target_update_mode(RID p_viewport) const=0; - virtual RID viewport_get_render_target_texture(RID p_viewport) const=0; - virtual void viewport_set_render_target_vflip(RID p_viewport,bool p_enable)=0; - virtual bool viewport_get_render_target_vflip(RID p_viewport) const=0; - virtual void viewport_set_render_target_clear_on_new_frame(RID p_viewport,bool p_enable)=0; - virtual bool viewport_get_render_target_clear_on_new_frame(RID p_viewport) const=0; - virtual void viewport_render_target_clear(RID p_viewport)=0; + virtual void viewport_set_update_mode(RID p_viewport,ViewportUpdateMode p_mode)=0; + virtual void viewport_set_vflip(RID p_viewport,bool p_enable)=0; - virtual void viewport_queue_screen_capture(RID p_viewport)=0; - virtual Image viewport_get_screen_capture(RID p_viewport) const=0; + enum ViewportClearMode { - - - struct ViewportRect { - - int x,y,width,height; - ViewportRect() { x=y=width=height=0; } + VIEWPORT_CLEAR_ALWAYS, + VIWEPORT_CLEAR_NEVER, + VIEWPORT_CLEAR_ONLY_NEXT_FRAME }; - virtual void viewport_set_rect(RID p_viewport,const ViewportRect& p_rect)=0; - virtual ViewportRect viewport_get_rect(RID p_viewport) const=0; + virtual void viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode)=0; + + virtual RID viewport_get_texture(RID p_viewport) const=0; + virtual Image viewport_capture(RID p_viewport) const=0; virtual void viewport_set_hide_scenario(RID p_viewport,bool p_hide)=0; virtual void viewport_set_hide_canvas(RID p_viewport,bool p_hide)=0; @@ -736,22 +457,15 @@ public: virtual void viewport_attach_camera(RID p_viewport,RID p_camera)=0; virtual void viewport_set_scenario(RID p_viewport,RID p_scenario)=0; - virtual RID viewport_get_attached_camera(RID p_viewport) const=0; - virtual RID viewport_get_scenario(RID p_viewport) const=0; virtual void viewport_attach_canvas(RID p_viewport,RID p_canvas)=0; virtual void viewport_remove_canvas(RID p_viewport,RID p_canvas)=0; virtual void viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset)=0; - virtual Matrix32 viewport_get_canvas_transform(RID p_viewport,RID p_canvas) const=0; virtual void viewport_set_transparent_background(RID p_viewport,bool p_enabled)=0; - virtual bool viewport_has_transparent_background(RID p_viewport) const=0; - virtual void viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform)=0; - virtual Matrix32 viewport_get_global_canvas_transform(RID p_viewport) const=0; virtual void viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer)=0; - /* ENVIRONMENT API */ virtual RID environment_create()=0; @@ -759,103 +473,48 @@ public: enum EnvironmentBG { ENV_BG_KEEP, - ENV_BG_DEFAULT_COLOR, ENV_BG_COLOR, - ENV_BG_TEXTURE, - ENV_BG_CUBEMAP, + ENV_BG_SKYBOX, ENV_BG_CANVAS, ENV_BG_MAX }; virtual void environment_set_background(RID p_env,EnvironmentBG p_bg)=0; - virtual EnvironmentBG environment_get_background(RID p_env) const=0; + virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0)=0; + virtual void environment_set_bg_color(RID p_env,const Color& p_color)=0; + virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer)=0; + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0)=0; - enum EnvironmentBGParam { + //set default SSAO options + //set default SSR options + //set default SSSSS options - ENV_BG_PARAM_CANVAS_MAX_LAYER, - ENV_BG_PARAM_COLOR, - ENV_BG_PARAM_TEXTURE, - ENV_BG_PARAM_CUBEMAP, - ENV_BG_PARAM_ENERGY, - ENV_BG_PARAM_SCALE, - ENV_BG_PARAM_GLOW, - ENV_BG_PARAM_MAX + enum EnvironmentGlowBlendMode { + GLOW_BLEND_MODE_ADDITIVE, + GLOW_BLEND_MODE_SCREEN, + GLOW_BLEND_MODE_SOFTLIGHT, + GLOW_BLEND_MODE_DISABLED, + }; + virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,EnvironmentGlowBlendMode p_blend_mode)=0; + virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture)=0; + + + enum EnvironmentToneMapper { + ENV_TONE_MAPPER_LINEAR, + ENV_TONE_MAPPER_REINHARDT, + ENV_TONE_MAPPER_FILMIC }; - - virtual void environment_set_background_param(RID p_env,EnvironmentBGParam p_param, const Variant& p_value)=0; - virtual Variant environment_get_background_param(RID p_env,EnvironmentBGParam p_param) const=0; - - enum EnvironmentFx { - ENV_FX_AMBIENT_LIGHT, - ENV_FX_FXAA, - ENV_FX_GLOW, - ENV_FX_DOF_BLUR, - ENV_FX_HDR, - ENV_FX_FOG, - ENV_FX_BCS, - ENV_FX_SRGB, - ENV_FX_MAX - }; - - - - virtual void environment_set_enable_fx(RID p_env,EnvironmentFx p_effect,bool p_enabled)=0; - virtual bool environment_is_fx_enabled(RID p_env,EnvironmentFx p_mode) const=0; - - enum EnvironmentFxBlurBlendMode { - ENV_FX_BLUR_BLEND_MODE_ADDITIVE, - ENV_FX_BLUR_BLEND_MODE_SCREEN, - ENV_FX_BLUR_BLEND_MODE_SOFTLIGHT, - }; - - enum EnvironmentFxHDRToneMapper { - ENV_FX_HDR_TONE_MAPPER_LINEAR, - ENV_FX_HDR_TONE_MAPPER_LOG, - ENV_FX_HDR_TONE_MAPPER_REINHARDT, - ENV_FX_HDR_TONE_MAPPER_REINHARDT_AUTOWHITE, - }; - - enum EnvironmentFxParam { - ENV_FX_PARAM_AMBIENT_LIGHT_COLOR, - ENV_FX_PARAM_AMBIENT_LIGHT_ENERGY, - ENV_FX_PARAM_GLOW_BLUR_PASSES, - ENV_FX_PARAM_GLOW_BLUR_SCALE, - ENV_FX_PARAM_GLOW_BLUR_STRENGTH, - ENV_FX_PARAM_GLOW_BLUR_BLEND_MODE, - ENV_FX_PARAM_GLOW_BLOOM, - ENV_FX_PARAM_GLOW_BLOOM_TRESHOLD, - ENV_FX_PARAM_DOF_BLUR_PASSES, - ENV_FX_PARAM_DOF_BLUR_BEGIN, - ENV_FX_PARAM_DOF_BLUR_RANGE, - ENV_FX_PARAM_HDR_TONEMAPPER, - ENV_FX_PARAM_HDR_EXPOSURE, - ENV_FX_PARAM_HDR_WHITE, - ENV_FX_PARAM_HDR_GLOW_TRESHOLD, - ENV_FX_PARAM_HDR_GLOW_SCALE, - ENV_FX_PARAM_HDR_MIN_LUMINANCE, - ENV_FX_PARAM_HDR_MAX_LUMINANCE, - ENV_FX_PARAM_HDR_EXPOSURE_ADJUST_SPEED, - ENV_FX_PARAM_FOG_BEGIN, - ENV_FX_PARAM_FOG_BEGIN_COLOR, - ENV_FX_PARAM_FOG_END_COLOR, - ENV_FX_PARAM_FOG_ATTENUATION, - ENV_FX_PARAM_FOG_BG, - ENV_FX_PARAM_BCS_BRIGHTNESS, - ENV_FX_PARAM_BCS_CONTRAST, - ENV_FX_PARAM_BCS_SATURATION, - ENV_FX_PARAM_MAX - }; - - virtual void environment_fx_set_param(RID p_env,EnvironmentFxParam p_effect,const Variant& p_param)=0; - virtual Variant environment_fx_get_param(RID p_env,EnvironmentFxParam p_effect) const=0; + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper)=0; + virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness)=0; + virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast)=0; + virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation)=0; + virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp)=0; /* SCENARIO API */ - - virtual RID scenario_create()=0; enum ScenarioDebugMode { @@ -881,14 +540,14 @@ public: INSTANCE_MESH, INSTANCE_MULTIMESH, INSTANCE_IMMEDIATE, - INSTANCE_PARTICLES, INSTANCE_LIGHT, + INSTANCE_REFLECTION_PROBE, INSTANCE_ROOM, INSTANCE_PORTAL, - INSTANCE_BAKED_LIGHT, - INSTANCE_BAKED_LIGHT_SAMPLER, + /*INSTANCE_BAKED_LIGHT, + INSTANCE_BAKED_LIGHT_SAMPLER,*/ - INSTANCE_GEOMETRY_MASK=(1< instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const=0; - virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const=0; - virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const=0; + virtual Vector instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const=0; + virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const=0; + virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const=0; enum InstanceFlags { INSTANCE_FLAG_VISIBLE, INSTANCE_FLAG_BILLBOARD, INSTANCE_FLAG_BILLBOARD_FIX_Y, INSTANCE_FLAG_CAST_SHADOW, - INSTANCE_FLAG_RECEIVE_SHADOWS, INSTANCE_FLAG_DEPH_SCALE, INSTANCE_FLAG_VISIBLE_IN_ALL_ROOMS, INSTANCE_FLAG_USE_BAKED_LIGHT, @@ -958,80 +595,53 @@ public: }; virtual void instance_geometry_set_flag(RID p_instance,InstanceFlags p_flags,bool p_enabled)=0; - virtual bool instance_geometry_get_flag(RID p_instance,InstanceFlags p_flags) const=0; - virtual void instance_geometry_set_cast_shadows_setting(RID p_instance, ShadowCastingSetting p_shadow_casting_setting) = 0; - virtual ShadowCastingSetting instance_geometry_get_cast_shadows_setting(RID p_instance) const = 0; - virtual void instance_geometry_set_material_override(RID p_instance, RID p_material)=0; - virtual RID instance_geometry_get_material_override(RID p_instance) const=0; - - virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max)=0; - virtual float instance_geometry_get_draw_range_max(RID p_instance) const=0; - virtual float instance_geometry_get_draw_range_min(RID p_instance) const=0; - - virtual void instance_geometry_set_baked_light(RID p_instance,RID p_baked_light)=0; - virtual RID instance_geometry_get_baked_light(RID p_instance) const=0; - - virtual void instance_geometry_set_baked_light_sampler(RID p_instance,RID p_baked_light_sampler)=0; - virtual RID instance_geometry_get_baked_light_sampler(RID p_instance) const=0; - - virtual void instance_geometry_set_baked_light_texture_index(RID p_instance,int p_tex_id)=0; - virtual int instance_geometry_get_baked_light_texture_index(RID p_instance) const=0; - virtual void instance_light_set_enabled(RID p_instance,bool p_enabled)=0; - virtual bool instance_light_is_enabled(RID p_instance) const=0; + virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin)=0; + virtual void instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance)=0; /* CANVAS (2D) */ virtual RID canvas_create()=0; virtual void canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring)=0; - virtual Point2 canvas_get_item_mirroring(RID p_canvas,RID p_item) const=0; virtual void canvas_set_modulate(RID p_canvas,const Color& p_color)=0; - virtual RID canvas_item_create()=0; virtual void canvas_item_set_parent(RID p_item,RID p_parent)=0; - virtual RID canvas_item_get_parent(RID p_canvas_item) const=0; virtual void canvas_item_set_visible(RID p_item,bool p_visible)=0; - virtual bool canvas_item_is_visible(RID p_item) const=0; - virtual void canvas_item_set_light_mask(RID p_item,int p_mask)=0; - virtual void canvas_item_set_blend_mode(RID p_canvas_item,MaterialBlendMode p_blend)=0; - - virtual void canvas_item_attach_viewport(RID p_item, RID p_viewport)=0; - - //virtual void canvas_item_set_rect(RID p_item, const Rect2& p_rect)=0; - virtual void canvas_item_set_transform(RID p_item, const Matrix32& p_transform)=0; virtual void canvas_item_set_clip(RID p_item, bool p_clip)=0; virtual void canvas_item_set_distance_field_mode(RID p_item, bool p_enable)=0; virtual void canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect=Rect2())=0; - virtual void canvas_item_set_opacity(RID p_item, float p_opacity)=0; - virtual float canvas_item_get_opacity(RID p_item, float p_opacity) const=0; + virtual void canvas_item_set_modulate(RID p_item, const Color& p_color)=0; + virtual void canvas_item_set_self_modulate(RID p_item, const Color& p_color)=0; - virtual void canvas_item_set_self_opacity(RID p_item, float p_self_opacity)=0; - virtual float canvas_item_get_self_opacity(RID p_item, float p_self_opacity) const=0; + virtual void canvas_item_set_draw_behind_parent(RID p_item, bool p_enable)=0; - virtual void canvas_item_set_on_top(RID p_item, bool p_on_top)=0; - virtual bool canvas_item_is_on_top(RID p_item) const=0; + enum NinePatchAxisMode { + NINE_PATCH_STRETCH, + NINE_PATCH_TILE, + NINE_PATCH_TILE_FIT, + }; virtual void canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width=1.0,bool p_antialiased=false)=0; virtual void canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color)=0; virtual void canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color)=0; virtual void canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile=false,const Color& p_modulate=Color(1,1,1),bool p_transpose=false)=0; virtual void canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate=Color(1,1,1),bool p_transpose=false)=0; - virtual void canvas_item_add_style_box(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright, bool p_draw_center=true,const Color& p_modulate=Color(1,1,1))=0; + virtual void canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,NinePatchAxisMode p_x_axis_mode=NINE_PATCH_STRETCH, NinePatchAxisMode p_y_axis_mode=NINE_PATCH_STRETCH,bool p_draw_center=true,const Color& p_modulate=Color(1,1,1))=0; virtual void canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width=1.0)=0; virtual void canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID())=0; virtual void canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID(), int p_count=-1)=0; - virtual void canvas_item_add_triangle_array_ptr(RID p_item, int p_count, const int* p_indices, const Point2* p_points, const Color* p_colors,const Point2* p_uvs=NULL, RID p_texture=RID())=0; + virtual void canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton=RID())=0; + virtual void canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton=RID())=0; virtual void canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform)=0; - virtual void canvas_item_add_set_blend_mode(RID p_item, MaterialBlendMode p_blend)=0; virtual void canvas_item_add_clip_ignore(RID p_item, bool p_ignore)=0; virtual void canvas_item_set_sort_children_by_y(RID p_item, bool p_enable)=0; virtual void canvas_item_set_z(RID p_item, int p_z)=0; @@ -1039,7 +649,7 @@ public: virtual void canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect)=0; virtual void canvas_item_clear(RID p_item)=0; - virtual void canvas_item_raise(RID p_item)=0; + virtual void canvas_item_set_draw_index(RID p_item,int p_index)=0; virtual void canvas_item_set_material(RID p_item, RID p_material)=0; @@ -1057,8 +667,8 @@ public: virtual void canvas_light_set_energy(RID p_light, float p_energy)=0; virtual void canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z)=0; virtual void canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer)=0; - virtual void canvas_light_set_item_mask(RID p_light, int p_mask)=0; - virtual void canvas_light_set_item_shadow_mask(RID p_light, int p_mask)=0; + virtual void canvas_light_set_item_cull_mask(RID p_light, int p_mask)=0; + virtual void canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask)=0; enum CanvasLightMode { CANVAS_LIGHT_MODE_ADD, @@ -1068,9 +678,20 @@ public: }; virtual void canvas_light_set_mode(RID p_light, CanvasLightMode p_mode)=0; + + + enum CanvasLightShadowFilter { + CANVAS_LIGHT_FILTER_NONE, + CANVAS_LIGHT_FILTER_PCF3, + CANVAS_LIGHT_FILTER_PCF5, + CANVAS_LIGHT_FILTER_PCF9, + CANVAS_LIGHT_FILTER_PCF13, + }; + virtual void canvas_light_set_shadow_enabled(RID p_light, bool p_enabled)=0; virtual void canvas_light_set_shadow_buffer_size(RID p_light, int p_size)=0; - virtual void canvas_light_set_shadow_esm_multiplier(RID p_light, float p_multiplier)=0; + virtual void canvas_light_set_shadow_gradient_length(RID p_light, float p_length)=0; + virtual void canvas_light_set_shadow_filter(RID p_light, CanvasLightShadowFilter p_filter)=0; virtual void canvas_light_set_shadow_color(RID p_light, const Color& p_color)=0; @@ -1085,6 +706,7 @@ public: virtual RID canvas_occluder_polygon_create()=0; virtual void canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed)=0; virtual void canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape)=0; + enum CanvasOccluderPolygonCullMode { CANVAS_OCCLUDER_POLYGON_CULL_DISABLED, CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE, @@ -1092,21 +714,6 @@ public: }; virtual void canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,CanvasOccluderPolygonCullMode p_mode)=0; - /* CANVAS ITEM MATERIAL */ - - virtual RID canvas_item_material_create()=0; - virtual void canvas_item_material_set_shader(RID p_material, RID p_shader)=0; - virtual void canvas_item_material_set_shader_param(RID p_material, const StringName& p_param, const Variant& p_value)=0; - virtual Variant canvas_item_material_get_shader_param(RID p_material, const StringName& p_param) const=0; - - - enum CanvasItemShadingMode { - CANVAS_ITEM_SHADING_NORMAL, - CANVAS_ITEM_SHADING_UNSHADED, - CANVAS_ITEM_SHADING_ONLY_LIGHT, - }; - - virtual void canvas_item_material_set_shading_mode(RID p_material, CanvasItemShadingMode p_mode)=0; /* CURSOR */ virtual void cursor_set_rotation(float p_rotation, int p_cursor = 0)=0; // radians @@ -1125,15 +732,6 @@ public: virtual void free( RID p_rid )=0; ///< free RIDs associated with the visual server - /* CUSTOM SHADING */ - - virtual void custom_shade_model_set_shader(int p_model, RID p_shader)=0; - virtual RID custom_shade_model_get_shader(int p_model) const=0; - virtual void custom_shade_model_set_name(int p_model, const String& p_name)=0; - virtual String custom_shade_model_get_name(int p_model) const=0; - virtual void custom_shade_model_set_param_info(int p_model, const List& p_info)=0; - virtual void custom_shade_model_get_param_info(int p_model, List* p_info) const=0; - /* EVENT QUEUING */ virtual void draw()=0; @@ -1166,7 +764,6 @@ public: RID material_2d_get(bool p_shaded, bool p_transparent, bool p_cut_alpha,bool p_opaque_prepass); - /* TESTING */ virtual RID get_test_cube()=0; @@ -1185,7 +782,6 @@ public: enum Features { FEATURE_SHADERS, FEATURE_MULTITHREADED, - FEATURE_NEEDS_RELOAD_HOOK, }; virtual bool has_feature(Features p_feature) const=0; @@ -1200,19 +796,15 @@ public: VARIANT_ENUM_CAST( VisualServer::CubeMapSide ); VARIANT_ENUM_CAST( VisualServer::TextureFlags ); VARIANT_ENUM_CAST( VisualServer::ShaderMode ); -VARIANT_ENUM_CAST( VisualServer::MaterialFlag ); -VARIANT_ENUM_CAST( VisualServer::MaterialBlendMode ); -VARIANT_ENUM_CAST( VisualServer::ParticleVariable ); VARIANT_ENUM_CAST( VisualServer::ArrayType ); VARIANT_ENUM_CAST( VisualServer::ArrayFormat ); VARIANT_ENUM_CAST( VisualServer::PrimitiveType ); VARIANT_ENUM_CAST( VisualServer::LightType ); -VARIANT_ENUM_CAST( VisualServer::LightColor ); VARIANT_ENUM_CAST( VisualServer::LightParam ); VARIANT_ENUM_CAST( VisualServer::ScenarioDebugMode ); VARIANT_ENUM_CAST( VisualServer::InstanceType ); VARIANT_ENUM_CAST( VisualServer::RenderInfo ); -VARIANT_ENUM_CAST( VisualServer::MipMapPolicy ); + //typedef VisualServer VS; // makes it easier to use #define VS VisualServer diff --git a/tools/editor/code_editor.cpp b/tools/editor/code_editor.cpp index 9240e3527ca..6fb796ae6ab 100644 --- a/tools/editor/code_editor.cpp +++ b/tools/editor/code_editor.cpp @@ -895,7 +895,7 @@ void FindReplaceDialog::_bind_methods() { FindReplaceDialog::FindReplaceDialog() { - set_self_opacity(0.8); + set_self_modulate(Color(1,1,1,0.8)); VBoxContainer *vb = memnew( VBoxContainer ); add_child(vb); diff --git a/tools/editor/editor_autoload_settings.cpp b/tools/editor/editor_autoload_settings.cpp index 97062b1480e..bf8a891247d 100644 --- a/tools/editor/editor_autoload_settings.cpp +++ b/tools/editor/editor_autoload_settings.cpp @@ -399,7 +399,7 @@ Variant EditorAutoloadSettings::get_drag_data_fw(const Point2& p_point, Control for (int i = 0; i < max_size; i++) { Label *label = memnew( Label(autoloads[i]) ); - label->set_self_opacity(Math::lerp(1, 0, float(i)/PREVIEW_LIST_MAX_SIZE)); + label->set_self_modulate(Color(1,1,1,Math::lerp(1, 0, float(i)/PREVIEW_LIST_MAX_SIZE))); preview->add_child(label); } diff --git a/tools/editor/editor_help.cpp b/tools/editor/editor_help.cpp index 4f83dc2f662..628870c70f0 100644 --- a/tools/editor/editor_help.cpp +++ b/tools/editor/editor_help.cpp @@ -1710,7 +1710,7 @@ EditorHelp::EditorHelp() { search_dialog->get_ok()->set_text(TTR("Find")); search_dialog->connect("confirmed",this,"_search_cbk"); search_dialog->set_hide_on_ok(false); - search_dialog->set_self_opacity(0.8); + search_dialog->set_self_modulate(Color(1,1,1,0.8)); /*class_search = memnew( EditorHelpSearch(editor) ); diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp index 8274272a7ea..7f026241074 100644 --- a/tools/editor/editor_node.cpp +++ b/tools/editor/editor_node.cpp @@ -907,10 +907,11 @@ void EditorNode::_save_scene_with_preview(String p_file) { _editor_select(is2d?EDITOR_2D:EDITOR_3D); - VS::get_singleton()->viewport_queue_screen_capture(viewport); + save.step(TTR("Creating Thumbnail"),2); save.step(TTR("Creating Thumbnail"),3); - Image img = VS::get_singleton()->viewport_get_screen_capture(viewport); +#if 0 + Image img = VS::get_singleton()->viewport_texture(scree_capture(viewport); int preview_size = EditorSettings::get_singleton()->get("file_dialog/thumbnail_size");; preview_size*=EDSCALE; int width,height; @@ -928,7 +929,7 @@ void EditorNode::_save_scene_with_preview(String p_file) { height=img.get_height(); } - img.convert(Image::FORMAT_RGB); + img.convert(Image::FORMAT_RGB8); img.resize(width,height); String pfile = EditorSettings::get_singleton()->get_settings_path().plus_file("tmp/last_scene_preview.png"); @@ -940,7 +941,7 @@ void EditorNode::_save_scene_with_preview(String p_file) { if (editor_data.get_edited_scene_import_metadata().is_null()) editor_data.set_edited_scene_import_metadata(Ref( memnew( ResourceImportMetadata ) ) ); editor_data.get_edited_scene_import_metadata()->set_option("thumbnail",imgdata); - +#endif //tamanio tel thumbnail if (screen!=-1) { _editor_select(screen); @@ -1243,7 +1244,7 @@ void EditorNode::_dialog_action(String p_file) { ml = Ref( memnew( MeshLibrary )); } - MeshLibraryEditor::update_library_file(editor_data.get_edited_scene_root(),ml,true); +// MeshLibraryEditor::update_library_file(editor_data.get_edited_scene_root(),ml,true); Error err = ResourceSaver::save(p_file,ml); if (err) { @@ -4145,7 +4146,7 @@ void EditorNode::register_editor_types() { ObjectTypeDB::register_type(); //ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); +// ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -6537,10 +6538,10 @@ EditorNode::EditorNode() { //more visually meaningful to have this later raise_bottom_panel_item(AnimationPlayerEditor::singleton); - add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,true) ) ); +/* add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,true) ) ); add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,false) ) ); add_editor_plugin( memnew( ShaderEditorPlugin(this,true) ) ); - add_editor_plugin( memnew( ShaderEditorPlugin(this,false) ) ); + add_editor_plugin( memnew( ShaderEditorPlugin(this,false) ) );*/ add_editor_plugin( memnew( CameraEditorPlugin(this) ) ); add_editor_plugin( memnew( SampleEditorPlugin(this) ) ); add_editor_plugin( memnew( SampleLibraryEditorPlugin(this) ) ); @@ -6549,14 +6550,14 @@ EditorNode::EditorNode() { add_editor_plugin( memnew( MeshInstanceEditorPlugin(this) ) ); add_editor_plugin( memnew( AnimationTreeEditorPlugin(this) ) ); //add_editor_plugin( memnew( SamplePlayerEditorPlugin(this) ) ); - this is kind of useless at this point - add_editor_plugin( memnew( MeshLibraryEditorPlugin(this) ) ); +// add_editor_plugin( memnew( MeshLibraryEditorPlugin(this) ) ); //add_editor_plugin( memnew( StreamEditorPlugin(this) ) ); add_editor_plugin( memnew( StyleBoxEditorPlugin(this) ) ); - add_editor_plugin( memnew( ParticlesEditorPlugin(this) ) ); + //add_editor_plugin( memnew( ParticlesEditorPlugin(this) ) ); add_editor_plugin( memnew( ResourcePreloaderEditorPlugin(this) ) ); add_editor_plugin( memnew( ItemListEditorPlugin(this) ) ); //add_editor_plugin( memnew( RichTextEditorPlugin(this) ) ); - add_editor_plugin( memnew( CollisionPolygonEditorPlugin(this) ) ); +// add_editor_plugin( memnew( CollisionPolygonEditorPlugin(this) ) ); add_editor_plugin( memnew( CollisionPolygon2DEditorPlugin(this) ) ); add_editor_plugin( memnew( TileSetEditorPlugin(this) ) ); add_editor_plugin( memnew( TileMapEditorPlugin(this) ) ); @@ -6564,16 +6565,16 @@ EditorNode::EditorNode() { add_editor_plugin( memnew( TextureRegionEditorPlugin(this) ) ); add_editor_plugin( memnew( Particles2DEditorPlugin(this) ) ); add_editor_plugin( memnew( Path2DEditorPlugin(this) ) ); - add_editor_plugin( memnew( PathEditorPlugin(this) ) ); - add_editor_plugin( memnew( BakedLightEditorPlugin(this) ) ); +// add_editor_plugin( memnew( PathEditorPlugin(this) ) ); + //add_editor_plugin( memnew( BakedLightEditorPlugin(this) ) ); add_editor_plugin( memnew( Polygon2DEditorPlugin(this) ) ); add_editor_plugin( memnew( LightOccluder2DEditorPlugin(this) ) ); add_editor_plugin( memnew( NavigationPolygonEditorPlugin(this) ) ); add_editor_plugin( memnew( ColorRampEditorPlugin(this) ) ); add_editor_plugin( memnew( CollisionShape2DEditorPlugin(this) ) ); add_editor_plugin( memnew( TextureEditorPlugin(this) ) ); - add_editor_plugin( memnew( MaterialEditorPlugin(this) ) ); - add_editor_plugin( memnew( MeshEditorPlugin(this) ) ); +// add_editor_plugin( memnew( MaterialEditorPlugin(this) ) ); +// add_editor_plugin( memnew( MeshEditorPlugin(this) ) ); for(int i=0;iadd_preview_generator( Ref( memnew(EditorTexturePreviewPlugin ))); + /*resource_preview->add_preview_generator( Ref( memnew(EditorTexturePreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorPackedScenePreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorMaterialPreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorScriptPreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorSamplePreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorMeshPreviewPlugin ))); resource_preview->add_preview_generator( Ref( memnew(EditorBitmapPreviewPlugin ))); - +*/ circle_step_msec=OS::get_singleton()->get_ticks_msec(); diff --git a/tools/editor/editor_profiler.cpp b/tools/editor/editor_profiler.cpp index 13327f0be9e..435ad78b89a 100644 --- a/tools/editor/editor_profiler.cpp +++ b/tools/editor/editor_profiler.cpp @@ -338,7 +338,7 @@ void EditorProfiler::_update_plot() { wr = DVector::Write(); - Image img(w,h,0,Image::FORMAT_RGBA,graph_image); + Image img(w,h,0,Image::FORMAT_RGBA8,graph_image); if (reset_texture) { diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub index 44e28f49e60..e9054f30d94 100644 --- a/tools/editor/icons/SCsub +++ b/tools/editor/icons/SCsub @@ -63,7 +63,7 @@ def make_editor_icons_action(target, source, env): s.write("\tRef texture( memnew( ImageTexture ) );\n") s.write("\tbool use_hidpi_image=(editor_get_scale()>1.0&&p_hidpi_png);\n") s.write("\tImage img(use_hidpi_image?p_hidpi_png:p_png);\n") - s.write("\tif (editor_get_scale()>1.0 && !p_hidpi_png) { img.convert(Image::FORMAT_RGBA); img.expand_x2_hq2x(); use_hidpi_image=true;}\n") + s.write("\tif (editor_get_scale()>1.0 && !p_hidpi_png) { img.convert(Image::FORMAT_RGBA8); img.expand_x2_hq2x(); use_hidpi_image=true;}\n") s.write("\timg.resize(img.get_width()*EDSCALE/(use_hidpi_image?2:1),img.get_height()*EDSCALE/(use_hidpi_image?2:1));\n") s.write("\ttexture->create_from_image( img,ImageTexture::FLAG_FILTER );\n") s.write("\treturn texture;\n") diff --git a/tools/editor/io_plugins/editor_font_import_plugin.cpp b/tools/editor/io_plugins/editor_font_import_plugin.cpp index df3741f0d45..0a3b0d49175 100644 --- a/tools/editor/io_plugins/editor_font_import_plugin.cpp +++ b/tools/editor/io_plugins/editor_font_import_plugin.cpp @@ -1330,7 +1330,7 @@ Ref EditorFontImportPlugin::generate_font(const Ref EditorFontImportPlugin::generate_font(const Ref::Write(); - Image img(s.width,s.height,0,Image::FORMAT_RGBA,pixels); + Image img(s.width,s.height,0,Image::FORMAT_RGBA8,pixels); font_data_list[i]->blit=img; font_data_list[i]->blit_ofs=o; @@ -1537,7 +1537,7 @@ Ref EditorFontImportPlugin::generate_font(const Ref EditorFontImportPlugin::generate_font(const Refhas_option("color/monochrome") && bool(from->get_option("color/monochrome"))) { - atlas.convert(Image::FORMAT_GRAYSCALE_ALPHA); + atlas.convert(Image::FORMAT_LA8); } diff --git a/tools/editor/io_plugins/editor_import_collada.cpp b/tools/editor/io_plugins/editor_import_collada.cpp index 1130e2be308..6b1873d49a6 100644 --- a/tools/editor/io_plugins/editor_import_collada.cpp +++ b/tools/editor/io_plugins/editor_import_collada.cpp @@ -237,8 +237,8 @@ Error ColladaImport::_create_scene(Collada::Node *p_node, Spatial *p_parent) { //well, it's an ambient light.. Light *l = memnew( DirectionalLight ); // l->set_color(Light::COLOR_AMBIENT,ld.color); - l->set_color(Light::COLOR_DIFFUSE,Color(0,0,0)); - l->set_color(Light::COLOR_SPECULAR,Color(0,0,0)); +// l->set_color(Light::COLOR_DIFFUSE,Color(0,0,0)); +// l->set_color(Light::COLOR_SPECULAR,Color(0,0,0)); node = l; } else if (ld.mode==Collada::LightData::MODE_DIRECTIONAL) { @@ -248,8 +248,8 @@ Error ColladaImport::_create_scene(Collada::Node *p_node, Spatial *p_parent) { //if (found_ambient) //use it here // l->set_color(Light::COLOR_AMBIENT,ambient); - l->set_color(Light::COLOR_DIFFUSE,ld.color); - l->set_color(Light::COLOR_SPECULAR,Color(1,1,1)); +// l->set_color(Light::COLOR_DIFFUSE,ld.color); +// l->set_color(Light::COLOR_SPECULAR,Color(1,1,1)); node = l; } else { @@ -259,14 +259,14 @@ Error ColladaImport::_create_scene(Collada::Node *p_node, Spatial *p_parent) { l=memnew( OmniLight ); else { l=memnew( SpotLight ); - l->set_parameter(Light::PARAM_SPOT_ANGLE,ld.spot_angle); - l->set_parameter(Light::PARAM_SPOT_ATTENUATION,ld.spot_exp); +// l->set_parameter(Light::PARAM_SPOT_ANGLE,ld.spot_angle); +// l->set_parameter(Light::PARAM_SPOT_ATTENUATION,ld.spot_exp); } // - l->set_color(Light::COLOR_DIFFUSE,ld.color); - l->set_color(Light::COLOR_SPECULAR,Color(1,1,1)); - l->approximate_opengl_attenuation(ld.constant_att,ld.linear_att,ld.quad_att); +// l->set_color(Light::COLOR_DIFFUSE,ld.color); +// l->set_color(Light::COLOR_SPECULAR,Color(1,1,1)); +// l->approximate_opengl_attenuation(ld.constant_att,ld.linear_att,ld.quad_att); node=l; } @@ -394,14 +394,14 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { - material->set_texture(FixedMaterial::PARAM_DIFFUSE,texture); - material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,1)); +// material->set_texture(FixedMaterial::PARAM_DIFFUSE,texture); +// material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,1)); } else { missing_textures.push_back(texfile.get_file()); } } } else { - material->set_parameter(FixedMaterial::PARAM_DIFFUSE,effect.diffuse.color); +// material->set_parameter(FixedMaterial::PARAM_DIFFUSE,effect.diffuse.color); } // SPECULAR @@ -414,15 +414,15 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { - material->set_texture(FixedMaterial::PARAM_SPECULAR,texture); - material->set_parameter(FixedMaterial::PARAM_SPECULAR,Color(1,1,1,1)); +// material->set_texture(FixedMaterial::PARAM_SPECULAR,texture); +// material->set_parameter(FixedMaterial::PARAM_SPECULAR,Color(1,1,1,1)); } else { missing_textures.push_back(texfile.get_file()); } } } else { - material->set_parameter(FixedMaterial::PARAM_SPECULAR,effect.specular.color); +// material->set_parameter(FixedMaterial::PARAM_SPECULAR,effect.specular.color); } // EMISSION @@ -435,15 +435,15 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { - material->set_texture(FixedMaterial::PARAM_EMISSION,texture); - material->set_parameter(FixedMaterial::PARAM_EMISSION,Color(1,1,1,1)); +// material->set_texture(FixedMaterial::PARAM_EMISSION,texture); +// material->set_parameter(FixedMaterial::PARAM_EMISSION,Color(1,1,1,1)); }else { - missing_textures.push_back(texfile.get_file()); +// missing_textures.push_back(texfile.get_file()); } } } else { - material->set_parameter(FixedMaterial::PARAM_EMISSION,effect.emission.color); +// material->set_parameter(FixedMaterial::PARAM_EMISSION,effect.emission.color); } // NORMAL @@ -456,18 +456,18 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { - material->set_texture(FixedMaterial::PARAM_NORMAL,texture); + // material->set_texture(FixedMaterial::PARAM_NORMAL,texture); }else { - missing_textures.push_back(texfile.get_file()); +// missing_textures.push_back(texfile.get_file()); } } } - material->set_parameter(FixedMaterial::PARAM_SPECULAR_EXP,effect.shininess); - material->set_flag(Material::FLAG_DOUBLE_SIDED,effect.double_sided); - material->set_flag(Material::FLAG_UNSHADED,effect.unshaded); +// material->set_parameter(FixedMaterial::PARAM_SPECULAR_EXP,effect.shininess); +// material->set_flag(Material::FLAG_DOUBLE_SIDED,effect.double_sided); +// material->set_flag(Material::FLAG_UNSHADED,effect.unshaded); @@ -1196,7 +1196,7 @@ Error ColladaImport::_create_mesh_surfaces(bool p_optimize,Ref& p_mesh,con tarrayw = DVector::Write(); final_tangent_array=tarray; - } else if (final_normal_array.size() && primitive==Mesh::PRIMITIVE_TRIANGLES && final_uv_array.size() && (force_make_tangents || (material.is_valid() && material->get_texture(FixedMaterial::PARAM_NORMAL).is_valid()))){ + } else if (final_normal_array.size() && primitive==Mesh::PRIMITIVE_TRIANGLES && final_uv_array.size() && (force_make_tangents || (material.is_valid()))){ //if this uses triangles, there are uvs and the material is using a normalmap, generate tangents and binormals, because they WILL be needed //generate binormals/tangents _generate_tangents_and_binormals(index_array,final_vertex_array,final_uv_array,final_normal_array,final_tangent_array); diff --git a/tools/editor/io_plugins/editor_scene_import_plugin.cpp b/tools/editor/io_plugins/editor_scene_import_plugin.cpp index 190b56faba1..9d68807c461 100644 --- a/tools/editor/io_plugins/editor_scene_import_plugin.cpp +++ b/tools/editor/io_plugins/editor_scene_import_plugin.cpp @@ -1406,9 +1406,9 @@ void EditorSceneImportPlugin::_find_resources(const Variant& p_var, Mapcast_to()->set_fixed_flag(FixedMaterial::FLAG_USE_XY_NORMALMAP,true); - } + //if (p_flags&SCENE_FLAG_CONVERT_NORMALMAPS_TO_XY) + // res->cast_to()->set_fixed_flag(FixedMaterial::FLAG_USE_XY_NORMALMAP,true); + }// } else { @@ -1516,10 +1516,10 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref fm = m->surface_get_material(i); if (fm.is_valid()) { - fm->set_flag(Material::FLAG_UNSHADED,true); - fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); - fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // fm->set_flag(Material::FLAG_UNSHADED,true); + // fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); + // fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); + // fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); } } } @@ -1543,17 +1543,17 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map if (p_flags&SCENE_FLAG_DETECT_ALPHA && _teststr(mat->get_name(),"alpha")) { - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); - mat->set_name(_fixstr(mat->get_name(),"alpha")); + // mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // mat->set_name(_fixstr(mat->get_name(),"alpha")); } if (p_flags&SCENE_FLAG_DETECT_VCOLOR && _teststr(mat->get_name(),"vcol")) { - mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); - mat->set_name(_fixstr(mat->get_name(),"vcol")); + //mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); + //mat->set_name(_fixstr(mat->get_name(),"vcol")); } if (p_flags&SCENE_FLAG_SET_LIGHTMAP_TO_UV2_IF_EXISTS && m->surface_get_format(i)&Mesh::ARRAY_FORMAT_TEX_UV2) { - mat->set_flag(Material::FLAG_LIGHTMAP_ON_UV2,true); + //mat->set_flag(Material::FLAG_LIGHTMAP_ON_UV2,true); } } @@ -1612,11 +1612,11 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map float dist = d.to_double(); mi->set_flag(GeometryInstance::FLAG_BILLBOARD,true); mi->set_flag(GeometryInstance::FLAG_BILLBOARD_FIX_Y,true); - mi->set_draw_range_begin(dist); - mi->set_draw_range_end(100000); + //mi->set_draw_range_begin(dist); + //mi->set_draw_range_end(100000); - mip->set_draw_range_begin(0); - mip->set_draw_range_end(dist); + //mip->set_draw_range_begin(0); + //mip->set_draw_range_end(dist); if (mi->get_mesh().is_valid()) { @@ -1625,10 +1625,10 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref fm = m->surface_get_material(i); if (fm.is_valid()) { - fm->set_flag(Material::FLAG_UNSHADED,true); - fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); - fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // fm->set_flag(Material::FLAG_UNSHADED,true); + // fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); + // fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); + // fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); } } } @@ -1660,11 +1660,11 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map d=d.substr(1,d.length()); if (d.length() && d[0]>='0' && d[0]<='9') { float dist = d.to_double(); - mi->set_draw_range_begin(dist); - mi->set_draw_range_end(100000); + /// mi->set_draw_range_begin(dist); + // mi->set_draw_range_end(100000); - mip->set_draw_range_begin(0); - mip->set_draw_range_end(dist); + // mip->set_draw_range_begin(0); + // mip->set_draw_range_end(dist); /*if (mi->get_mesh().is_valid()) { @@ -1888,8 +1888,8 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map BSP_Tree bsptree(faces); Ref area = memnew( RoomBounds ); - area->set_bounds(faces); - area->set_geometry_hint(faces); + //area->set_bounds(faces); + //area->set_geometry_hint(faces); Room * room = memnew( Room ); @@ -1917,7 +1917,7 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map memdelete(p_node); p_node=room; - room->compute_room_from_subtree(); + //room->compute_room_from_subtree(); } else if (p_flags&SCENE_FLAG_CREATE_PORTALS &&_teststr(name,"portal") && p_node->cast_to()) { @@ -2047,7 +2047,7 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref fm = mesh->surface_get_material(i); if (fm.is_valid()) { String name = fm->get_name(); - if (_teststr(name,"alpha")) { + /* if (_teststr(name,"alpha")) { fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); name=_fixstr(name,"alpha"); } @@ -2055,7 +2055,7 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map if (_teststr(name,"vcol")) { fm->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); name=_fixstr(name,"vcol"); - } + }*/ fm->set_name(name); } } diff --git a/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp b/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp index ac3c4637c22..f117182365f 100644 --- a/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp +++ b/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp @@ -33,7 +33,7 @@ #include "scene/3d/mesh_instance.h" #include "scene/animation/animation_player.h" - +#if 0 String EditorSceneImporterFBXConv::_id(const String& p_id) const { return p_id.replace(":","_").replace("/","_"); @@ -1132,3 +1132,4 @@ EditorSceneImporterFBXConv::EditorSceneImporterFBXConv() { #endif } +#endif diff --git a/tools/editor/io_plugins/editor_scene_importer_fbxconv.h b/tools/editor/io_plugins/editor_scene_importer_fbxconv.h index b0cbc07ba38..e23c0e2faac 100644 --- a/tools/editor/io_plugins/editor_scene_importer_fbxconv.h +++ b/tools/editor/io_plugins/editor_scene_importer_fbxconv.h @@ -32,6 +32,7 @@ #include "tools/editor/io_plugins/editor_scene_import_plugin.h" #include "scene/3d/skeleton.h" +#if 0 class EditorSceneImporterFBXConv : public EditorSceneImporter { @@ -107,3 +108,4 @@ public: }; #endif // EDITOR_SCENE_IMPORTER_FBXCONV_H +#endif diff --git a/tools/editor/io_plugins/editor_texture_import_plugin.cpp b/tools/editor/io_plugins/editor_texture_import_plugin.cpp index 2935ea8fe24..4397956be79 100644 --- a/tools/editor/io_plugins/editor_texture_import_plugin.cpp +++ b/tools/editor/io_plugins/editor_texture_import_plugin.cpp @@ -843,33 +843,27 @@ void EditorTextureImportPlugin::compress_image(EditorExportPlatform::ImageCompre //do absolutely nothing - } break; - case EditorExportPlatform::IMAGE_COMPRESSION_INDEXED: { - - //quantize - image.quantize(); - - } break; + } break; case EditorExportPlatform::IMAGE_COMPRESSION_BC: { // for maximum compatibility, BC shall always use mipmaps and be PO2 image.resize_to_po2(); - if (image.get_mipmaps()==0) + if (!image.has_mipmaps()) image.generate_mipmaps(); - image.compress(Image::COMPRESS_BC); + image.compress(Image::COMPRESS_S3TC); /* if (has_alpha) { if (flags&IMAGE_FLAG_ALPHA_BIT) { - image.convert(Image::FORMAT_BC3); + image.convert(Image::FORMAT_DXT5); } else { - image.convert(Image::FORMAT_BC2); + image.convert(Image::FORMAT_DXT3); } } else { - image.convert(Image::FORMAT_BC1); + image.convert(Image::FORMAT_DXT1); }*/ @@ -880,24 +874,24 @@ void EditorTextureImportPlugin::compress_image(EditorExportPlatform::ImageCompre // for maximum compatibility (hi apple!), PVRT shall always // use mipmaps, be PO2 and square - if (image.get_mipmaps()==0) + if (!image.has_mipmaps()) image.generate_mipmaps(); image.resize_to_po2(true); if (p_smaller) { image.compress(Image::COMPRESS_PVRTC2); - //image.convert(has_alpha ? Image::FORMAT_PVRTC2_ALPHA : Image::FORMAT_PVRTC2); + //image.convert(has_alpha ? Image::FORMAT_PVRTC2A : Image::FORMAT_PVRTC2); } else { image.compress(Image::COMPRESS_PVRTC4); - //image.convert(has_alpha ? Image::FORMAT_PVRTC4_ALPHA : Image::FORMAT_PVRTC4); + //image.convert(has_alpha ? Image::FORMAT_PVRTC4A : Image::FORMAT_PVRTC4); } } break; case EditorExportPlatform::IMAGE_COMPRESSION_ETC1: { image.resize_to_po2(); //square or not? - if (image.get_mipmaps()==0) + if (!image.has_mipmaps()) image.generate_mipmaps(); if (!image.detect_alpha()) { //ETC1 is only opaque @@ -930,18 +924,18 @@ Error EditorTextureImportPlugin::_process_texture_data(Ref &textur ERR_FAIL_COND_V(image.empty(),ERR_INVALID_DATA); bool has_alpha=image.detect_alpha(); - if (!has_alpha && image.get_format()==Image::FORMAT_RGBA) { + if (!has_alpha && image.get_format()==Image::FORMAT_RGBA8) { - image.convert(Image::FORMAT_RGB); + image.convert(Image::FORMAT_RGB8); } - if (image.get_format()==Image::FORMAT_RGBA && flags&IMAGE_FLAG_FIX_BORDER_ALPHA) { + if (image.get_format()==Image::FORMAT_RGBA8 && flags&IMAGE_FLAG_FIX_BORDER_ALPHA) { image.fix_alpha_edges(); } - if (image.get_format()==Image::FORMAT_RGBA && flags&IMAGE_FLAG_PREMULT_ALPHA) { + if (image.get_format()==Image::FORMAT_RGBA8 && flags&IMAGE_FLAG_PREMULT_ALPHA) { image.premultiply_alpha(); } @@ -950,7 +944,7 @@ Error EditorTextureImportPlugin::_process_texture_data(Ref &textur image.normalmap_to_xy(); } - //if ((image.get_format()==Image::FORMAT_RGB || image.get_format()==Image::FORMAT_RGBA) && flags&IMAGE_FLAG_CONVERT_TO_LINEAR) { + //if ((image.get_format()==Image::FORMAT_RGB8 || image.get_format()==Image::FORMAT_RGBA8) && flags&IMAGE_FLAG_CONVERT_TO_LINEAR) { // image.srgb_to_linear(); //} @@ -989,18 +983,18 @@ Error EditorTextureImportPlugin::_process_texture_data(Ref &textur bool has_alpha=image.detect_alpha(); - if (!has_alpha && image.get_format()==Image::FORMAT_RGBA) { + if (!has_alpha && image.get_format()==Image::FORMAT_RGBA8) { - image.convert(Image::FORMAT_RGB); + image.convert(Image::FORMAT_RGB8); } - if (image.get_format()==Image::FORMAT_RGBA && flags&IMAGE_FLAG_FIX_BORDER_ALPHA) { + if (image.get_format()==Image::FORMAT_RGBA8 && flags&IMAGE_FLAG_FIX_BORDER_ALPHA) { image.fix_alpha_edges(); } - if (image.get_format()==Image::FORMAT_RGBA && flags&IMAGE_FLAG_PREMULT_ALPHA) { + if (image.get_format()==Image::FORMAT_RGBA8 && flags&IMAGE_FLAG_PREMULT_ALPHA) { image.premultiply_alpha(); } @@ -1009,7 +1003,7 @@ Error EditorTextureImportPlugin::_process_texture_data(Ref &textur image.normalmap_to_xy(); } - //if ((image.get_format()==Image::FORMAT_RGB || image.get_format()==Image::FORMAT_RGBA) && flags&IMAGE_FLAG_CONVERT_TO_LINEAR) { + //if ((image.get_format()==Image::FORMAT_RGB8 || image.get_format()==Image::FORMAT_RGBA8) && flags&IMAGE_FLAG_CONVERT_TO_LINEAR) { // // print_line("CONVERT BECAUSE: "+itos(flags)); // image.srgb_to_linear(); @@ -1200,9 +1194,9 @@ Error EditorTextureImportPlugin::import2(const String& p_path, const Ref data = src.get_data(); @@ -1280,7 +1274,7 @@ Error EditorTextureImportPlugin::import2(const String& p_path, const Refget_source_count()); @@ -1411,18 +1405,18 @@ Error EditorTextureImportPlugin::import2(const String& p_path, const Ref& if (image.empty()) return NULL; - if (image.get_format()!=Image::FORMAT_RGBA) { + if (image.get_format()!=Image::FORMAT_RGBA8) { if (image.get_format()>Image::FORMAT_INDEXED_ALPHA) { Error err = image.decompress(); if (err) return NULL; } - if (image.get_format()!=Image::FORMAT_RGBA) - image.convert(Image::FORMAT_RGBA); + if (image.get_format()!=Image::FORMAT_RGBA8) + image.convert(Image::FORMAT_RGBA8); } if (imgtex->get_flags()&Texture::FLAG_CONVERT_TO_LINEAR) { @@ -2619,7 +2619,7 @@ Error BakedLightBaker::transfer_to_lightmaps() { copymem(w.ptr(),baked_textures[i].data.ptr(),baked_textures[i].data.size()); } - Image img(baked_textures[i].width,baked_textures[i].height,0,Image::FORMAT_RGBA,dv); + Image img(baked_textures[i].width,baked_textures[i].height,0,Image::FORMAT_RGBA8,dv); Ref tex = memnew( ImageTexture ); tex->create_from_image(img); baked_light->set_lightmap_texture(i,tex); @@ -2720,3 +2720,4 @@ BakedLightBaker::~BakedLightBaker() { clear(); } +#endif diff --git a/tools/editor/plugins/baked_light_baker.h b/tools/editor/plugins/baked_light_baker.h index d0fddf5563a..99340fcbe84 100644 --- a/tools/editor/plugins/baked_light_baker.h +++ b/tools/editor/plugins/baked_light_baker.h @@ -34,6 +34,8 @@ #include "scene/3d/mesh_instance.h" #include "os/thread.h" +#if 0 + class BakedLightBaker { public: @@ -375,3 +377,4 @@ public: }; #endif // BAKED_LIGHT_BAKER_H +#endif diff --git a/tools/editor/plugins/baked_light_editor_plugin.cpp b/tools/editor/plugins/baked_light_editor_plugin.cpp index a58a0c25e23..ed5aafeba03 100644 --- a/tools/editor/plugins/baked_light_editor_plugin.cpp +++ b/tools/editor/plugins/baked_light_editor_plugin.cpp @@ -33,7 +33,7 @@ #include "io/resource_saver.h" - +#if 0 void BakedLightEditor::_end_baking() { @@ -145,7 +145,7 @@ void BakedLightEditor::_notification(int p_option) { #if 1 //debug - Image img(baker->baked_octree_texture_w,baker->baked_octree_texture_h,0,Image::FORMAT_RGBA,octree_texture); + Image img(baker->baked_octree_texture_w,baker->baked_octree_texture_h,0,Image::FORMAT_RGBA8,octree_texture); Ref it = memnew( ImageTexture ); it->create_from_image(img); ResourceSaver::save("baked_octree.png",it); @@ -373,3 +373,4 @@ BakedLightEditorPlugin::~BakedLightEditorPlugin() } +#endif diff --git a/tools/editor/plugins/baked_light_editor_plugin.h b/tools/editor/plugins/baked_light_editor_plugin.h index 4985d7513ee..a556bd447a3 100644 --- a/tools/editor/plugins/baked_light_editor_plugin.h +++ b/tools/editor/plugins/baked_light_editor_plugin.h @@ -40,7 +40,7 @@ @author Juan Linietsky */ - +#if 0 class MeshInstance; @@ -116,5 +116,5 @@ public: }; #endif // MULTIMESH_EDITOR_PLUGIN_H - +#endif diff --git a/tools/editor/plugins/collision_polygon_editor_plugin.cpp b/tools/editor/plugins/collision_polygon_editor_plugin.cpp index 0b06b3ba21c..ccec7eaed11 100644 --- a/tools/editor/plugins/collision_polygon_editor_plugin.cpp +++ b/tools/editor/plugins/collision_polygon_editor_plugin.cpp @@ -33,6 +33,8 @@ #include "scene/3d/camera.h" #include "canvas_item_editor_plugin.h" +#if 0 + void CollisionPolygonEditor::_notification(int p_what) { switch(p_what) { @@ -642,3 +644,4 @@ CollisionPolygonEditorPlugin::~CollisionPolygonEditorPlugin() { } +#endif diff --git a/tools/editor/plugins/collision_polygon_editor_plugin.h b/tools/editor/plugins/collision_polygon_editor_plugin.h index 45e287ef000..4c1f45eca97 100644 --- a/tools/editor/plugins/collision_polygon_editor_plugin.h +++ b/tools/editor/plugins/collision_polygon_editor_plugin.h @@ -40,6 +40,8 @@ /** @author Juan Linietsky */ + +#if 0 class CanvasItemEditor; class CollisionPolygonEditor : public HBoxContainer { @@ -117,5 +119,5 @@ public: ~CollisionPolygonEditorPlugin(); }; - +#endif #endif // COLLISION_POLYGON_EDITOR_PLUGIN_H diff --git a/tools/editor/plugins/cube_grid_theme_editor_plugin.cpp b/tools/editor/plugins/cube_grid_theme_editor_plugin.cpp index b6f3db73f71..9bc624ef65d 100644 --- a/tools/editor/plugins/cube_grid_theme_editor_plugin.cpp +++ b/tools/editor/plugins/cube_grid_theme_editor_plugin.cpp @@ -28,6 +28,7 @@ /*************************************************************************/ #include "cube_grid_theme_editor_plugin.h" +#if 0 #include "scene/3d/mesh_instance.h" #include "scene/3d/physics_body.h" #include "scene/main/viewport.h" @@ -353,4 +354,4 @@ MeshLibraryEditorPlugin::MeshLibraryEditorPlugin(EditorNode *p_node) { theme_editor->hide(); } - +#endif diff --git a/tools/editor/plugins/cube_grid_theme_editor_plugin.h b/tools/editor/plugins/cube_grid_theme_editor_plugin.h index 72ee171e19b..a2875d24c22 100644 --- a/tools/editor/plugins/cube_grid_theme_editor_plugin.h +++ b/tools/editor/plugins/cube_grid_theme_editor_plugin.h @@ -32,7 +32,7 @@ #include "scene/resources/mesh_library.h" #include "tools/editor/editor_node.h" - +#if 0 class MeshLibraryEditor : public Control { OBJ_TYPE( MeshLibraryEditor, Control ); @@ -93,3 +93,4 @@ public: #endif // CUBE_GRID_THEME_EDITOR_PLUGIN_H +#endif diff --git a/tools/editor/plugins/editor_preview_plugins.cpp b/tools/editor/plugins/editor_preview_plugins.cpp index b1bce604844..66f766a0be8 100644 --- a/tools/editor/plugins/editor_preview_plugins.cpp +++ b/tools/editor/plugins/editor_preview_plugins.cpp @@ -37,6 +37,7 @@ #include "scene/resources/bit_mask.h" #include "tools/editor/editor_scale.h" +#if 0 bool EditorTexturePreviewPlugin::handles(const String& p_type) const { return (ObjectTypeDB::is_type(p_type,"ImageTexture") || ObjectTypeDB::is_type(p_type, "AtlasTexture")); @@ -69,8 +70,8 @@ Ref EditorTexturePreviewPlugin::generate(const RES& p_from) { if (img.is_compressed()) { if (img.decompress()!=OK) return Ref(); - } else if (img.get_format()!=Image::FORMAT_RGB && img.get_format()!=Image::FORMAT_RGBA) { - img.convert(Image::FORMAT_RGBA); + } else if (img.get_format()!=Image::FORMAT_RGB8 && img.get_format()!=Image::FORMAT_RGBA8) { + img.convert(Image::FORMAT_RGBA8); } int width,height; @@ -138,15 +139,15 @@ Ref EditorBitmapPreviewPlugin::generate(const RES& p_from) { } - Image img(bm->get_size().width,bm->get_size().height,0,Image::FORMAT_GRAYSCALE,data); + Image img(bm->get_size().width,bm->get_size().height,0,Image::FORMAT_L8,data); int thumbnail_size = EditorSettings::get_singleton()->get("file_dialog/thumbnail_size"); thumbnail_size*=EDSCALE; if (img.is_compressed()) { if (img.decompress()!=OK) return Ref(); - } else if (img.get_format()!=Image::FORMAT_RGB && img.get_format()!=Image::FORMAT_RGBA) { - img.convert(Image::FORMAT_RGBA); + } else if (img.get_format()!=Image::FORMAT_RGB8 && img.get_format()!=Image::FORMAT_RGBA8) { + img.convert(Image::FORMAT_RGBA8); } int width,height; @@ -434,7 +435,7 @@ Ref EditorScriptPreviewPlugin::generate(const RES& p_from) { int col=0; int thumbnail_size = EditorSettings::get_singleton()->get("file_dialog/thumbnail_size"); thumbnail_size*=EDSCALE; - Image img(thumbnail_size,thumbnail_size,0,Image::FORMAT_RGBA); + Image img(thumbnail_size,thumbnail_size,0,Image::FORMAT_RGBA8); @@ -778,7 +779,7 @@ Ref EditorSamplePreviewPlugin::generate(const RES& p_from) { imgdata = DVector::Write(); Ref ptex = Ref( memnew( ImageTexture)); - ptex->create_from_image(Image(w,h,0,Image::FORMAT_RGB,img),0); + ptex->create_from_image(Image(w,h,0,Image::FORMAT_RGB8,img),0); return ptex; } @@ -888,6 +889,7 @@ EditorMeshPreviewPlugin::EditorMeshPreviewPlugin() { } + EditorMeshPreviewPlugin::~EditorMeshPreviewPlugin() { //VS::get_singleton()->free(sphere); @@ -901,3 +903,4 @@ EditorMeshPreviewPlugin::~EditorMeshPreviewPlugin() { VS::get_singleton()->free(scenario); } +#endif diff --git a/tools/editor/plugins/editor_preview_plugins.h b/tools/editor/plugins/editor_preview_plugins.h index b33aefaa23e..9807d835a21 100644 --- a/tools/editor/plugins/editor_preview_plugins.h +++ b/tools/editor/plugins/editor_preview_plugins.h @@ -31,6 +31,7 @@ #include "tools/editor/editor_resource_preview.h" +#if 0 class EditorTexturePreviewPlugin : public EditorResourcePreviewGenerator { public: @@ -123,5 +124,5 @@ public: ~EditorMeshPreviewPlugin(); }; - +#endif #endif // EDITORPREVIEWPLUGINS_H diff --git a/tools/editor/plugins/material_editor_plugin.cpp b/tools/editor/plugins/material_editor_plugin.cpp index 876fab0d6e8..1036016194c 100644 --- a/tools/editor/plugins/material_editor_plugin.cpp +++ b/tools/editor/plugins/material_editor_plugin.cpp @@ -1,6 +1,8 @@ #include "material_editor_plugin.h" #include "scene/main/viewport.h" +#if 0 + void MaterialEditor::_input_event(InputEvent p_event) { @@ -379,3 +381,4 @@ MaterialEditorPlugin::~MaterialEditorPlugin() } +#endif diff --git a/tools/editor/plugins/material_editor_plugin.h b/tools/editor/plugins/material_editor_plugin.h index 49e92493b34..68b3b3b1a78 100644 --- a/tools/editor/plugins/material_editor_plugin.h +++ b/tools/editor/plugins/material_editor_plugin.h @@ -8,7 +8,7 @@ #include "scene/3d/mesh_instance.h" #include "scene/3d/camera.h" - +#if 0 class MaterialEditor : public Control { OBJ_TYPE(MaterialEditor, Control); @@ -69,3 +69,4 @@ public: }; #endif // MATERIAL_EDITOR_PLUGIN_H +#endif diff --git a/tools/editor/plugins/mesh_editor_plugin.cpp b/tools/editor/plugins/mesh_editor_plugin.cpp index b70cbad25f6..cd7860bcab6 100644 --- a/tools/editor/plugins/mesh_editor_plugin.cpp +++ b/tools/editor/plugins/mesh_editor_plugin.cpp @@ -28,6 +28,7 @@ /*************************************************************************/ #include "mesh_editor_plugin.h" +#if 0 void MeshEditor::_input_event(InputEvent p_event) { @@ -241,3 +242,4 @@ MeshEditorPlugin::MeshEditorPlugin(EditorNode *p_node) { MeshEditorPlugin::~MeshEditorPlugin() { } +#endif diff --git a/tools/editor/plugins/mesh_editor_plugin.h b/tools/editor/plugins/mesh_editor_plugin.h index 0715a96e741..97fd9ae771c 100644 --- a/tools/editor/plugins/mesh_editor_plugin.h +++ b/tools/editor/plugins/mesh_editor_plugin.h @@ -29,6 +29,8 @@ #ifndef MESH_EDITOR_PLUGIN_H #define MESH_EDITOR_PLUGIN_H +#if 0 + #include "tools/editor/editor_plugin.h" #include "tools/editor/editor_node.h" #include "scene/resources/material.h" @@ -93,3 +95,4 @@ public: }; #endif // MESH_EDITOR_PLUGIN_H +#endif diff --git a/tools/editor/plugins/particles_2d_editor_plugin.cpp b/tools/editor/plugins/particles_2d_editor_plugin.cpp index ce25f34c1fa..4d93f2019ef 100644 --- a/tools/editor/plugins/particles_2d_editor_plugin.cpp +++ b/tools/editor/plugins/particles_2d_editor_plugin.cpp @@ -69,8 +69,8 @@ void Particles2DEditorPlugin::_file_selected(const String& p_file) { ERR_EXPLAIN(TTR("Error loading image:")+" "+p_file); ERR_FAIL_COND(err!=OK); - img.convert(Image::FORMAT_GRAYSCALE_ALPHA); - ERR_FAIL_COND(img.get_format()!=Image::FORMAT_GRAYSCALE_ALPHA); + img.convert(Image::FORMAT_LA8); + ERR_FAIL_COND(img.get_format()!=Image::FORMAT_LA8); Size2i s = Size2(img.get_width(),img.get_height()); ERR_FAIL_COND(s.width==0 || s.height==0); diff --git a/tools/editor/plugins/particles_editor_plugin.cpp b/tools/editor/plugins/particles_editor_plugin.cpp index 7e20cc3f54d..d8330c4b0c8 100644 --- a/tools/editor/plugins/particles_editor_plugin.cpp +++ b/tools/editor/plugins/particles_editor_plugin.cpp @@ -26,6 +26,8 @@ /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ + +#if 0 #include "particles_editor_plugin.h" #include "io/resource_loader.h" #include "servers/visual/particle_system_sw.h" @@ -456,3 +458,4 @@ ParticlesEditorPlugin::~ParticlesEditorPlugin() } +#endif diff --git a/tools/editor/plugins/particles_editor_plugin.h b/tools/editor/plugins/particles_editor_plugin.h index ff80bffc291..eec01d03a10 100644 --- a/tools/editor/plugins/particles_editor_plugin.h +++ b/tools/editor/plugins/particles_editor_plugin.h @@ -37,7 +37,7 @@ /** @author Juan Linietsky */ - +#if 0 class ParticlesEditor : public Control { OBJ_TYPE(ParticlesEditor, Control ); @@ -113,3 +113,4 @@ public: }; #endif // PARTICLES_EDITOR_PLUGIN_H +#endif diff --git a/tools/editor/plugins/path_editor_plugin.cpp b/tools/editor/plugins/path_editor_plugin.cpp index 33ef71efab5..aa7071298ad 100644 --- a/tools/editor/plugins/path_editor_plugin.cpp +++ b/tools/editor/plugins/path_editor_plugin.cpp @@ -31,6 +31,7 @@ #include "scene/resources/curve.h" #include "os/keyboard.h" +#if 0 String PathSpatialGizmo::get_handle_name(int p_idx) const { Ref c = path->get_curve(); @@ -593,3 +594,4 @@ PathEditorPlugin::~PathEditorPlugin() { } +#endif diff --git a/tools/editor/plugins/path_editor_plugin.h b/tools/editor/plugins/path_editor_plugin.h index 0afd957af70..ee2196b47ff 100644 --- a/tools/editor/plugins/path_editor_plugin.h +++ b/tools/editor/plugins/path_editor_plugin.h @@ -32,6 +32,7 @@ #include "tools/editor/spatial_editor_gizmos.h" #include "scene/3d/path.h" +# if 0 class PathSpatialGizmo : public EditorSpatialGizmo { OBJ_TYPE(PathSpatialGizmo,EditorSpatialGizmo); @@ -95,5 +96,5 @@ public: }; - +#endif #endif // PATH_EDITOR_PLUGIN_H diff --git a/tools/editor/plugins/polygon_2d_editor_plugin.cpp b/tools/editor/plugins/polygon_2d_editor_plugin.cpp index 19d1ccc06f5..cd19a908a8f 100644 --- a/tools/editor/plugins/polygon_2d_editor_plugin.cpp +++ b/tools/editor/plugins/polygon_2d_editor_plugin.cpp @@ -850,7 +850,7 @@ Polygon2DEditor::Polygon2DEditor(EditorNode *p_editor) { uv_edit = memnew( AcceptDialog ); add_child(uv_edit); uv_edit->set_title(TTR("Polygon 2D UV Editor")); - uv_edit->set_self_opacity(0.9); + uv_edit->set_self_modulate(Color(1,1,1,0.9)); VBoxContainer *uv_main_vb = memnew( VBoxContainer ); uv_edit->add_child(uv_main_vb); diff --git a/tools/editor/plugins/sample_editor_plugin.cpp b/tools/editor/plugins/sample_editor_plugin.cpp index 7965fa54ae1..5bdd7c4e61b 100644 --- a/tools/editor/plugins/sample_editor_plugin.cpp +++ b/tools/editor/plugins/sample_editor_plugin.cpp @@ -311,7 +311,7 @@ void SampleEditor::generate_preview_texture(const Ref& p_sample,Ref::Write(); - p_texture->set_data(Image(w,h,0,Image::FORMAT_RGB,img)); + p_texture->set_data(Image(w,h,0,Image::FORMAT_RGB8,img)); } @@ -392,7 +392,7 @@ SampleEditor::SampleEditor() { add_child(stop); peakdisplay=Ref( memnew( ImageTexture) ); - peakdisplay->create( EDITOR_DEF("audio/sample_editor_preview_width",512),EDITOR_DEF("audio/sample_editor_preview_height",128),Image::FORMAT_RGB); + peakdisplay->create( EDITOR_DEF("audio/sample_editor_preview_width",512),EDITOR_DEF("audio/sample_editor_preview_height",128),Image::FORMAT_RGB8); sample_texframe->set_expand(true); sample_texframe->set_texture(peakdisplay); diff --git a/tools/editor/plugins/sample_library_editor_plugin.cpp b/tools/editor/plugins/sample_library_editor_plugin.cpp index 2a6940332cd..ade0c475e11 100644 --- a/tools/editor/plugins/sample_library_editor_plugin.cpp +++ b/tools/editor/plugins/sample_library_editor_plugin.cpp @@ -236,7 +236,7 @@ void SampleLibraryEditor::_update_library() { // Preview/edit Ref preview( memnew( ImageTexture )); - preview->create(128,16,Image::FORMAT_RGB); + preview->create(128,16,Image::FORMAT_RGB8); SampleEditor::generate_preview_texture(smp,preview); ti->set_cell_mode(1,TreeItem::CELL_MODE_ICON); ti->set_selectable(1,false); diff --git a/tools/editor/plugins/shader_editor_plugin.cpp b/tools/editor/plugins/shader_editor_plugin.cpp index b3317e83138..5541ae987f9 100644 --- a/tools/editor/plugins/shader_editor_plugin.cpp +++ b/tools/editor/plugins/shader_editor_plugin.cpp @@ -38,7 +38,7 @@ #include "tools/editor/property_editor.h" #include "os/os.h" - +#if 0 /*** SETTINGS EDITOR ****/ @@ -654,3 +654,4 @@ ShaderEditorPlugin::ShaderEditorPlugin(EditorNode *p_node, bool p_2d) { ShaderEditorPlugin::~ShaderEditorPlugin() { } +#endif diff --git a/tools/editor/plugins/shader_editor_plugin.h b/tools/editor/plugins/shader_editor_plugin.h index 9219a1fbc27..ef7cc6772ec 100644 --- a/tools/editor/plugins/shader_editor_plugin.h +++ b/tools/editor/plugins/shader_editor_plugin.h @@ -38,7 +38,7 @@ #include "scene/resources/shader.h" #include "servers/visual/shader_language.h" - +#if 0 class ShaderTextEditor : public CodeTextEditor { OBJ_TYPE( ShaderTextEditor, CodeTextEditor ); @@ -154,4 +154,6 @@ public: ~ShaderEditorPlugin(); }; + +#endif #endif diff --git a/tools/editor/plugins/shader_graph_editor_plugin.cpp b/tools/editor/plugins/shader_graph_editor_plugin.cpp index 3ab906f84ee..f864c6885f9 100644 --- a/tools/editor/plugins/shader_graph_editor_plugin.cpp +++ b/tools/editor/plugins/shader_graph_editor_plugin.cpp @@ -28,6 +28,7 @@ /*************************************************************************/ #include "shader_graph_editor_plugin.h" +#if 0 #include "scene/gui/check_box.h" #include "scene/gui/menu_button.h" @@ -1377,7 +1378,7 @@ ToolButton *ShaderGraphView::make_editor(String text,GraphNode* gn,int p_id,int edit->set_icon(ped_popup->get_icon("Matrix", "EditorIcons")); break; case Variant::COLOR: { - Image icon_color = Image(15,15,false,Image::FORMAT_RGB); + Image icon_color = Image(15,15,false,Image::FORMAT_RGB8); Color c = graph->default_get_value(type,p_id,param); for (int x=1;x<14;x++) for (int y=1;y<14;y++) @@ -2945,3 +2946,4 @@ ShaderGraphEditorPlugin::~ShaderGraphEditorPlugin() +#endif diff --git a/tools/editor/plugins/shader_graph_editor_plugin.h b/tools/editor/plugins/shader_graph_editor_plugin.h index 67ee5e2d454..dafafdd1c83 100644 --- a/tools/editor/plugins/shader_graph_editor_plugin.h +++ b/tools/editor/plugins/shader_graph_editor_plugin.h @@ -43,7 +43,7 @@ @author Juan Linietsky */ - +#if 0 class GraphColorRampEdit : public Control { OBJ_TYPE(GraphColorRampEdit,Control); @@ -239,4 +239,4 @@ public: }; #endif - +#endif diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp index 9701b8030df..afea6f93dd7 100644 --- a/tools/editor/plugins/spatial_editor_plugin.cpp +++ b/tools/editor/plugins/spatial_editor_plugin.cpp @@ -274,7 +274,7 @@ ObjectID SpatialEditorViewport::_select_ray(const Point2& p_pos, bool p_append,b Vector3 ray=_get_ray(p_pos); Vector3 pos=_get_ray_pos(p_pos); - Vector instances=VisualServer::get_singleton()->instances_cull_ray(pos,ray,get_tree()->get_root()->get_world()->get_scenario() ); + Vector instances=VisualServer::get_singleton()->instances_cull_ray(pos,ray,get_tree()->get_root()->get_world()->get_scenario() ); Set > found_gizmos; //uint32_t closest=0; @@ -286,8 +286,7 @@ ObjectID SpatialEditorViewport::_select_ray(const Point2& p_pos, bool p_append,b for (int i=0;iinstance_get_object_instance_ID(instances[i]); - Object *obj=ObjectDB::get_instance(id); + Object *obj=ObjectDB::get_instance(instances[i]); if (!obj) continue; @@ -405,15 +404,15 @@ void SpatialEditorViewport::_find_items_at_pos(const Point2& p_pos,bool &r_inclu Vector3 ray=_get_ray(p_pos); Vector3 pos=_get_ray_pos(p_pos); - Vector instances=VisualServer::get_singleton()->instances_cull_ray(pos,ray,get_tree()->get_root()->get_world()->get_scenario() ); + Vector instances=VisualServer::get_singleton()->instances_cull_ray(pos,ray,get_tree()->get_root()->get_world()->get_scenario() ); Set > found_gizmos; r_includes_current=false; for (int i=0;iinstance_get_object_instance_ID(instances[i]); - Object *obj=ObjectDB::get_instance(id); + Object *obj=ObjectDB::get_instance(instances[i]); + if (!obj) continue; @@ -534,14 +533,12 @@ void SpatialEditorViewport::_select_region() { frustum.push_back( far ); - Vector instances=VisualServer::get_singleton()->instances_cull_convex(frustum,get_tree()->get_root()->get_world()->get_scenario()); + Vector instances=VisualServer::get_singleton()->instances_cull_convex(frustum,get_tree()->get_root()->get_world()->get_scenario()); for (int i=0;iinstance_get_object_instance_ID(instances[i]); - - Object *obj=ObjectDB::get_instance(id); + Object *obj=ObjectDB::get_instance(instances[i]); if (!obj) continue; Spatial *sp = obj->cast_to(); @@ -877,7 +874,7 @@ void SpatialEditorViewport::_sinput(const InputEvent &p_event) { if (b.mod.control) { - Vector instances=VisualServer::get_singleton()->instances_cull_ray(ray_origin,ray_dir,get_tree()->get_root()->get_world()->get_scenario() ); + Vector instances=VisualServer::get_singleton()->instances_cull_ray(ray_origin,ray_dir,get_tree()->get_root()->get_world()->get_scenario() ); Plane p(ray_origin,_get_camera_normal()); @@ -886,8 +883,9 @@ void SpatialEditorViewport::_sinput(const InputEvent &p_event) { for (int i=0;iinstance_get_object_instance_ID(instances[i]); - Object *obj=ObjectDB::get_instance(id); + + Object *obj=ObjectDB::get_instance(instances[i]); + if (!obj) continue; @@ -2061,9 +2059,9 @@ void SpatialEditorViewport::_menu_option(int p_option) { bool current = view_menu->get_popup()->is_item_checked( idx ); current=!current; if (current) - camera->set_visible_layers( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+index))|(1<set_cull_mask( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+index))|(1<set_visible_layers( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+index))|(1<set_cull_mask( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+index))|(1<get_popup()->set_item_checked( idx, current ); } break; @@ -2361,7 +2359,7 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed surface->set_area_as_parent_rect(); camera = memnew(Camera); camera->set_disable_gizmo(true); - camera->set_visible_layers( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+p_index))|(1<set_cull_mask( ((1<<20)-1)|(1<<(GIZMO_BASE_LAYER+p_index))|(1<set_environment(SpatialEditor::get_singleton()->get_viewport_environment()); viewport->add_child(camera); camera->make_current(); @@ -2370,7 +2368,7 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed view_menu = memnew( MenuButton ); surface->add_child(view_menu); view_menu->set_pos( Point2(4,4)); - view_menu->set_self_opacity(0.5); + view_menu->set_self_modulate(Color(1,1,1,0.5)); view_menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("spatial_editor/top_view"), VIEW_TOP); view_menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("spatial_editor/bottom_view"), VIEW_BOTTOM); view_menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("spatial_editor/left_view"), VIEW_LEFT); @@ -2550,10 +2548,10 @@ void SpatialEditor::_generate_selection_box() { } Ref mat = memnew( FixedMaterial ); - mat->set_flag(Material::FLAG_UNSHADED,true); + /*mat->set_flag(Material::FLAG_UNSHADED,true); mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1)); mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); + mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true);*/ st->set_material(mat); selection_box = st->commit(); } @@ -2685,14 +2683,14 @@ void SpatialEditor::set_state(const Dictionary& p_state) { } if (d.has("ambient_light_color")) { settings_ambient_color->set_color(d["ambient_light_color"]); - viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,d["ambient_light_color"]); + //viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,d["ambient_light_color"]); } if (d.has("default_srgb")) { bool use = d["default_srgb"]; - viewport_environment->set_enable_fx(Environment::FX_SRGB,use); - view_menu->get_popup()->set_item_checked( view_menu->get_popup()->get_item_index(MENU_VIEW_USE_DEFAULT_SRGB), use ); + //viewport_environment->set_enable_fx(Environment::FX_SRGB,use); + //view_menu->get_popup()->set_item_checked( view_menu->get_popup()->get_item_index(MENU_VIEW_USE_DEFAULT_SRGB), use ); } if (d.has("show_grid")) { bool use = d["show_grid"]; @@ -2892,9 +2890,9 @@ void SpatialEditor::_menu_item_pressed(int p_option) { bool is_checked = view_menu->get_popup()->is_item_checked( view_menu->get_popup()->get_item_index(p_option) ); if (is_checked) { - viewport_environment->set_enable_fx(Environment::FX_SRGB,false); + //viewport_environment->set_enable_fx(Environment::FX_SRGB,false); } else { - viewport_environment->set_enable_fx(Environment::FX_SRGB,true); + //viewport_environment->set_enable_fx(Environment::FX_SRGB,true); } is_checked = ! is_checked; @@ -3140,11 +3138,11 @@ void SpatialEditor::_init_indicators() { { - indicator_mat = VisualServer::get_singleton()->fixed_material_create(); - VisualServer::get_singleton()->material_set_flag( indicator_mat, VisualServer::MATERIAL_FLAG_UNSHADED, true ); + indicator_mat = VisualServer::get_singleton()->material_create(); + /*VisualServer::get_singleton()->material_set_flag( indicator_mat, VisualServer::MATERIAL_FLAG_UNSHADED, true ); VisualServer::get_singleton()->material_set_flag( indicator_mat, VisualServer::MATERIAL_FLAG_ONTOP, false ); VisualServer::get_singleton()->fixed_material_set_flag(indicator_mat, VisualServer::FIXED_MATERIAL_FLAG_USE_ALPHA,true); - VisualServer::get_singleton()->fixed_material_set_flag(indicator_mat, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true); + VisualServer::get_singleton()->fixed_material_set_flag(indicator_mat, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true);*/ DVector grid_colors[3]; DVector grid_points[3]; @@ -3186,7 +3184,7 @@ void SpatialEditor::_init_indicators() { d.resize(VS::ARRAY_MAX); d[VisualServer::ARRAY_VERTEX]=grid_points[i]; d[VisualServer::ARRAY_COLOR]=grid_colors[i]; - VisualServer::get_singleton()->mesh_add_surface(grid[i],VisualServer::PRIMITIVE_LINES,d); + VisualServer::get_singleton()->mesh_add_surface_from_arrays(grid[i],VisualServer::PRIMITIVE_LINES,d); VisualServer::get_singleton()->mesh_surface_set_material(grid[i],0,indicator_mat); grid_instance[i] = VisualServer::get_singleton()->instance_create2(grid[i],get_tree()->get_root()->get_world()->get_scenario()); @@ -3205,7 +3203,7 @@ void SpatialEditor::_init_indicators() { d[VisualServer::ARRAY_VERTEX]=origin_points; d[VisualServer::ARRAY_COLOR]=origin_colors; - VisualServer::get_singleton()->mesh_add_surface(origin,VisualServer::PRIMITIVE_LINES,d); + VisualServer::get_singleton()->mesh_add_surface_from_arrays(origin,VisualServer::PRIMITIVE_LINES,d); VisualServer::get_singleton()->mesh_surface_set_material(origin,0,indicator_mat); @@ -3237,16 +3235,16 @@ void SpatialEditor::_init_indicators() { cursor_points.push_back(Vector3(0,-cs,0)); cursor_points.push_back(Vector3(0,0,+cs)); cursor_points.push_back(Vector3(0,0,-cs)); - cursor_material=VisualServer::get_singleton()->fixed_material_create(); - VisualServer::get_singleton()->fixed_material_set_param(cursor_material,VS::FIXED_MATERIAL_PARAM_DIFFUSE,Color(0,1,1)); + cursor_material=VisualServer::get_singleton()->material_create(); + /*VisualServer::get_singleton()->fixed_material_set_param(cursor_material,VS::FIXED_MATERIAL_PARAM_DIFFUSE,Color(0,1,1)); VisualServer::get_singleton()->material_set_flag( cursor_material, VisualServer::MATERIAL_FLAG_UNSHADED, true ); VisualServer::get_singleton()->fixed_material_set_flag(cursor_material, VisualServer::FIXED_MATERIAL_FLAG_USE_ALPHA,true); - VisualServer::get_singleton()->fixed_material_set_flag(cursor_material, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true); + VisualServer::get_singleton()->fixed_material_set_flag(cursor_material, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true);*/ Array d; d.resize(VS::ARRAY_MAX); d[VS::ARRAY_VERTEX]=cursor_points; - VisualServer::get_singleton()->mesh_add_surface(cursor_mesh,VS::PRIMITIVE_LINES,d); + VisualServer::get_singleton()->mesh_add_surface_from_arrays(cursor_mesh,VS::PRIMITIVE_LINES,d); VisualServer::get_singleton()->mesh_surface_set_material(cursor_mesh,0,cursor_material); cursor_instance = VisualServer::get_singleton()->instance_create2(cursor_mesh,get_tree()->get_root()->get_world()->get_scenario()); @@ -3266,10 +3264,10 @@ void SpatialEditor::_init_indicators() { float gizmo_alph = EditorSettings::get_singleton()->get("3d_editor/manipulator_gizmo_opacity"); gizmo_hl = Ref( memnew( FixedMaterial ) ); - gizmo_hl->set_flag(Material::FLAG_UNSHADED, true); + /* gizmo_hl->set_flag(Material::FLAG_UNSHADED, true); gizmo_hl->set_flag(Material::FLAG_ONTOP, true); gizmo_hl->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - gizmo_hl->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,gizmo_alph+0.2f)); + gizmo_hl->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,gizmo_alph+0.2f));*/ for(int i=0;i<3;i++) { @@ -3278,13 +3276,13 @@ void SpatialEditor::_init_indicators() { Ref mat = memnew( FixedMaterial ); - mat->set_flag(Material::FLAG_UNSHADED, true); + /* mat->set_flag(Material::FLAG_UNSHADED, true); mat->set_flag(Material::FLAG_ONTOP, true); mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); Color col; col[i]=1.0; col.a= gizmo_alph; - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,col); + mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,col);*/ gizmo_color[i]=mat; @@ -3548,7 +3546,7 @@ void SpatialEditor::_notification(int p_what) { if (p_what==NOTIFICATION_ENTER_TREE) { - gizmos = memnew( SpatialEditorGizmos ); + //gizmos = memnew( SpatialEditorGizmos ); _init_indicators(); _update_default_light_angle(); } @@ -3556,7 +3554,7 @@ void SpatialEditor::_notification(int p_what) { if (p_what==NOTIFICATION_EXIT_TREE) { _finish_indicators(); - memdelete( gizmos ); +// memdelete( gizmos ); } } @@ -3602,7 +3600,7 @@ void SpatialEditor::_request_gizmo(Object* p_obj) { } if (!seg.is_valid()) { - seg = gizmos->get_gizmo(sp); + // seg = gizmos->get_gizmo(sp); } if (seg.is_valid()) { @@ -3724,7 +3722,7 @@ void SpatialEditor::clear() { settings_default_light_rot_x=Math_PI*0.3; settings_default_light_rot_y=Math_PI*0.2; - viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,Color(0.15,0.15,0.15)); + //viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,Color(0.15,0.15,0.15)); settings_ambient_color->set_color(Color(0.15,0.15,0.15)); if (!light_instance.is_valid()) _menu_item_pressed(MENU_VIEW_USE_DEFAULT_LIGHT); @@ -3737,7 +3735,7 @@ void SpatialEditor::clear() { void SpatialEditor::_update_ambient_light_color(const Color& p_color) { - viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,settings_ambient_color->get_color()); +// viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,settings_ambient_color->get_color()); } @@ -4006,8 +4004,8 @@ SpatialEditor::SpatialEditor(EditorNode *p_editor) { settings_vbc->add_margin_child(TTR("Ambient Light Color:"),settings_ambient_color); settings_ambient_color->connect("color_changed",this,"_update_ambient_light_color"); - viewport_environment->set_enable_fx(Environment::FX_AMBIENT_LIGHT,true); - viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,Color(0.15,0.15,0.15)); +// viewport_environment->set_enable_fx(Environment::FX_AMBIENT_LIGHT,true); +// viewport_environment->fx_set_param(Environment::FX_PARAM_AMBIENT_LIGHT_COLOR,Color(0.15,0.15,0.15)); settings_ambient_color->set_color(Color(0.15,0.15,0.15)); diff --git a/tools/editor/plugins/spatial_editor_plugin.h b/tools/editor/plugins/spatial_editor_plugin.h index 975092a01d2..2a52eab777f 100644 --- a/tools/editor/plugins/spatial_editor_plugin.h +++ b/tools/editor/plugins/spatial_editor_plugin.h @@ -467,7 +467,7 @@ private: static SpatialEditor *singleton; void _node_removed(Node* p_node); - SpatialEditorGizmos *gizmos; + //SpatialEditorGizmos *gizmos; SpatialEditor(); void _update_ambient_light_color(const Color& p_color); diff --git a/tools/editor/project_manager.cpp b/tools/editor/project_manager.cpp index 18a4e845a03..4f6f624b1e0 100644 --- a/tools/editor/project_manager.cpp +++ b/tools/editor/project_manager.cpp @@ -870,7 +870,7 @@ void ProjectManager::_load_recent_projects() { TextureButton *favorite = memnew( TextureButton ); favorite->set_normal_texture(favorite_icon); if (!is_favorite) - favorite->set_opacity(0.2); + favorite->set_modulate(Color(1,1,1,0.2)); favorite->set_v_size_flags(SIZE_EXPAND); favorite->connect("pressed",this,"_favorite_pressed",varray(hb)); favorite_box->add_child(favorite); @@ -891,7 +891,7 @@ void ProjectManager::_load_recent_projects() { vb->add_child(title); Label *fpath = memnew( Label(path) ); vb->add_child(fpath); - fpath->set_opacity(0.5); + fpath->set_modulate(Color(1,1,1,0.5)); fpath->add_color_override("font_color",font_color); scroll_childs->add_child(hb); diff --git a/tools/editor/pvrtc_compress.cpp b/tools/editor/pvrtc_compress.cpp index 75b5c69bc2c..bda06e1706d 100644 --- a/tools/editor/pvrtc_compress.cpp +++ b/tools/editor/pvrtc_compress.cpp @@ -85,7 +85,7 @@ static void _compress_image(Image::CompressMode p_mode,Image *p_image) { if (EditorSettings::get_singleton()->get("PVRTC/fast_conversion").operator bool()) { args.push_back("-pvrtcfast"); } - if (p_image->get_mipmaps()>0) + if (p_image->has_mipmaps()) args.push_back("-m"); Ref t = memnew( ImageTexture ); diff --git a/tools/editor/scene_tree_editor.cpp b/tools/editor/scene_tree_editor.cpp index 53bfe8cc57f..fdcced2fa6a 100644 --- a/tools/editor/scene_tree_editor.cpp +++ b/tools/editor/scene_tree_editor.cpp @@ -953,7 +953,7 @@ Variant SceneTreeEditor::get_drag_data_fw(const Point2& p_point,Control* p_from) Label *label = memnew( Label( selected[i]->get_name() ) ); hb->add_child(label); vb->add_child(hb); - hb->set_opacity(opacity_item); + hb->set_modulate(Color(1,1,1,opacity_item)); opacity_item -= opacity_step; } NodePath p = selected[i]->get_path(); diff --git a/tools/editor/spatial_editor_gizmos.cpp b/tools/editor/spatial_editor_gizmos.cpp index 84803eb6dbd..716b03b7b7f 100644 --- a/tools/editor/spatial_editor_gizmos.cpp +++ b/tools/editor/spatial_editor_gizmos.cpp @@ -41,6 +41,8 @@ // Keep small children away from this file. // It's so ugly it will eat them alive +#if 0 + #define HANDLE_HALF_SIZE 0.05 void EditorSpatialGizmo::clear() { @@ -3198,3 +3200,4 @@ SpatialEditorGizmos::SpatialEditorGizmos() { } +#endif diff --git a/tools/editor/spatial_editor_gizmos.h b/tools/editor/spatial_editor_gizmos.h index 3d7272f5228..a7a6af4b181 100644 --- a/tools/editor/spatial_editor_gizmos.h +++ b/tools/editor/spatial_editor_gizmos.h @@ -52,7 +52,7 @@ class Camera; - +#if 0 class EditorSpatialGizmo : public SpatialEditorGizmo { OBJ_TYPE(EditorSpatialGizmo,SpatialGizmo); @@ -505,5 +505,5 @@ public: SpatialEditorGizmos(); }; - +#endif #endif // SPATIAL_EDITOR_GIZMOS_H From a6e9dc615346f44b68b418483dd218d11ba4a674 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Mon, 3 Oct 2016 23:46:24 -0300 Subject: [PATCH 002/223] make editor update by tracking changes in visualserverraster --- drivers/gles3/rasterizer_storage_gles3.cpp | 8 +- drivers/gles3/rasterizer_storage_gles3.h | 8 +- servers/visual/rasterizer.h | 8 +- servers/visual/visual_server_raster.cpp | 971 +-------------------- servers/visual/visual_server_raster.h | 461 +++++----- servers/visual_server.h | 8 +- 6 files changed, 274 insertions(+), 1190 deletions(-) diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index b29de876a47..515db390983 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -1199,7 +1199,7 @@ RID RasterizerStorageGLES3::multimesh_get_mesh(RID p_multimesh) const{ return RID(); } -AABB RasterizerStorageGLES3::multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const{ +AABB RasterizerStorageGLES3::multimesh_get_custom_aabb(RID p_multimesh) const{ return AABB(); } @@ -1305,7 +1305,7 @@ void RasterizerStorageGLES3::skeleton_bone_set_transform(RID p_skeleton,int p_bo } -Transform RasterizerStorageGLES3::skeleton_bone_get_transform(RID p_skeleton,int p_bone){ +Transform RasterizerStorageGLES3::skeleton_bone_get_transform(RID p_skeleton,int p_bone) const{ return Transform(); } @@ -1313,7 +1313,7 @@ void RasterizerStorageGLES3::skeleton_bone_set_transform_2d(RID p_skeleton,int p } -Matrix32 RasterizerStorageGLES3::skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone){ +Matrix32 RasterizerStorageGLES3::skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone) const{ return Matrix32(); } @@ -1419,7 +1419,7 @@ void RasterizerStorageGLES3::room_add_bounds(RID p_room, const DVector& } -void RasterizerStorageGLES3::room_clear_bounds(){ +void RasterizerStorageGLES3::room_clear_bounds(RID p_room){ } diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index 3b2d7d752c9..eb6cd0bd925 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -249,7 +249,7 @@ public: virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color); virtual RID multimesh_get_mesh(RID p_multimesh) const; - virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const; virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const; virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const; @@ -282,9 +282,9 @@ public: virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false); virtual int skeleton_get_bone_count(RID p_skeleton) const; virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone) const; virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform); - virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone); + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone) const; /* Light API */ @@ -321,7 +321,7 @@ public: virtual RID room_create(); virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform); - virtual void room_clear_bounds(); + virtual void room_clear_bounds(RID p_room); /* PORTAL API */ diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index df18dd1679e..6a6eb7ad20f 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -134,7 +134,7 @@ public: virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color)=0; virtual RID multimesh_get_mesh(RID p_multimesh) const=0; - virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const=0; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const=0; virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const=0; virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const=0; @@ -167,9 +167,9 @@ public: virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false)=0; virtual int skeleton_get_bone_count(RID p_skeleton) const=0; virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform)=0; - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone)=0; + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone) const =0; virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform)=0; - virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone)=0; + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone) const=0; /* Light API */ @@ -206,7 +206,7 @@ public: virtual RID room_create()=0; virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform)=0; - virtual void room_clear_bounds()=0; + virtual void room_clear_bounds(RID p_room)=0; /* PORTAL API */ diff --git a/servers/visual/visual_server_raster.cpp b/servers/visual/visual_server_raster.cpp index 168afac77fd..d6e057bb520 100644 --- a/servers/visual/visual_server_raster.cpp +++ b/servers/visual/visual_server_raster.cpp @@ -37,519 +37,7 @@ // careful, these may run in different threads than the visual server -RID VisualServerRaster::texture_create(){ - return VSG::storage->texture_create(); -} -void VisualServerRaster::texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags){ - - VSG::storage->texture_allocate(p_texture,p_width,p_height,p_format,p_flags); -} -void VisualServerRaster::texture_set_data(RID p_texture,const Image& p_image,CubeMapSide p_cube_side){ - - VSG::storage->texture_set_data(p_texture,p_image,p_cube_side); -} -Image VisualServerRaster::texture_get_data(RID p_texture,CubeMapSide p_cube_side) const{ - - return VSG::storage->texture_get_data(p_texture,p_cube_side); -} -void VisualServerRaster::texture_set_flags(RID p_texture,uint32_t p_flags) { - - VSG::storage->texture_set_flags(p_texture,p_flags); -} -uint32_t VisualServerRaster::texture_get_flags(RID p_texture) const{ - - return VSG::storage->texture_get_flags(p_texture); -} -Image::Format VisualServerRaster::texture_get_format(RID p_texture) const{ - - return VSG::storage->texture_get_format(p_texture); -} -uint32_t VisualServerRaster::texture_get_width(RID p_texture) const{ - - return VSG::storage->texture_get_width(p_texture); -} -uint32_t VisualServerRaster::texture_get_height(RID p_texture) const{ - - return VSG::storage->texture_get_height(p_texture); -} -void VisualServerRaster::texture_set_size_override(RID p_texture,int p_width, int p_height){ - VSG::storage->texture_set_size_override(p_texture,p_width,p_height); - -} - -void VisualServerRaster::texture_set_path(RID p_texture,const String& p_path){ - - VSG::storage->texture_set_path(p_texture,p_path); -} - -String VisualServerRaster::texture_get_path(RID p_texture) const{ - - return VSG::storage->texture_get_path(p_texture); -} - -void VisualServerRaster::texture_set_shrink_all_x2_on_set_data(bool p_enable){ - - VSG::storage->texture_set_shrink_all_x2_on_set_data(p_enable); -} - -void VisualServerRaster::texture_debug_usage(List *r_info){ - - VSG::storage->texture_debug_usage(r_info); -} - - -/* SHADER API */ - - -RID VisualServerRaster::shader_create(ShaderMode p_mode){ - - return VSG::storage->shader_create(p_mode); -} - -void VisualServerRaster::shader_set_mode(RID p_shader,ShaderMode p_mode){ - - VSG::storage->shader_set_mode(p_shader,p_mode); -} -VisualServerRaster::ShaderMode VisualServerRaster::shader_get_mode(RID p_shader) const{ - - return VSG::storage->shader_get_mode(p_shader); -} - -void VisualServerRaster::shader_set_code(RID p_shader, const String& p_code){ - - VSG::storage->shader_set_code(p_shader,p_code); -} -String VisualServerRaster::shader_get_code(RID p_shader) const{ - - return VSG::storage->shader_get_code(p_shader); -} -void VisualServerRaster::shader_get_param_list(RID p_shader, List *p_param_list) const{ - - VSG::storage->shader_get_param_list(p_shader,p_param_list); -} - -void VisualServerRaster::shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture){ - - VSG::storage->shader_set_default_texture_param(p_shader,p_name,p_texture); -} -RID VisualServerRaster::shader_get_default_texture_param(RID p_shader, const StringName& p_name) const{ - - return VSG::storage->shader_get_default_texture_param(p_shader,p_name); -} - - -/* COMMON MATERIAL API */ - -RID VisualServerRaster::material_create(){ - - return VSG::storage->material_create(); -} - -void VisualServerRaster::material_set_shader(RID p_shader_material, RID p_shader){ - - VSG::storage->material_set_shader(p_shader_material,p_shader); -} -RID VisualServerRaster::material_get_shader(RID p_shader_material) const{ - - return VSG::storage->material_get_shader(p_shader_material); -} - -void VisualServerRaster::material_set_param(RID p_material, const StringName& p_param, const Variant& p_value){ - - VSG::storage->material_set_param(p_material,p_param,p_value); -} -Variant VisualServerRaster::material_get_param(RID p_material, const StringName& p_param) const{ - - return VSG::storage->material_get_param(p_material,p_param); -} - -/* MESH API */ - -RID VisualServerRaster::mesh_create(){ - - return VSG::storage->mesh_create(); -} - -void VisualServerRaster::mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes){ - - VSG::storage->mesh_add_surface(p_mesh,p_format,p_primitive,p_array,p_vertex_count,p_index_array,p_index_count,p_blend_shapes); -} - -void VisualServerRaster::mesh_set_morph_target_count(RID p_mesh,int p_amount){ - - VSG::storage->mesh_set_morph_target_count(p_mesh,p_amount); -} -int VisualServerRaster::mesh_get_morph_target_count(RID p_mesh) const{ - - return VSG::storage->mesh_get_morph_target_count(p_mesh); -} - - -void VisualServerRaster::mesh_set_morph_target_mode(RID p_mesh,MorphTargetMode p_mode){ - - VSG::storage->mesh_set_morph_target_mode(p_mesh,p_mode); -} -VisualServerRaster::MorphTargetMode VisualServerRaster::mesh_get_morph_target_mode(RID p_mesh) const{ - - return VSG::storage->mesh_get_morph_target_mode(p_mesh); -} - -void VisualServerRaster::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material){ - - VSG::storage->mesh_surface_set_material(p_mesh,p_surface,p_material); -} -RID VisualServerRaster::mesh_surface_get_material(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_material(p_mesh,p_surface); -} - -int VisualServerRaster::mesh_surface_get_array_len(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_array_len(p_mesh,p_surface); -} -int VisualServerRaster::mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_array_index_len(p_mesh,p_surface); -} - -DVector VisualServerRaster::mesh_surface_get_array(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_array(p_mesh,p_surface); -} -DVector VisualServerRaster::mesh_surface_get_index_array(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_index_array(p_mesh,p_surface); -} - - -uint32_t VisualServerRaster::mesh_surface_get_format(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_format(p_mesh,p_surface); -} -VisualServerRaster::PrimitiveType VisualServerRaster::mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const{ - - return VSG::storage->mesh_surface_get_primitive_type(p_mesh,p_surface); -} - -void VisualServerRaster::mesh_remove_surface(RID p_mesh,int p_index){ - - VSG::storage->mesh_remove_surface(p_mesh,p_index); -} -int VisualServerRaster::mesh_get_surface_count(RID p_mesh) const{ - - return VSG::storage->mesh_get_surface_count(p_mesh); -} - -void VisualServerRaster::mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb){ - - VSG::storage->mesh_set_custom_aabb(p_mesh,p_aabb); -} -AABB VisualServerRaster::mesh_get_custom_aabb(RID p_mesh) const{ - - return VSG::storage->mesh_get_custom_aabb(p_mesh); -} - -void VisualServerRaster::mesh_clear(RID p_mesh){ - - VSG::storage->mesh_clear(p_mesh); -} - -/* MULTIMESH API */ - - -RID VisualServerRaster::multimesh_create() { - - return VSG::storage->multimesh_create(); -} - -void VisualServerRaster::multimesh_allocate(RID p_multimesh,int p_instances,MultimeshTransformFormat p_transform_format,MultimeshColorFormat p_color_format,bool p_gen_aabb){ - - VSG::storage->multimesh_allocate(p_multimesh,p_instances,p_transform_format,p_color_format,p_gen_aabb); -} -int VisualServerRaster::multimesh_get_instance_count(RID p_multimesh) const{ - - return VSG::storage->multimesh_get_instance_count(p_multimesh); -} - -void VisualServerRaster::multimesh_set_mesh(RID p_multimesh,RID p_mesh){ - - VSG::storage->multimesh_set_mesh(p_multimesh,p_mesh); -} -void VisualServerRaster::multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb){ - - VSG::storage->multimesh_set_custom_aabb(p_multimesh,p_aabb); -} -void VisualServerRaster::multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform){ - - VSG::storage->multimesh_instance_set_transform(p_multimesh,p_index,p_transform); -} -void VisualServerRaster::multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform){ - - VSG::storage->multimesh_instance_set_transform_2d(p_multimesh,p_index,p_transform); -} -void VisualServerRaster::multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color){ - - VSG::storage->multimesh_instance_set_color(p_multimesh,p_index,p_color); -} - -RID VisualServerRaster::multimesh_get_mesh(RID p_multimesh) const{ - - return VSG::storage->multimesh_get_mesh(p_multimesh); -} -AABB VisualServerRaster::multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const{ - - return VSG::storage->multimesh_get_custom_aabb(p_multimesh,p_aabb); -} - -Transform VisualServerRaster::multimesh_instance_get_transform(RID p_multimesh,int p_index) const{ - - return VSG::storage->multimesh_instance_get_transform(p_multimesh,p_index); -} -Matrix32 VisualServerRaster::multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const{ - - return VSG::storage->multimesh_instance_get_transform_2d(p_multimesh,p_index); -} -Color VisualServerRaster::multimesh_instance_get_color(RID p_multimesh,int p_index) const{ - - return VSG::storage->multimesh_instance_get_color(p_multimesh,p_index); -} - -void VisualServerRaster::multimesh_set_visible_instances(RID p_multimesh,int p_visible){ - - VSG::storage->multimesh_set_visible_instances(p_multimesh,p_visible); -} -int VisualServerRaster::multimesh_get_visible_instances(RID p_multimesh) const{ - - return VSG::storage->multimesh_get_visible_instances(p_multimesh); -} - - -/* IMMEDIATE API */ - -RID VisualServerRaster::immediate_create(){ - - return VSG::storage->immediate_create(); -} -void VisualServerRaster::immediate_begin(RID p_immediate,PrimitiveType p_rimitive,RID p_texture){ - - VSG::storage->immediate_begin(p_immediate,p_rimitive,p_texture); -} -void VisualServerRaster::immediate_vertex(RID p_immediate,const Vector3& p_vertex){ - - VSG::storage->immediate_vertex(p_immediate,p_vertex); -} -void VisualServerRaster::immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex){ - - VSG::storage->immediate_vertex_2d(p_immediate,p_vertex); - -} -void VisualServerRaster::immediate_normal(RID p_immediate,const Vector3& p_normal){ - - VSG::storage->immediate_normal(p_immediate,p_normal); - -} -void VisualServerRaster::immediate_tangent(RID p_immediate,const Plane& p_tangent){ - - VSG::storage->immediate_tangent(p_immediate,p_tangent); -} -void VisualServerRaster::immediate_color(RID p_immediate,const Color& p_color){ - - VSG::storage->immediate_color(p_immediate,p_color); - -} -void VisualServerRaster::immediate_uv(RID p_immediate,const Vector2& tex_uv){ - - VSG::storage->immediate_uv(p_immediate,tex_uv); - -} -void VisualServerRaster::immediate_uv2(RID p_immediate,const Vector2& tex_uv){ - - VSG::storage->immediate_uv2(p_immediate,tex_uv); - -} -void VisualServerRaster::immediate_end(RID p_immediate){ - - VSG::storage->immediate_end(p_immediate); - -} -void VisualServerRaster::immediate_clear(RID p_immediate){ - - VSG::storage->immediate_clear(p_immediate); - -} -void VisualServerRaster::immediate_set_material(RID p_immediate,RID p_material){ - - VSG::storage->immediate_set_material(p_immediate,p_material); - -} -RID VisualServerRaster::immediate_get_material(RID p_immediate) const{ - - return VSG::storage->immediate_get_material(p_immediate); -} - -/* SKELETON API */ - -RID VisualServerRaster::skeleton_create(){ - - return VSG::storage->skeleton_create(); -} -void VisualServerRaster::skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton){ - - VSG::storage->skeleton_allocate(p_skeleton,p_bones,p_2d_skeleton); -} -int VisualServerRaster::skeleton_get_bone_count(RID p_skeleton) const{ - - return VSG::storage->skeleton_get_bone_count(p_skeleton); -} -void VisualServerRaster::skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform){ - - VSG::storage->skeleton_bone_set_transform(p_skeleton,p_bone,p_transform); -} -Transform VisualServerRaster::skeleton_bone_get_transform(RID p_skeleton,int p_bone){ - - return VSG::storage->skeleton_bone_get_transform(p_skeleton,p_bone); -} -void VisualServerRaster::skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform){ - - VSG::storage->skeleton_bone_set_transform_2d(p_skeleton,p_bone,p_transform); -} -Matrix32 VisualServerRaster::skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone){ - - return VSG::storage->skeleton_bone_get_transform_2d(p_skeleton,p_bone); -} - -/* Light API */ - -RID VisualServerRaster::light_create(LightType p_type){ - - return VSG::storage->light_create(p_type); -} - -void VisualServerRaster::light_set_color(RID p_light,const Color& p_color){ - - VSG::storage->light_set_color(p_light,p_color); -} -void VisualServerRaster::light_set_param(RID p_light,LightParam p_param,float p_value){ - - VSG::storage->light_set_param(p_light,p_param,p_value); -} -void VisualServerRaster::light_set_shadow(RID p_light,bool p_enabled){ - - VSG::storage->light_set_shadow(p_light,p_enabled); -} -void VisualServerRaster::light_set_projector(RID p_light,RID p_texture){ - - VSG::storage->light_set_projector(p_light,p_texture); -} -void VisualServerRaster::light_set_attenuation_texure(RID p_light,RID p_texture){ - - VSG::storage->light_set_attenuation_texure(p_light,p_texture); -} -void VisualServerRaster::light_set_negative(RID p_light,bool p_enable){ - - VSG::storage->light_set_negative(p_light,p_enable); -} -void VisualServerRaster::light_set_cull_mask(RID p_light,uint32_t p_mask){ - - VSG::storage->light_set_cull_mask(p_light,p_mask); -} -void VisualServerRaster::light_set_shader(RID p_light,RID p_shader){ - - VSG::storage->light_set_shader(p_light,p_shader); -} - - -void VisualServerRaster::light_directional_set_shadow_mode(RID p_light,LightDirectionalShadowMode p_mode){ - - VSG::storage->light_directional_set_shadow_mode(p_light,p_mode); -} - -/* PROBE API */ - -RID VisualServerRaster::reflection_probe_create(){ - - return VSG::storage->reflection_probe_create(); -} - -void VisualServerRaster::reflection_probe_set_intensity(RID p_probe, float p_intensity){ - - VSG::storage->reflection_probe_set_intensity(p_probe,p_intensity); -} -void VisualServerRaster::reflection_probe_set_clip(RID p_probe, float p_near, float p_far){ - - VSG::storage->reflection_probe_set_clip(p_probe,p_near,p_far); -} -void VisualServerRaster::reflection_probe_set_min_blend_distance(RID p_probe, float p_distance){ - - VSG::storage->reflection_probe_set_min_blend_distance(p_probe,p_distance); -} -void VisualServerRaster::reflection_probe_set_extents(RID p_probe, const Vector3& p_extents){ - - VSG::storage->reflection_probe_set_extents(p_probe,p_extents); -} -void VisualServerRaster::reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset){ - - VSG::storage->reflection_probe_set_origin_offset(p_probe,p_offset); -} -void VisualServerRaster::reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable){ - - VSG::storage->reflection_probe_set_enable_parallax_correction(p_probe,p_enable); -} -void VisualServerRaster::reflection_probe_set_resolution(RID p_probe, int p_resolution){ - - VSG::storage->reflection_probe_set_resolution(p_probe,p_resolution); -} -void VisualServerRaster::reflection_probe_set_hide_skybox(RID p_probe, bool p_hide){ - - VSG::storage->reflection_probe_set_hide_skybox(p_probe,p_hide); -} -void VisualServerRaster::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers){ - - VSG::storage->reflection_probe_set_cull_mask(p_probe,p_layers); -} - - -/* ROOM API */ - -RID VisualServerRaster::room_create(){ - - return VSG::storage->room_create(); -} -void VisualServerRaster::room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform){ - - VSG::storage->room_add_bounds(p_room,p_convex_polygon,p_height,p_transform); -} -void VisualServerRaster::room_clear_bounds(){ - - VSG::storage->room_clear_bounds(); -} - -/* PORTAL API */ - -// portals are only (x/y) points, forming a convex shape, which its clockwise -// order points outside. (z is 0); - -RID VisualServerRaster::portal_create(){ - - return VSG::storage->portal_create(); -} -void VisualServerRaster::portal_set_shape(RID p_portal, const Vector& p_shape){ - - VSG::storage->portal_set_shape(p_portal,p_shape); -} -void VisualServerRaster::portal_set_enabled(RID p_portal, bool p_enabled) { - - VSG::storage->portal_set_enabled(p_portal,p_enabled); -} -void VisualServerRaster::portal_set_disable_distance(RID p_portal, float p_distance){ - - VSG::storage->portal_set_disable_distance(p_portal,p_distance); -} -void VisualServerRaster::portal_set_disabled_color(RID p_portal, const Color& p_color){ - - VSG::storage->portal_set_disabled_color(p_portal,p_color); -} /* CAMERA API */ @@ -577,118 +65,6 @@ void VisualServerRaster::camera_set_use_vertical_aspect(RID p_camera,bool p_enab } -/* VIEWPORT TARGET API */ - -RID VisualServerRaster::viewport_create(){ - - return VSG::viewport->viewport_create(); -} - -void VisualServerRaster::viewport_set_size(RID p_viewport,int p_width,int p_height){ - - VSG::viewport->viewport_set_size(p_viewport,p_width,p_height); -} - -void VisualServerRaster::viewport_set_active(RID p_viewport,bool p_active) { - - VSG::viewport->viewport_set_active(p_viewport,p_active); -} - -void VisualServerRaster::viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode) { - - VSG::viewport->viewport_set_clear_mode(p_viewport,p_clear_mode); -} - -void VisualServerRaster::viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect,int p_screen){ - - VSG::viewport->viewport_attach_to_screen(p_viewport,p_rect,p_screen); - -} -void VisualServerRaster::viewport_detach(RID p_viewport){ - - VSG::viewport->viewport_detach(p_viewport); - -} - -void VisualServerRaster::viewport_set_update_mode(RID p_viewport,ViewportUpdateMode p_mode){ - - VSG::viewport->viewport_set_update_mode(p_viewport,p_mode); - -} -void VisualServerRaster::viewport_set_vflip(RID p_viewport,bool p_enable){ - - VSG::viewport->viewport_set_vflip(p_viewport,p_enable); - -} - -RID VisualServerRaster::viewport_get_texture(RID p_viewport) const{ - - return VSG::viewport->viewport_get_texture(p_viewport); - -} -Image VisualServerRaster::viewport_capture(RID p_viewport) const{ - - return VSG::viewport->viewport_capture(p_viewport); -} - -void VisualServerRaster::viewport_set_hide_scenario(RID p_viewport,bool p_hide){ - - return VSG::viewport->viewport_set_hide_scenario(p_viewport,p_hide); -} -void VisualServerRaster::viewport_set_hide_canvas(RID p_viewport,bool p_hide){ - - return VSG::viewport->viewport_set_hide_canvas(p_viewport,p_hide); - -} -void VisualServerRaster::viewport_set_disable_environment(RID p_viewport,bool p_disable){ - - return VSG::viewport->viewport_set_disable_environment(p_viewport,p_disable); - -} - -void VisualServerRaster::viewport_attach_camera(RID p_viewport,RID p_camera){ - - return VSG::viewport->viewport_attach_camera(p_viewport,p_camera); - -} -void VisualServerRaster::viewport_set_scenario(RID p_viewport,RID p_scenario){ - - return VSG::viewport->viewport_set_scenario(p_viewport,p_scenario); - -} -void VisualServerRaster::viewport_attach_canvas(RID p_viewport,RID p_canvas){ - - return VSG::viewport->viewport_attach_canvas(p_viewport,p_canvas); - -} -void VisualServerRaster::viewport_remove_canvas(RID p_viewport,RID p_canvas){ - - return VSG::viewport->viewport_remove_canvas(p_viewport,p_canvas); - -} -void VisualServerRaster::viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset){ - - return VSG::viewport->viewport_set_canvas_transform(p_viewport,p_canvas,p_offset); - -} -void VisualServerRaster::viewport_set_transparent_background(RID p_viewport,bool p_enabled){ - - return VSG::viewport->viewport_set_transparent_background(p_viewport,p_enabled); - -} - -void VisualServerRaster::viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform){ - - return VSG::viewport->viewport_set_global_canvas_transform(p_viewport,p_transform); - -} -void VisualServerRaster::viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer){ - - return VSG::viewport->viewport_set_canvas_layer(p_viewport,p_canvas,p_layer); - -} - - /* ENVIRONMENT API */ RID VisualServerRaster::environment_create(){ @@ -836,349 +212,6 @@ void VisualServerRaster::instance_geometry_set_as_instance_lod(RID p_instance,RI } -/* CANVAS (2D) */ - -RID VisualServerRaster::canvas_create(){ - - return VSG::canvas->canvas_create(); - -} -void VisualServerRaster::canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring){ - - VSG::canvas->canvas_set_item_mirroring(p_canvas,p_item,p_mirroring); -} -void VisualServerRaster::canvas_set_modulate(RID p_canvas,const Color& p_color){ - - VSG::canvas->canvas_set_modulate(p_canvas,p_color); -} - - -RID VisualServerRaster::canvas_item_create(){ - - return VSG::canvas->canvas_item_create(); -} -void VisualServerRaster::canvas_item_set_parent(RID p_item,RID p_parent){ - - VSG::canvas->canvas_item_set_parent(p_item,p_parent); - -} - -void VisualServerRaster::canvas_item_set_visible(RID p_item,bool p_visible){ - - VSG::canvas->canvas_item_set_visible(p_item,p_visible); - -} -void VisualServerRaster::canvas_item_set_light_mask(RID p_item,int p_mask){ - - VSG::canvas->canvas_item_set_light_mask(p_item,p_mask); - -} - -void VisualServerRaster::canvas_item_set_transform(RID p_item, const Matrix32& p_transform){ - - VSG::canvas->canvas_item_set_transform(p_item,p_transform); - -} -void VisualServerRaster::canvas_item_set_clip(RID p_item, bool p_clip){ - - VSG::canvas->canvas_item_set_clip(p_item,p_clip); - -} -void VisualServerRaster::canvas_item_set_distance_field_mode(RID p_item, bool p_enable){ - - VSG::canvas->canvas_item_set_distance_field_mode(p_item,p_enable); - -} -void VisualServerRaster::canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect){ - - VSG::canvas->canvas_item_set_custom_rect(p_item,p_custom_rect,p_rect); - -} -void VisualServerRaster::canvas_item_set_modulate(RID p_item, const Color& p_color){ - - VSG::canvas->canvas_item_set_modulate(p_item,p_color); - -} -void VisualServerRaster::canvas_item_set_self_modulate(RID p_item, const Color& p_color){ - - VSG::canvas->canvas_item_set_self_modulate(p_item,p_color); - -} - -void VisualServerRaster::canvas_item_set_draw_behind_parent(RID p_item, bool p_enable){ - - VSG::canvas->canvas_item_set_draw_behind_parent(p_item,p_enable); - -} - - -void VisualServerRaster::canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width,bool p_antialiased){ - - VSG::canvas->canvas_item_add_line(p_item,p_from,p_to,p_color,p_width,p_antialiased); - -} -void VisualServerRaster::canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color){ - - VSG::canvas->canvas_item_add_rect(p_item,p_rect,p_color); - -} -void VisualServerRaster::canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color){ - - VSG::canvas->canvas_item_add_circle(p_item,p_pos,p_radius,p_color); - -} -void VisualServerRaster::canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile,const Color& p_modulate,bool p_transpose){ - - VSG::canvas->canvas_item_add_texture_rect(p_item,p_rect,p_texture,p_tile,p_modulate,p_transpose); - -} -void VisualServerRaster::canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate,bool p_transpose){ - - VSG::canvas->canvas_item_add_texture_rect_region(p_item,p_rect,p_texture,p_src_rect,p_modulate,p_transpose); - -} -void VisualServerRaster::canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,NinePatchAxisMode p_x_axis_mode, NinePatchAxisMode p_y_axis_mode,bool p_draw_center,const Color& p_modulate){ - - VSG::canvas->canvas_item_add_nine_patch(p_item,p_rect,p_source,p_texture,p_topleft,p_bottomright,p_x_axis_mode,p_y_axis_mode,p_draw_center,p_modulate); - -} -void VisualServerRaster::canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width){ - - VSG::canvas->canvas_item_add_primitive(p_item,p_points,p_colors,p_uvs,p_texture,p_width); - -} -void VisualServerRaster::canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture){ - - VSG::canvas->canvas_item_add_polygon(p_item,p_points,p_colors,p_uvs,p_texture); - -} -void VisualServerRaster::canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture, int p_count){ - - VSG::canvas->canvas_item_add_triangle_array(p_item,p_indices,p_points,p_colors,p_uvs,p_texture,p_count); - -} -void VisualServerRaster::canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton){ - - VSG::canvas->canvas_item_add_mesh(p_item,p_mesh,p_skeleton); - -} -void VisualServerRaster::canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton){ - - VSG::canvas->canvas_item_add_multimesh(p_item,p_mesh,p_skeleton); - -} -void VisualServerRaster::canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform){ - - VSG::canvas->canvas_item_add_set_transform(p_item,p_transform); - -} -void VisualServerRaster::canvas_item_add_clip_ignore(RID p_item, bool p_ignore){ - - VSG::canvas->canvas_item_add_clip_ignore(p_item,p_ignore); - -} -void VisualServerRaster::canvas_item_set_sort_children_by_y(RID p_item, bool p_enable){ - - VSG::canvas->canvas_item_set_sort_children_by_y(p_item,p_enable); - -} -void VisualServerRaster::canvas_item_set_z(RID p_item, int p_z){ - - VSG::canvas->canvas_item_set_z(p_item,p_z); - -} -void VisualServerRaster::canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable){ - - VSG::canvas->canvas_item_set_z_as_relative_to_parent(p_item,p_enable); - -} -void VisualServerRaster::canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect){ - - VSG::canvas->canvas_item_set_copy_to_backbuffer(p_item,p_enable,p_rect); - -} - -void VisualServerRaster::canvas_item_clear(RID p_item){ - - VSG::canvas->canvas_item_clear(p_item); - -} -void VisualServerRaster::canvas_item_set_draw_index(RID p_item,int p_index){ - - VSG::canvas->canvas_item_set_draw_index(p_item,p_index); - -} - -void VisualServerRaster::canvas_item_set_material(RID p_item, RID p_material){ - - VSG::canvas->canvas_item_set_material(p_item,p_material); - -} - -void VisualServerRaster::canvas_item_set_use_parent_material(RID p_item, bool p_enable) { - - VSG::canvas->canvas_item_set_use_parent_material(p_item,p_enable); -} - -RID VisualServerRaster::canvas_light_create(){ - - return VSG::canvas->canvas_light_create(); -} - -void VisualServerRaster::canvas_light_attach_to_canvas(RID p_light,RID p_canvas){ - - VSG::canvas->canvas_light_attach_to_canvas(p_light,p_canvas); - -} -void VisualServerRaster::canvas_light_set_enabled(RID p_light, bool p_enabled){ - - VSG::canvas->canvas_light_set_enabled(p_light,p_enabled); - -} -void VisualServerRaster::canvas_light_set_scale(RID p_light, float p_scale){ - - VSG::canvas->canvas_light_set_scale(p_light,p_scale); - -} -void VisualServerRaster::canvas_light_set_transform(RID p_light, const Matrix32& p_transform){ - - VSG::canvas->canvas_light_set_transform(p_light,p_transform); - -} -void VisualServerRaster::canvas_light_set_texture(RID p_light, RID p_texture){ - - VSG::canvas->canvas_light_set_texture(p_light,p_texture); - -} -void VisualServerRaster::canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset){ - - VSG::canvas->canvas_light_set_texture_offset(p_light,p_offset); - -} -void VisualServerRaster::canvas_light_set_color(RID p_light, const Color& p_color){ - - VSG::canvas->canvas_light_set_color(p_light,p_color); - -} -void VisualServerRaster::canvas_light_set_height(RID p_light, float p_height){ - - VSG::canvas->canvas_light_set_height(p_light,p_height); - -} -void VisualServerRaster::canvas_light_set_energy(RID p_light, float p_energy){ - - VSG::canvas->canvas_light_set_energy(p_light,p_energy); - -} -void VisualServerRaster::canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z){ - - VSG::canvas->canvas_light_set_z_range(p_light,p_min_z,p_max_z); - -} -void VisualServerRaster::canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer){ - - VSG::canvas->canvas_light_set_layer_range(p_light,p_min_layer,p_max_layer); - -} -void VisualServerRaster::canvas_light_set_item_cull_mask(RID p_light, int p_mask){ - - VSG::canvas->canvas_light_set_item_cull_mask(p_light,p_mask); - -} -void VisualServerRaster::canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask){ - - VSG::canvas->canvas_light_set_item_shadow_cull_mask(p_light,p_mask); - -} - -void VisualServerRaster::canvas_light_set_mode(RID p_light, CanvasLightMode p_mode){ - - VSG::canvas->canvas_light_set_mode(p_light,p_mode); - -} - - -void VisualServerRaster::canvas_light_set_shadow_enabled(RID p_light, bool p_enabled) { - - VSG::canvas->canvas_light_set_shadow_enabled(p_light,p_enabled); - -} -void VisualServerRaster::canvas_light_set_shadow_buffer_size(RID p_light, int p_size) { - - VSG::canvas->canvas_light_set_shadow_buffer_size(p_light,p_size); - -} -void VisualServerRaster::canvas_light_set_shadow_gradient_length(RID p_light, float p_length) { - - VSG::canvas->canvas_light_set_shadow_gradient_length(p_light,p_length); - -} -void VisualServerRaster::canvas_light_set_shadow_filter(RID p_light, CanvasLightShadowFilter p_filter){ - - VSG::canvas->canvas_light_set_shadow_filter(p_light,p_filter); - -} -void VisualServerRaster::canvas_light_set_shadow_color(RID p_light, const Color& p_color){ - - VSG::canvas->canvas_light_set_shadow_color(p_light,p_color); - -} - - - -RID VisualServerRaster::canvas_light_occluder_create() { - - return VSG::canvas->canvas_light_occluder_create(); -} - -void VisualServerRaster::canvas_light_occluder_attach_to_canvas(RID p_occluder,RID p_canvas){ - - VSG::canvas->canvas_light_occluder_attach_to_canvas(p_occluder,p_canvas); - -} -void VisualServerRaster::canvas_light_occluder_set_enabled(RID p_occluder,bool p_enabled) { - - VSG::canvas->canvas_light_occluder_set_enabled(p_occluder,p_enabled); - -} -void VisualServerRaster::canvas_light_occluder_set_polygon(RID p_occluder,RID p_polygon){ - - VSG::canvas->canvas_light_occluder_set_polygon(p_occluder,p_polygon); - -} -void VisualServerRaster::canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform){ - - VSG::canvas->canvas_light_occluder_set_transform(p_occluder,p_xform); - -} -void VisualServerRaster::canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask){ - - VSG::canvas->canvas_light_occluder_set_light_mask(p_occluder,p_mask); - -} - -RID VisualServerRaster::canvas_occluder_polygon_create() { - - return VSG::canvas->canvas_occluder_polygon_create(); -} -void VisualServerRaster::canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed){ - - VSG::canvas->canvas_occluder_polygon_set_shape(p_occluder_polygon,p_shape,p_closed); -} -void VisualServerRaster::canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape){ - - VSG::canvas->canvas_occluder_polygon_set_shape_as_lines(p_occluder_polygon,p_shape); - -} - - -void VisualServerRaster::canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,CanvasOccluderPolygonCullMode p_mode){ - - VSG::canvas->canvas_occluder_polygon_set_cull_mode(p_occluder_polygon,p_mode); - -} - - /* CURSOR */ void VisualServerRaster::cursor_set_rotation(float p_rotation, int p_cursor ){ @@ -1224,7 +257,7 @@ void VisualServerRaster::draw(){ //if (changes) // print_line("changes: "+itos(changes)); -// changes=0; + changes=0; VSG::rasterizer->begin_frame(); VSG::viewport->draw_viewports(); //_draw_cursors_and_margins(); @@ -1236,7 +269,7 @@ void VisualServerRaster::sync(){ } bool VisualServerRaster::has_changed() const{ - return false; + return changes>0; } void VisualServerRaster::init(){ diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 4cad03928b3..cd6d7b38d36 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -34,7 +34,9 @@ #include "servers/visual/rasterizer.h" #include "allocators.h" #include "octree.h" - +#include "visual_server_global.h" +#include "visual_server_viewport.h" +#include "visual_server_canvas.h" /** @author Juan Linietsky */ @@ -139,7 +141,7 @@ class VisualServerRaster : public VisualServer { Transform transform; - Camera() { + Camera() { visible_layers=0xFFFFFFFF; fov=60; @@ -148,8 +150,8 @@ class VisualServerRaster : public VisualServer { size=1.0; vaspect=false; - } - }; + } + }; struct Instance; @@ -580,188 +582,209 @@ class VisualServerRaster : public VisualServer { public: +#define DISPLAY_CHANGED changes++; + +#define BIND0R(m_r,m_name) m_r m_name() { return BINDBASE->m_name(); } +#define BIND1R(m_r,m_name,m_type1) m_r m_name(m_type1 arg1) { return BINDBASE->m_name(arg1); } +#define BIND1RC(m_r,m_name,m_type1) m_r m_name(m_type1 arg1) const { return BINDBASE->m_name(arg1); } +#define BIND2RC(m_r,m_name,m_type1,m_type2) m_r m_name(m_type1 arg1,m_type2 arg2) const { return BINDBASE->m_name(arg1,arg2); } +#define BIND3RC(m_r,m_name,m_type1,m_type2,m_type3) m_r m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3) const { return BINDBASE->m_name(arg1,arg2,arg3); } +#define BIND4RC(m_r,m_name,m_type1,m_type2,m_type3,m_type4) m_r m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4) const { return BINDBASE->m_name(arg1,arg2,arg3,arg4); } + +#define BIND1(m_name,m_type1) void m_name(m_type1 arg1) { DISPLAY_CHANGED BINDBASE->m_name(arg1); } +#define BIND2(m_name,m_type1,m_type2) void m_name(m_type1 arg1,m_type2 arg2) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2); } +#define BIND2C(m_name,m_type1,m_type2) void m_name(m_type1 arg1,m_type2 arg2) const { BINDBASE->m_name(arg1,arg2); } +#define BIND3(m_name,m_type1,m_type2,m_type3) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3); } +#define BIND4(m_name,m_type1,m_type2,m_type3,m_type4) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4); } +#define BIND5(m_name,m_type1,m_type2,m_type3,m_type4,m_type5) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5); } +#define BIND6(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6); } +#define BIND7(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7); } +#define BIND8(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7,m_type8) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7,m_type8 arg8) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8); } +#define BIND10(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7,m_type8,m_type9,m_type10) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7,m_type8 arg8,m_type9 arg9,m_type10 arg10) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); } + +//from now on, calls forwarded to this singleton +#define BINDBASE VSG::storage /* TEXTURE API */ - virtual RID texture_create(); - virtual void texture_allocate(RID p_texture,int p_width, int p_height,Image::Format p_format,uint32_t p_flags=TEXTURE_FLAGS_DEFAULT); - virtual void texture_set_data(RID p_texture,const Image& p_image,CubeMapSide p_cube_side=CUBEMAP_LEFT); - virtual Image texture_get_data(RID p_texture,CubeMapSide p_cube_side=CUBEMAP_LEFT) const; - virtual void texture_set_flags(RID p_texture,uint32_t p_flags) ; - virtual uint32_t texture_get_flags(RID p_texture) const; - virtual Image::Format texture_get_format(RID p_texture) const; - virtual uint32_t texture_get_width(RID p_texture) const; - virtual uint32_t texture_get_height(RID p_texture) const; - virtual void texture_set_size_override(RID p_texture,int p_width, int p_height); + + BIND0R(RID,texture_create) + BIND5(texture_allocate,RID,int,int,Image::Format,uint32_t) + BIND3(texture_set_data,RID,const Image&,CubeMapSide) + BIND2RC(Image,texture_get_data,RID,CubeMapSide) + BIND2(texture_set_flags,RID,uint32_t) + BIND1RC(uint32_t,texture_get_flags,RID) + BIND1RC(Image::Format,texture_get_format,RID) + BIND1RC(uint32_t,texture_get_width,RID) + BIND1RC(uint32_t,texture_get_height,RID) + BIND3(texture_set_size_override,RID,int,int) - virtual void texture_set_path(RID p_texture,const String& p_path); - virtual String texture_get_path(RID p_texture) const; + BIND2(texture_set_path,RID,const String&) + BIND1RC(String,texture_get_path,RID) + BIND1(texture_set_shrink_all_x2_on_set_data,bool) + BIND1(texture_debug_usage,List*) - virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable); - - virtual void texture_debug_usage(List *r_info); /* SHADER API */ + BIND1R(RID,shader_create,ShaderMode) - virtual RID shader_create(ShaderMode p_mode=SHADER_SPATIAL); - virtual void shader_set_mode(RID p_shader,ShaderMode p_mode); - virtual ShaderMode shader_get_mode(RID p_shader) const; + BIND2(shader_set_mode,RID,ShaderMode) + BIND1RC(ShaderMode,shader_get_mode,RID) - virtual void shader_set_code(RID p_shader, const String& p_code); - virtual String shader_get_code(RID p_shader) const; - virtual void shader_get_param_list(RID p_shader, List *p_param_list) const; + BIND2(shader_set_code,RID,const String&) + BIND1RC(String,shader_get_code,RID) - virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture); - virtual RID shader_get_default_texture_param(RID p_shader, const StringName& p_name) const; + BIND2C(shader_get_param_list,RID, List *) + + BIND3(shader_set_default_texture_param,RID,const StringName&,RID) + BIND2RC(RID,shader_get_default_texture_param,RID,const StringName&) /* COMMON MATERIAL API */ - virtual RID material_create(); + BIND0R(RID,material_create) - virtual void material_set_shader(RID p_shader_material, RID p_shader); - virtual RID material_get_shader(RID p_shader_material) const; + BIND2(material_set_shader,RID,RID) + BIND1RC(RID,material_get_shader,RID) - virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); - virtual Variant material_get_param(RID p_material, const StringName& p_param) const; + BIND3(material_set_param,RID, const StringName&, const Variant& ) + BIND2RC(Variant,material_get_param,RID, const StringName& ) /* MESH API */ - virtual RID mesh_create(); + BIND0R(RID,mesh_create) - virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >()); + BIND8(mesh_add_surface,RID,uint32_t,PrimitiveType,const DVector&,int ,const DVector& ,int ,const Vector >& ) - virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount); - virtual int mesh_get_morph_target_count(RID p_mesh) const; + BIND2(mesh_set_morph_target_count,RID,int) + BIND1RC(int,mesh_get_morph_target_count,RID) - virtual void mesh_set_morph_target_mode(RID p_mesh,MorphTargetMode p_mode); - virtual MorphTargetMode mesh_get_morph_target_mode(RID p_mesh) const; + BIND2(mesh_set_morph_target_mode,RID,MorphTargetMode) + BIND1RC(MorphTargetMode, mesh_get_morph_target_mode,RID ) - virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material); - virtual RID mesh_surface_get_material(RID p_mesh, int p_surface) const; + BIND3(mesh_surface_set_material,RID, int , RID ) + BIND2RC(RID,mesh_surface_get_material,RID, int ) - virtual int mesh_surface_get_array_len(RID p_mesh, int p_surface) const; - virtual int mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const; + BIND2RC(int,mesh_surface_get_array_len,RID,int) + BIND2RC(int,mesh_surface_get_array_index_len,RID,int) - virtual DVector mesh_surface_get_array(RID p_mesh, int p_surface) const; - virtual DVector mesh_surface_get_index_array(RID p_mesh, int p_surface) const; + BIND2RC(DVector,mesh_surface_get_array,RID,int) + BIND2RC(DVector,mesh_surface_get_index_array,RID, int) + BIND2RC(uint32_t,mesh_surface_get_format,RID,int) + BIND2RC(PrimitiveType,mesh_surface_get_primitive_type,RID,int) - virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const; - virtual PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const; + BIND2(mesh_remove_surface,RID,int) + BIND1RC(int,mesh_get_surface_count,RID) - virtual void mesh_remove_surface(RID p_mesh,int p_index); - virtual int mesh_get_surface_count(RID p_mesh) const; + BIND2(mesh_set_custom_aabb,RID,const AABB&) + BIND1RC(AABB,mesh_get_custom_aabb,RID) - virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb); - virtual AABB mesh_get_custom_aabb(RID p_mesh) const; - - virtual void mesh_clear(RID p_mesh); + BIND1(mesh_clear,RID) /* MULTIMESH API */ - virtual RID multimesh_create(); + BIND0R(RID,multimesh_create) - virtual void multimesh_allocate(RID p_multimesh,int p_instances,MultimeshTransformFormat p_transform_format,MultimeshColorFormat p_color_format,bool p_gen_aabb=true); - virtual int multimesh_get_instance_count(RID p_multimesh) const; + BIND5(multimesh_allocate,RID,int,MultimeshTransformFormat,MultimeshColorFormat,bool) + BIND1RC(int,multimesh_get_instance_count,RID) - virtual void multimesh_set_mesh(RID p_multimesh,RID p_mesh); - virtual void multimesh_set_custom_aabb(RID p_multimesh,const AABB& p_aabb); - virtual void multimesh_instance_set_transform(RID p_multimesh,int p_index,const Transform& p_transform); - virtual void multimesh_instance_set_transform_2d(RID p_multimesh,int p_index,const Matrix32& p_transform); - virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color); + BIND2(multimesh_set_mesh,RID,RID) + BIND2(multimesh_set_custom_aabb,RID,const AABB&) + BIND3(multimesh_instance_set_transform,RID,int,const Transform&) + BIND3(multimesh_instance_set_transform_2d,RID,int,const Matrix32& ) + BIND3(multimesh_instance_set_color,RID,int,const Color&) - virtual RID multimesh_get_mesh(RID p_multimesh) const; - virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const; + BIND1RC(RID,multimesh_get_mesh,RID) + BIND1RC(AABB,multimesh_get_custom_aabb,RID) - virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const; - virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const; - virtual Color multimesh_instance_get_color(RID p_multimesh,int p_index) const; + BIND2RC(Transform,multimesh_instance_get_transform,RID,int ) + BIND2RC(Matrix32,multimesh_instance_get_transform_2d,RID,int) + BIND2RC(Color,multimesh_instance_get_color,RID,int) - virtual void multimesh_set_visible_instances(RID p_multimesh,int p_visible); - virtual int multimesh_get_visible_instances(RID p_multimesh) const; + BIND2(multimesh_set_visible_instances,RID,int) + BIND1RC(int,multimesh_get_visible_instances,RID) /* IMMEDIATE API */ - virtual RID immediate_create(); - virtual void immediate_begin(RID p_immediate,PrimitiveType p_rimitive,RID p_texture=RID()); - virtual void immediate_vertex(RID p_immediate,const Vector3& p_vertex); - virtual void immediate_vertex_2d(RID p_immediate,const Vector3& p_vertex); - virtual void immediate_normal(RID p_immediate,const Vector3& p_normal); - virtual void immediate_tangent(RID p_immediate,const Plane& p_tangent); - virtual void immediate_color(RID p_immediate,const Color& p_color); - virtual void immediate_uv(RID p_immediate,const Vector2& tex_uv); - virtual void immediate_uv2(RID p_immediate,const Vector2& tex_uv); - virtual void immediate_end(RID p_immediate); - virtual void immediate_clear(RID p_immediate); - virtual void immediate_set_material(RID p_immediate,RID p_material); - virtual RID immediate_get_material(RID p_immediate) const; + BIND0R(RID,immediate_create) + BIND3(immediate_begin,RID,PrimitiveType,RID) + BIND2(immediate_vertex,RID,const Vector3&) + BIND2(immediate_vertex_2d,RID,const Vector3&) + BIND2(immediate_normal,RID,const Vector3&) + BIND2(immediate_tangent,RID,const Plane&) + BIND2(immediate_color,RID,const Color&) + BIND2(immediate_uv,RID,const Vector2& ) + BIND2(immediate_uv2,RID,const Vector2&) + BIND1(immediate_end,RID) + BIND1(immediate_clear,RID) + BIND2(immediate_set_material,RID ,RID ) + BIND1RC(RID,immediate_get_material,RID) /* SKELETON API */ - virtual RID skeleton_create(); - virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false); - virtual int skeleton_get_bone_count(RID p_skeleton) const; - virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform); - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone); - virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform); - virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone); + BIND0R(RID,skeleton_create) + BIND3(skeleton_allocate,RID,int,bool) + BIND1RC(int,skeleton_get_bone_count,RID) + BIND3(skeleton_bone_set_transform,RID,int,const Transform&) + BIND2RC(Transform,skeleton_bone_get_transform,RID,int) + BIND3(skeleton_bone_set_transform_2d,RID,int, const Matrix32& ) + BIND2RC(Matrix32,skeleton_bone_get_transform_2d,RID,int) /* Light API */ - virtual RID light_create(LightType p_type); + BIND1R(RID,light_create,LightType) - virtual void light_set_color(RID p_light,const Color& p_color); - virtual void light_set_param(RID p_light,LightParam p_param,float p_value); - virtual void light_set_shadow(RID p_light,bool p_enabled); - virtual void light_set_projector(RID p_light,RID p_texture); - virtual void light_set_attenuation_texure(RID p_light,RID p_texture); - virtual void light_set_negative(RID p_light,bool p_enable); - virtual void light_set_cull_mask(RID p_light,uint32_t p_mask); - virtual void light_set_shader(RID p_light,RID p_shader); + BIND2(light_set_color,RID,const Color&) + BIND3(light_set_param,RID ,LightParam ,float ) + BIND2(light_set_shadow,RID ,bool ) + BIND2(light_set_projector,RID,RID ) + BIND2(light_set_attenuation_texure,RID,RID ) + BIND2(light_set_negative,RID,bool ) + BIND2(light_set_cull_mask,RID ,uint32_t ) + BIND2(light_set_shader,RID ,RID ) - - virtual void light_directional_set_shadow_mode(RID p_light,LightDirectionalShadowMode p_mode); + BIND2(light_directional_set_shadow_mode,RID,LightDirectionalShadowMode) /* PROBE API */ - virtual RID reflection_probe_create(); + BIND0R(RID,reflection_probe_create) - virtual void reflection_probe_set_intensity(RID p_probe, float p_intensity); - virtual void reflection_probe_set_clip(RID p_probe, float p_near, float p_far); - virtual void reflection_probe_set_min_blend_distance(RID p_probe, float p_distance); - virtual void reflection_probe_set_extents(RID p_probe, const Vector3& p_extents); - virtual void reflection_probe_set_origin_offset(RID p_probe, const Vector3& p_offset); - virtual void reflection_probe_set_enable_parallax_correction(RID p_probe, bool p_enable); - virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution); - virtual void reflection_probe_set_hide_skybox(RID p_probe, bool p_hide); - virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers); + BIND2(reflection_probe_set_intensity,RID, float ) + BIND3(reflection_probe_set_clip,RID, float , float ) + BIND2(reflection_probe_set_min_blend_distance,RID, float ) + BIND2(reflection_probe_set_extents,RID, const Vector3& ) + BIND2(reflection_probe_set_origin_offset,RID, const Vector3& ) + BIND2(reflection_probe_set_enable_parallax_correction,RID, bool ) + BIND2(reflection_probe_set_resolution,RID, int ) + BIND2(reflection_probe_set_hide_skybox,RID, bool ) + BIND2(reflection_probe_set_cull_mask,RID, uint32_t ) /* ROOM API */ - virtual RID room_create(); - virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform); - virtual void room_clear_bounds(); + BIND0R(RID,room_create) + BIND4(room_add_bounds,RID, const DVector& ,float ,const Transform& ) + BIND1(room_clear_bounds,RID) /* PORTAL API */ // portals are only (x/y) points, forming a convex shape, which its clockwise // order points outside. (z is 0); - virtual RID portal_create(); - virtual void portal_set_shape(RID p_portal, const Vector& p_shape); - virtual void portal_set_enabled(RID p_portal, bool p_enabled); - virtual void portal_set_disable_distance(RID p_portal, float p_distance); - virtual void portal_set_disabled_color(RID p_portal, const Color& p_color); + BIND0R(RID,portal_create) + BIND2(portal_set_shape,RID , const Vector& ) + BIND2(portal_set_enabled,RID , bool ) + BIND2(portal_set_disable_distance,RID , float ) + BIND2(portal_set_disabled_color,RID , const Color& ) /* CAMERA API */ @@ -773,40 +796,44 @@ public: virtual void camera_set_environment(RID p_camera,RID p_env); virtual void camera_set_use_vertical_aspect(RID p_camera,bool p_enable); +#undef BINDBASE +//from now on, calls forwarded to this singleton +#define BINDBASE VSG::viewport /* VIEWPORT TARGET API */ - virtual RID viewport_create(); + BIND0R(RID,viewport_create) - virtual void viewport_set_size(RID p_viewport,int p_width,int p_height); + BIND3(viewport_set_size,RID,int ,int ) - virtual void viewport_set_active(RID p_viewport,bool p_active); + BIND2(viewport_set_active,RID ,bool ) - virtual void viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode); + BIND2(viewport_set_clear_mode,RID,ViewportClearMode ) - virtual void viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect=Rect2(),int p_screen=0); - virtual void viewport_detach(RID p_viewport); + BIND3(viewport_attach_to_screen,RID ,const Rect2& ,int ) + BIND1(viewport_detach,RID) - virtual void viewport_set_update_mode(RID p_viewport,ViewportUpdateMode p_mode); - virtual void viewport_set_vflip(RID p_viewport,bool p_enable); + BIND2(viewport_set_update_mode,RID,ViewportUpdateMode ) + BIND2(viewport_set_vflip,RID,bool) - virtual RID viewport_get_texture(RID p_viewport) const; - virtual Image viewport_capture(RID p_viewport) const; + BIND1RC(RID,viewport_get_texture,RID ) + BIND1RC(Image,viewport_capture,RID ) - virtual void viewport_set_hide_scenario(RID p_viewport,bool p_hide); - virtual void viewport_set_hide_canvas(RID p_viewport,bool p_hide); - virtual void viewport_set_disable_environment(RID p_viewport,bool p_disable); + BIND2(viewport_set_hide_scenario,RID,bool ) + BIND2(viewport_set_hide_canvas,RID,bool ) + BIND2(viewport_set_disable_environment,RID,bool ) - virtual void viewport_attach_camera(RID p_viewport,RID p_camera); - virtual void viewport_set_scenario(RID p_viewport,RID p_scenario); - virtual void viewport_attach_canvas(RID p_viewport,RID p_canvas); - virtual void viewport_remove_canvas(RID p_viewport,RID p_canvas); - virtual void viewport_set_canvas_transform(RID p_viewport,RID p_canvas,const Matrix32& p_offset); - virtual void viewport_set_transparent_background(RID p_viewport,bool p_enabled); + BIND2(viewport_attach_camera,RID,RID ) + BIND2(viewport_set_scenario,RID,RID ) + BIND2(viewport_attach_canvas,RID,RID ) - virtual void viewport_set_global_canvas_transform(RID p_viewport,const Matrix32& p_transform); - virtual void viewport_set_canvas_layer(RID p_viewport,RID p_canvas,int p_layer); + BIND2(viewport_remove_canvas,RID,RID ) + BIND3(viewport_set_canvas_transform,RID ,RID ,const Matrix32& ) + BIND2(viewport_set_transparent_background,RID ,bool ) + + BIND2(viewport_set_global_canvas_transform,RID,const Matrix32& ) + BIND3(viewport_set_canvas_layer,RID ,RID ,int ) /* ENVIRONMENT API */ @@ -872,93 +899,99 @@ public: virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin); virtual void instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance); + +#undef BINDBASE +//from now on, calls forwarded to this singleton +#define BINDBASE VSG::canvas + /* CANVAS (2D) */ - virtual RID canvas_create(); - virtual void canvas_set_item_mirroring(RID p_canvas,RID p_item,const Point2& p_mirroring); - virtual void canvas_set_modulate(RID p_canvas,const Color& p_color); + BIND0R(RID,canvas_create) + BIND3(canvas_set_item_mirroring,RID ,RID ,const Point2& ) + BIND2(canvas_set_modulate,RID,const Color&) - virtual RID canvas_item_create(); - virtual void canvas_item_set_parent(RID p_item,RID p_parent); + BIND0R(RID,canvas_item_create) + BIND2(canvas_item_set_parent,RID ,RID) - virtual void canvas_item_set_visible(RID p_item,bool p_visible); - virtual void canvas_item_set_light_mask(RID p_item,int p_mask); + BIND2(canvas_item_set_visible,RID,bool ) + BIND2(canvas_item_set_light_mask,RID,int ) - virtual void canvas_item_set_transform(RID p_item, const Matrix32& p_transform); - virtual void canvas_item_set_clip(RID p_item, bool p_clip); - virtual void canvas_item_set_distance_field_mode(RID p_item, bool p_enable); - virtual void canvas_item_set_custom_rect(RID p_item, bool p_custom_rect,const Rect2& p_rect=Rect2()); - virtual void canvas_item_set_modulate(RID p_item, const Color& p_color); - virtual void canvas_item_set_self_modulate(RID p_item, const Color& p_color); + BIND2(canvas_item_set_transform,RID, const Matrix32& ) + BIND2(canvas_item_set_clip,RID, bool ) + BIND2(canvas_item_set_distance_field_mode,RID, bool ) + BIND3(canvas_item_set_custom_rect,RID, bool ,const Rect2& ) + BIND2(canvas_item_set_modulate,RID, const Color& ) + BIND2(canvas_item_set_self_modulate,RID, const Color& ) - virtual void canvas_item_set_draw_behind_parent(RID p_item, bool p_enable); + BIND2(canvas_item_set_draw_behind_parent,RID, bool ) - virtual void canvas_item_add_line(RID p_item, const Point2& p_from, const Point2& p_to,const Color& p_color,float p_width=1.0,bool p_antialiased=false); - virtual void canvas_item_add_rect(RID p_item, const Rect2& p_rect, const Color& p_color); - virtual void canvas_item_add_circle(RID p_item, const Point2& p_pos, float p_radius,const Color& p_color); - virtual void canvas_item_add_texture_rect(RID p_item, const Rect2& p_rect, RID p_texture,bool p_tile=false,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); - virtual void canvas_item_add_texture_rect_region(RID p_item, const Rect2& p_rect, RID p_texture,const Rect2& p_src_rect,const Color& p_modulate=Color(1,1,1),bool p_transpose=false); - virtual void canvas_item_add_nine_patch(RID p_item, const Rect2& p_rect, const Rect2& p_source, RID p_texture,const Vector2& p_topleft, const Vector2& p_bottomright,NinePatchAxisMode p_x_axis_mode=NINE_PATCH_STRETCH, NinePatchAxisMode p_y_axis_mode=NINE_PATCH_STRETCH,bool p_draw_center=true,const Color& p_modulate=Color(1,1,1)); - virtual void canvas_item_add_primitive(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs, RID p_texture,float p_width=1.0); - virtual void canvas_item_add_polygon(RID p_item, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID()); - virtual void canvas_item_add_triangle_array(RID p_item, const Vector& p_indices, const Vector& p_points, const Vector& p_colors,const Vector& p_uvs=Vector(), RID p_texture=RID(), int p_count=-1); - virtual void canvas_item_add_mesh(RID p_item, const RID& p_mesh,RID p_skeleton=RID()); - virtual void canvas_item_add_multimesh(RID p_item, RID p_mesh,RID p_skeleton=RID()); - virtual void canvas_item_add_set_transform(RID p_item,const Matrix32& p_transform); - virtual void canvas_item_add_clip_ignore(RID p_item, bool p_ignore); - virtual void canvas_item_set_sort_children_by_y(RID p_item, bool p_enable); - virtual void canvas_item_set_z(RID p_item, int p_z); - virtual void canvas_item_set_z_as_relative_to_parent(RID p_item, bool p_enable); - virtual void canvas_item_set_copy_to_backbuffer(RID p_item, bool p_enable,const Rect2& p_rect); + BIND6(canvas_item_add_line,RID, const Point2& , const Point2& ,const Color& ,float ,bool ) + BIND3(canvas_item_add_rect,RID, const Rect2& , const Color& ) + BIND4(canvas_item_add_circle,RID, const Point2& , float ,const Color& ) + BIND6(canvas_item_add_texture_rect,RID, const Rect2& , RID ,bool ,const Color& ,bool ) + BIND6(canvas_item_add_texture_rect_region,RID, const Rect2& , RID ,const Rect2& ,const Color& ,bool ) + BIND10(canvas_item_add_nine_patch,RID, const Rect2& , const Rect2& , RID ,const Vector2& , const Vector2& ,NinePatchAxisMode , NinePatchAxisMode,bool ,const Color& ) + BIND6(canvas_item_add_primitive,RID, const Vector& , const Vector& ,const Vector& , RID ,float ) + BIND5(canvas_item_add_polygon,RID, const Vector& , const Vector& ,const Vector& , RID ) + BIND7(canvas_item_add_triangle_array,RID, const Vector& , const Vector& , const Vector& ,const Vector& , RID , int) + BIND3(canvas_item_add_mesh,RID, const RID& ,RID ) + BIND3(canvas_item_add_multimesh,RID, RID ,RID ) + BIND2(canvas_item_add_set_transform,RID,const Matrix32& ) + BIND2(canvas_item_add_clip_ignore,RID, bool ) + BIND2(canvas_item_set_sort_children_by_y,RID, bool ) + BIND2(canvas_item_set_z,RID, int ) + BIND2(canvas_item_set_z_as_relative_to_parent,RID, bool ) + BIND3(canvas_item_set_copy_to_backbuffer,RID, bool ,const Rect2& ) - virtual void canvas_item_clear(RID p_item); - virtual void canvas_item_set_draw_index(RID p_item,int p_index); + BIND1(canvas_item_clear,RID ) + BIND2(canvas_item_set_draw_index,RID,int) - virtual void canvas_item_set_material(RID p_item, RID p_material); + BIND2(canvas_item_set_material,RID, RID ) - virtual void canvas_item_set_use_parent_material(RID p_item, bool p_enable); - - virtual RID canvas_light_create(); - virtual void canvas_light_attach_to_canvas(RID p_light,RID p_canvas); - virtual void canvas_light_set_enabled(RID p_light, bool p_enabled); - virtual void canvas_light_set_scale(RID p_light, float p_scale); - virtual void canvas_light_set_transform(RID p_light, const Matrix32& p_transform); - virtual void canvas_light_set_texture(RID p_light, RID p_texture); - virtual void canvas_light_set_texture_offset(RID p_light, const Vector2& p_offset); - virtual void canvas_light_set_color(RID p_light, const Color& p_color); - virtual void canvas_light_set_height(RID p_light, float p_height); - virtual void canvas_light_set_energy(RID p_light, float p_energy); - virtual void canvas_light_set_z_range(RID p_light, int p_min_z,int p_max_z); - virtual void canvas_light_set_layer_range(RID p_light, int p_min_layer,int p_max_layer); - virtual void canvas_light_set_item_cull_mask(RID p_light, int p_mask); - virtual void canvas_light_set_item_shadow_cull_mask(RID p_light, int p_mask); - - virtual void canvas_light_set_mode(RID p_light, CanvasLightMode p_mode); + BIND2(canvas_item_set_use_parent_material,RID, bool ) - virtual void canvas_light_set_shadow_enabled(RID p_light, bool p_enabled); - virtual void canvas_light_set_shadow_buffer_size(RID p_light, int p_size); - virtual void canvas_light_set_shadow_gradient_length(RID p_light, float p_length); - virtual void canvas_light_set_shadow_filter(RID p_light, CanvasLightShadowFilter p_filter); - virtual void canvas_light_set_shadow_color(RID p_light, const Color& p_color); + BIND0R(RID,canvas_light_create) + BIND2(canvas_light_attach_to_canvas,RID,RID ) + BIND2(canvas_light_set_enabled,RID, bool ) + BIND2(canvas_light_set_scale,RID, float ) + BIND2(canvas_light_set_transform,RID, const Matrix32& ) + BIND2(canvas_light_set_texture,RID, RID ) + BIND2(canvas_light_set_texture_offset,RID, const Vector2& ) + BIND2(canvas_light_set_color,RID, const Color& ) + BIND2(canvas_light_set_height,RID, float ) + BIND2(canvas_light_set_energy,RID, float ) + BIND3(canvas_light_set_z_range,RID, int ,int ) + BIND3(canvas_light_set_layer_range,RID, int ,int ) + BIND2(canvas_light_set_item_cull_mask,RID, int ) + BIND2(canvas_light_set_item_shadow_cull_mask,RID, int ) + + BIND2(canvas_light_set_mode,RID, CanvasLightMode ) + + BIND2(canvas_light_set_shadow_enabled,RID, bool ) + BIND2(canvas_light_set_shadow_buffer_size,RID, int ) + BIND2(canvas_light_set_shadow_gradient_length,RID, float ) + BIND2(canvas_light_set_shadow_filter,RID, CanvasLightShadowFilter ) + BIND2(canvas_light_set_shadow_color,RID, const Color& ) - virtual RID canvas_light_occluder_create(); - virtual void canvas_light_occluder_attach_to_canvas(RID p_occluder,RID p_canvas); - virtual void canvas_light_occluder_set_enabled(RID p_occluder,bool p_enabled); - virtual void canvas_light_occluder_set_polygon(RID p_occluder,RID p_polygon); - virtual void canvas_light_occluder_set_transform(RID p_occluder,const Matrix32& p_xform); - virtual void canvas_light_occluder_set_light_mask(RID p_occluder,int p_mask); - - virtual RID canvas_occluder_polygon_create(); - virtual void canvas_occluder_polygon_set_shape(RID p_occluder_polygon,const DVector& p_shape,bool p_closed); - virtual void canvas_occluder_polygon_set_shape_as_lines(RID p_occluder_polygon,const DVector& p_shape); + BIND0R(RID,canvas_light_occluder_create) + BIND2(canvas_light_occluder_attach_to_canvas,RID,RID ) + BIND2(canvas_light_occluder_set_enabled,RID,bool ) + BIND2(canvas_light_occluder_set_polygon,RID,RID ) + BIND2(canvas_light_occluder_set_transform,RID,const Matrix32& ) + BIND2(canvas_light_occluder_set_light_mask,RID,int ) - virtual void canvas_occluder_polygon_set_cull_mode(RID p_occluder_polygon,CanvasOccluderPolygonCullMode p_mode); + BIND0R(RID,canvas_occluder_polygon_create) + BIND3(canvas_occluder_polygon_set_shape,RID,const DVector& ,bool) + BIND2(canvas_occluder_polygon_set_shape_as_lines,RID ,const DVector&) + + + BIND2(canvas_occluder_polygon_set_cull_mode,RID,CanvasOccluderPolygonCullMode) /* CURSOR */ @@ -1005,6 +1038,24 @@ public: VisualServerRaster(); ~VisualServerRaster(); +#undef DISPLAY_CHANGED + +#undef BIND0R +#undef BIND1RC +#undef BIND2RC +#undef BIND3RC +#undef BIND4RC + +#undef BIND1 +#undef BIND2 +#undef BIND3 +#undef BIND4 +#undef BIND5 +#undef BIND6 +#undef BIND7 +#undef BIND8 +#undef BIND10 + }; #endif diff --git a/servers/visual_server.h b/servers/visual_server.h index 918b0b102ec..4eb2b2a5fa5 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -288,7 +288,7 @@ public: virtual void multimesh_instance_set_color(RID p_multimesh,int p_index,const Color& p_color)=0; virtual RID multimesh_get_mesh(RID p_multimesh) const=0; - virtual AABB multimesh_get_custom_aabb(RID p_multimesh,const AABB& p_aabb) const=0; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const=0; virtual Transform multimesh_instance_get_transform(RID p_multimesh,int p_index) const=0; virtual Matrix32 multimesh_instance_get_transform_2d(RID p_multimesh,int p_index) const=0; @@ -320,9 +320,9 @@ public: virtual void skeleton_allocate(RID p_skeleton,int p_bones,bool p_2d_skeleton=false)=0; virtual int skeleton_get_bone_count(RID p_skeleton) const=0; virtual void skeleton_bone_set_transform(RID p_skeleton,int p_bone, const Transform& p_transform)=0; - virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone)=0; + virtual Transform skeleton_bone_get_transform(RID p_skeleton,int p_bone) const=0; virtual void skeleton_bone_set_transform_2d(RID p_skeleton,int p_bone, const Matrix32& p_transform)=0; - virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone)=0; + virtual Matrix32 skeleton_bone_get_transform_2d(RID p_skeleton,int p_bone)const =0; /* Light API */ @@ -392,7 +392,7 @@ public: virtual RID room_create()=0; virtual void room_add_bounds(RID p_room, const DVector& p_convex_polygon,float p_height,const Transform& p_transform)=0; - virtual void room_clear_bounds()=0; + virtual void room_clear_bounds(RID p_room)=0; /* PORTAL API */ From cf5778e51a883936ffc896231da8259e5ebabc0a Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Wed, 5 Oct 2016 01:26:35 -0300 Subject: [PATCH 003/223] -Added ViewportContainer, this is the only way to make viewports show up in GUI now -2D editing now seems to work -Added some functions and refactoring to Viewport --- drivers/gles2/shaders/canvas.glsl | 1 + drivers/gles3/rasterizer_canvas_gles3.cpp | 18 +-- drivers/gles3/rasterizer_gles3.cpp | 4 +- drivers/gles3/rasterizer_storage_gles3.cpp | 64 ++++++++- drivers/gles3/rasterizer_storage_gles3.h | 5 +- scene/gui/viewport_container.cpp | 103 +++++++++++++++ scene/gui/viewport_container.h | 25 ++++ scene/main/viewport.cpp | 125 +++++++----------- scene/main/viewport.h | 24 ++-- scene/register_scene_types.cpp | 4 +- servers/visual/rasterizer.h | 1 - servers/visual/visual_server_raster.h | 3 +- servers/visual/visual_server_viewport.cpp | 41 ++++-- servers/visual/visual_server_viewport.h | 27 +++- servers/visual_server.h | 5 +- tools/editor/editor_node.cpp | 13 +- tools/editor/editor_node.h | 4 +- .../plugins/canvas_item_editor_plugin.cpp | 3 +- .../editor/plugins/spatial_editor_plugin.cpp | 4 +- tools/editor/plugins/spatial_editor_plugin.h | 2 +- 20 files changed, 340 insertions(+), 136 deletions(-) create mode 100644 scene/gui/viewport_container.cpp create mode 100644 scene/gui/viewport_container.h diff --git a/drivers/gles2/shaders/canvas.glsl b/drivers/gles2/shaders/canvas.glsl index 38c579c15c2..eeab42ee648 100644 --- a/drivers/gles2/shaders/canvas.glsl +++ b/drivers/gles2/shaders/canvas.glsl @@ -395,5 +395,6 @@ LIGHT_SHADER_CODE // color.rgb*=color.a; gl_FragColor = color; + } diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index f24560763f3..a1e755cb4ae 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -111,6 +111,15 @@ void RasterizerCanvasGLES3::light_internal_free(RID p_rid) { void RasterizerCanvasGLES3::canvas_begin(){ + if (storage->frame.current_rt && storage->frame.clear_request) { + // a clear request may be pending, so do it + + glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); + glClear(GL_COLOR_BUFFER_BIT); + storage->frame.clear_request=false; + + } + /*canvas_shader.unbind(); canvas_shader.set_custom_shader(0); canvas_shader.set_conditional(CanvasShaderGLES2::USE_MODULATE,false); @@ -504,7 +513,7 @@ void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *curr } if (rect->flags&CANVAS_RECT_FLIP_V) { - src_rect.size.x*=-1; + src_rect.size.y*=-1; } if (rect->flags&CANVAS_RECT_TRANSPOSE) { @@ -769,15 +778,8 @@ void RasterizerGLES2::_canvas_item_setup_shader_params(CanvasItemMaterial *mater void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const Color& p_modulate,Light *p_light) { - if (storage->frame.clear_request) { - // a clear request may be pending, so do it - glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); - glClear(GL_COLOR_BUFFER_BIT); - storage->frame.clear_request=false; - } - Item *current_clip=NULL; RasterizerStorageGLES3::Shader *shader_cache=NULL; diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index ba83a572e52..27b06906457 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -141,7 +141,7 @@ void RasterizerGLES3::set_current_render_target(RID p_render_target){ storage->frame.current_rt=rt; storage->frame.clear_request=false; - glViewport(0,0,rt->width,rt->height); + glViewport(0,0,rt->width,rt->height); } else { storage->frame.current_rt=NULL; @@ -155,6 +155,7 @@ void RasterizerGLES3::restore_render_target() { ERR_FAIL_COND(storage->frame.current_rt==NULL); RasterizerStorageGLES3::RenderTarget * rt = storage->frame.current_rt; + glBindFramebuffer(GL_FRAMEBUFFER,rt->front.fbo); glViewport(0,0,rt->width,rt->height); } @@ -176,6 +177,7 @@ void RasterizerGLES3::blit_render_target_to_screen(RID p_render_target,const Rec ERR_FAIL_COND(!rt); canvas->canvas_begin(); + glDisable(GL_BLEND); glBindFramebuffer(GL_FRAMEBUFFER,storage->config.system_fbo); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D,rt->front.color); diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 515db390983..b4d65080a47 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -1483,6 +1483,12 @@ void RasterizerStorageGLES3::_render_target_clear(RenderTarget *rt) { rt->depth=0; } + Texture *tex = texture_owner.get(rt->texture); + tex->alloc_height=0; + tex->alloc_width=0; + tex->width=0; + tex->height=0; + } void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ @@ -1490,6 +1496,8 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ if (rt->width<=0 || rt->height<=0) return; + glActiveTexture(GL_TEXTURE0); + glGenFramebuffers(1, &rt->front.fbo); glBindFramebuffer(GL_FRAMEBUFFER, rt->front.fbo); @@ -1512,6 +1520,7 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ GLuint color_internal_format; GLuint color_format; GLuint color_type; + Image::Format image_format; if (config.fbo_format==FBO_FORMAT_16_BITS) { @@ -1520,28 +1529,33 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ color_internal_format=GL_RGB5_A1; color_format=GL_RGBA; color_type=GL_UNSIGNED_SHORT_5_5_5_1; + image_format=Image::FORMAT_RGBA5551; } else { color_internal_format=GL_RGB565; color_format=GL_RGB; color_type=GL_UNSIGNED_SHORT_5_6_5; + image_format=Image::FORMAT_RGB565; } - } else if (config.fbo_format==FBO_FORMAT_32_BITS) { + } else if (config.fbo_format==FBO_FORMAT_32_BITS || (config.fbo_format==FBO_FORMAT_FLOAT && rt->flags[RENDER_TARGET_NO_3D])) { if (rt->flags[RENDER_TARGET_TRANSPARENT]) { color_internal_format=GL_RGBA8; color_format=GL_RGBA; color_type=GL_UNSIGNED_BYTE; + image_format=Image::FORMAT_RGBA8; } else { color_internal_format=GL_RGB10_A2; color_format=GL_RGBA; color_type=GL_UNSIGNED_INT_2_10_10_10_REV; + image_format=Image::FORMAT_RGBA8;//todo } } else if (config.fbo_format==FBO_FORMAT_FLOAT) { color_internal_format=GL_RGBA16F; color_format=GL_RGBA; color_type=GL_HALF_FLOAT; + image_format=Image::FORMAT_RGBAH; } glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); @@ -1559,6 +1573,19 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); } + Texture *tex = texture_owner.get(rt->texture); + tex->format=image_format; + tex->gl_format_cache=color_format; + tex->gl_type_cache=color_type; + tex->gl_internal_format_cache=color_internal_format; + tex->tex_id=rt->front.color; + tex->width=rt->width; + tex->alloc_width=rt->width; + tex->height=rt->height; + tex->alloc_height=rt->height; + + + texture_set_flags(rt->texture,tex->flags); if (!rt->flags[RENDER_TARGET_NO_SAMPLING]) { @@ -1662,6 +1689,31 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ RID RasterizerStorageGLES3::render_target_create(){ RenderTarget *rt = memnew( RenderTarget ); + + Texture * t = memnew( Texture ); + + t->flags=0; + t->width=0; + t->height=0; + t->alloc_height=0; + t->alloc_width=0; + t->format=Image::FORMAT_R8; + t->target=GL_TEXTURE_2D; + t->gl_format_cache=0; + t->gl_internal_format_cache=0; + t->gl_type_cache=0; + t->data_size=0; + t->compressed=false; + t->srgb=false; + t->total_data_size=0; + t->ignore_mipmaps=false; + t->mipmaps=0; + t->active=true; + t->tex_id=0; + + + rt->texture=texture_owner.make_rid(t); + return render_target_owner.make_rid(rt); } @@ -1686,13 +1738,9 @@ RID RasterizerStorageGLES3::render_target_get_texture(RID p_render_target) const RenderTarget *rt = render_target_owner.getornull(p_render_target); ERR_FAIL_COND_V(!rt,RID()); - - return RID(); + return rt->texture; } -Image RasterizerStorageGLES3::render_target_get_image(RID p_render_target) const{ - return Image(); -} void RasterizerStorageGLES3::render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value) { RenderTarget *rt = render_target_owner.getornull(p_render_target); @@ -1889,12 +1937,16 @@ bool RasterizerStorageGLES3::free(RID p_rid){ RenderTarget *rt = render_target_owner.getornull(p_rid); _render_target_clear(rt); + Texture *t=texture_owner.get(rt->texture); + texture_owner.free(rt->texture); + memdelete(t); render_target_owner.free(p_rid); memdelete(rt); } else if (texture_owner.owns(p_rid)) { // delete the texture Texture *texture = texture_owner.get(p_rid); + ERR_FAIL_COND_V(texture->render_target,true); //cant free the render target texture, dude info.texture_mem-=texture->total_data_size; texture_owner.free(p_rid); memdelete(texture); diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index eb6cd0bd925..9820fbc941a 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -117,7 +117,6 @@ public: bool active; GLuint tex_id; - RenderTarget *render_target; Texture() { @@ -361,6 +360,8 @@ public: bool used_in_frame; + RID texture; + RenderTarget() { width=0; @@ -387,7 +388,7 @@ public: virtual RID render_target_create(); virtual void render_target_set_size(RID p_render_target,int p_width, int p_height); virtual RID render_target_get_texture(RID p_render_target) const; - virtual Image render_target_get_image(RID p_render_target) const; + virtual void render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value); virtual bool render_target_renedered_in_frame(RID p_render_target); diff --git a/scene/gui/viewport_container.cpp b/scene/gui/viewport_container.cpp new file mode 100644 index 00000000000..3418a495e9f --- /dev/null +++ b/scene/gui/viewport_container.cpp @@ -0,0 +1,103 @@ +#include "viewport_container.h" +#include "scene/main/viewport.h" +Size2 ViewportContainer::get_minimum_size() const { + + + if (stretch) + return Size2(); + Size2 ms; + for(int i=0;icast_to(); + if (!c) + continue; + + Size2 minsize = c->get_size(); + ms.width = MAX(ms.width , minsize.width); + ms.height = MAX(ms.height , minsize.height); + } + + return ms; + +} + + +void ViewportContainer::set_stretch(bool p_enable) { + + stretch=p_enable; + queue_sort(); + update(); + +} + +bool ViewportContainer::is_stretch_enabled() const { + + return stretch; +} + + +void ViewportContainer::_notification(int p_what) { + + + if (p_what==NOTIFICATION_RESIZED) { + + if (!stretch) + return; + + for(int i=0;icast_to(); + if (!c) + continue; + + c->set_size(get_size()); + } + } + + if (p_what==NOTIFICATION_ENTER_TREE || p_what==NOTIFICATION_VISIBILITY_CHANGED) { + + for(int i=0;icast_to(); + if (!c) + continue; + + + if (is_visible()) + c->set_update_mode(Viewport::UPDATE_ALWAYS); + else + c->set_update_mode(Viewport::UPDATE_DISABLED); + } + + } + + if (p_what==NOTIFICATION_DRAW) { + + for(int i=0;icast_to(); + if (!c) + continue; + + if (stretch) + draw_texture_rect(c->get_texture(),Rect2(Vector2(),get_size()*Size2(1,-1))); + else + draw_texture_rect(c->get_texture(),Rect2(Vector2(),c->get_size()*Size2(1,-1))); + } + } + +} + +void ViewportContainer::_bind_methods() { + + ObjectTypeDB::bind_method(_MD("set_stretch","enable"),&ViewportContainer::set_stretch); + ObjectTypeDB::bind_method(_MD("is_stretch_enabled"),&ViewportContainer::is_stretch_enabled); + + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"stretch"),_SCS("set_stretch"),_SCS("is_stretch_enabled")); +} + +ViewportContainer::ViewportContainer() { + + stretch=false; +} diff --git a/scene/gui/viewport_container.h b/scene/gui/viewport_container.h new file mode 100644 index 00000000000..cfc58f7d6e3 --- /dev/null +++ b/scene/gui/viewport_container.h @@ -0,0 +1,25 @@ +#ifndef VIEWPORTCONTAINER_H +#define VIEWPORTCONTAINER_H + +#include "scene/gui/container.h" + +class ViewportContainer : public Container { + + OBJ_TYPE( ViewportContainer, Container ); + + bool stretch; +protected: + + void _notification(int p_what); + static void _bind_methods(); +public: + + void set_stretch(bool p_enable); + bool is_stretch_enabled() const; + + virtual Size2 get_minimum_size() const; + + ViewportContainer(); +}; + +#endif // VIEWPORTCONTAINER_H diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index 44a3f41a4ae..e93a16371d3 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -51,35 +51,37 @@ #include "globals.h" -int RenderTargetTexture::get_width() const { +int ViewportTexture::get_width() const { ERR_FAIL_COND_V(!vp,0); return vp->size.width; } -int RenderTargetTexture::get_height() const{ +int ViewportTexture::get_height() const{ ERR_FAIL_COND_V(!vp,0); return vp->size.height; } -Size2 RenderTargetTexture::get_size() const{ +Size2 ViewportTexture::get_size() const{ ERR_FAIL_COND_V(!vp,Size2()); return vp->size; } -RID RenderTargetTexture::get_rid() const{ +RID ViewportTexture::get_rid() const{ ERR_FAIL_COND_V(!vp,RID()); return vp->texture_rid; } -bool RenderTargetTexture::has_alpha() const{ +bool ViewportTexture::has_alpha() const{ return false; } -void RenderTargetTexture::set_flags(uint32_t p_flags){ +void ViewportTexture::set_flags(uint32_t p_flags){ + + if (!vp) + return; - ERR_FAIL_COND(!vp); if (p_flags&FLAG_FILTER) flags=FLAG_FILTER; else @@ -89,12 +91,12 @@ void RenderTargetTexture::set_flags(uint32_t p_flags){ } -uint32_t RenderTargetTexture::get_flags() const{ +uint32_t ViewportTexture::get_flags() const{ return flags; } -RenderTargetTexture::RenderTargetTexture(Viewport *p_vp){ +ViewportTexture::ViewportTexture(Viewport *p_vp){ vp=p_vp; flags=0; @@ -207,7 +209,7 @@ void Viewport::_parent_draw() { void Viewport::_parent_visibility_changed() { - +/* if (parent_control) { Control *c = parent_control; @@ -216,14 +218,14 @@ void Viewport::_parent_visibility_changed() { _update_listener(); _update_listener_2d(); } - +*/ } void Viewport::_vp_enter_tree() { - if (parent_control) { +/* if (parent_control) { Control *cparent=parent_control; RID parent_ci = cparent->get_canvas_item(); @@ -240,7 +242,7 @@ void Viewport::_vp_enter_tree() { // VisualServer::get_singleton()->viewport_attach_to_screen(viewport,0); } - +*/ } @@ -327,24 +329,10 @@ void Viewport::_notification(int p_what) { case NOTIFICATION_ENTER_TREE: { if (get_parent()) { - Node *parent=get_parent(); - if (parent) { - parent_control=parent->cast_to(); - } - } - - - parent=NULL; - Node *parent_node=get_parent(); - - - while(parent_node) { - - parent = parent_node->cast_to(); - if (parent) - break; - - parent_node=parent_node->get_parent(); + parent = get_parent()->get_viewport(); + VisualServer::get_singleton()->viewport_set_parent_viewport(viewport,parent->get_viewport()); + } else { + parent=NULL; } current_canvas=find_world_2d()->get_canvas(); @@ -434,7 +422,7 @@ void Viewport::_notification(int p_what) { } remove_from_group("_viewports"); - parent_control=NULL; + VS::get_singleton()->viewport_set_active(viewport,false); @@ -1263,7 +1251,7 @@ bool Viewport::is_set_as_render_target() const{ void Viewport::set_update_mode(UpdateMode p_mode){ update_mode=p_mode; -// VS::get_singleton()->viewport_set_update_mode(viewport,VS::RenderTargetUpdateMode(p_mode)); + VS::get_singleton()->viewport_set_update_mode(viewport,VS::ViewportUpdateMode(p_mode)); } Viewport::UpdateMode Viewport::get_update_mode() const{ @@ -1282,7 +1270,7 @@ Image Viewport::get_screen_capture() const { return Image(); } -Ref Viewport::get_texture() const { +Ref Viewport::get_texture() const { return texture; } @@ -1290,7 +1278,7 @@ Ref Viewport::get_texture() const { void Viewport::set_vflip(bool p_enable) { vflip=p_enable; -// VisualServer::get_singleton()->viewport_set_vflip(viewport,p_enable); + VisualServer::get_singleton()->viewport_set_vflip(viewport,p_enable); } bool Viewport::get_vflip() const{ @@ -1315,30 +1303,6 @@ void Viewport::clear() { // VisualServer::get_singleton()->viewport_clear(viewport); } -void Viewport::set_filter(bool p_enable) { - - texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); - -} - -bool Viewport::get_filter() const{ - - return (texture->get_flags()&Texture::FLAG_FILTER)!=0; -} - -void Viewport::set_gen_mipmaps(bool p_enable) { - - //texture->set_flags(p_enable?int(Texture::FLAG_FILTER):int(0)); - gen_mipmaps=p_enable; - -} - -bool Viewport::get_gen_mipmaps() const{ - - //return (texture->get_flags()&Texture::FLAG_FILTER)!=0; - return gen_mipmaps; -} - Matrix32 Viewport::_get_input_pre_xform() const { @@ -1356,9 +1320,9 @@ Matrix32 Viewport::_get_input_pre_xform() const { Vector2 Viewport::_get_window_offset() const { - if (parent_control) { - return (parent_control->get_viewport()->get_final_transform() * parent_control->get_global_transform_with_canvas()).get_origin(); - } +// if (parent_control) { +// return (parent_control->get_viewport()->get_final_transform() * parent_control->get_global_transform_with_canvas()).get_origin(); +// } return Vector2(); } @@ -1454,8 +1418,6 @@ void Viewport::_vp_input(const InputEvent& p_ev) { } #endif - if (parent_control && !parent_control->is_visible()) - return; if (to_screen_rect==Rect2()) return; //if render target, can't get input events @@ -1480,8 +1442,8 @@ void Viewport::_vp_unhandled_input(const InputEvent& p_ev) { } #endif - if (parent_control && !parent_control->is_visible()) - return; +// if (parent_control && !parent_control->is_visible()) +// return; if (to_screen_rect==Rect2()) return; //if render target, can't get input events @@ -2575,6 +2537,16 @@ bool Viewport::is_input_disabled() const { return disable_input; } +void Viewport::set_disable_3d(bool p_disable) { + disable_3d=p_disable; + VS::get_singleton()->viewport_set_disable_3d(viewport,p_disable); +} + +bool Viewport::is_3d_disabled() const { + + return disable_3d; +} + Variant Viewport::gui_get_drag_data() const { return gui.drag_data; } @@ -2648,17 +2620,10 @@ void Viewport::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_clear_on_new_frame"), &Viewport::get_clear_on_new_frame); ObjectTypeDB::bind_method(_MD("clear"), &Viewport::clear); - - ObjectTypeDB::bind_method(_MD("set_filter","enable"), &Viewport::set_filter); - ObjectTypeDB::bind_method(_MD("get_filter"), &Viewport::get_filter); - - ObjectTypeDB::bind_method(_MD("set_gen_mipmaps","enable"), &Viewport::set_gen_mipmaps); - ObjectTypeDB::bind_method(_MD("get_gen_mipmaps"), &Viewport::get_gen_mipmaps); - ObjectTypeDB::bind_method(_MD("set_update_mode","mode"), &Viewport::set_update_mode); ObjectTypeDB::bind_method(_MD("get_update_mode"), &Viewport::get_update_mode); - ObjectTypeDB::bind_method(_MD("get_texture:RenderTargetTexture"), &Viewport::get_texture); + ObjectTypeDB::bind_method(_MD("get_texture:ViewportTexture"), &Viewport::get_texture); ObjectTypeDB::bind_method(_MD("set_physics_object_picking","enable"), &Viewport::set_physics_object_picking); ObjectTypeDB::bind_method(_MD("get_physics_object_picking"), &Viewport::get_physics_object_picking); @@ -2690,6 +2655,9 @@ void Viewport::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_disable_input","disable"), &Viewport::set_disable_input); ObjectTypeDB::bind_method(_MD("is_input_disabled"), &Viewport::is_input_disabled); + ObjectTypeDB::bind_method(_MD("set_disable_3d","disable"), &Viewport::set_disable_3d); + ObjectTypeDB::bind_method(_MD("is_3d_disabled"), &Viewport::is_3d_disabled); + ObjectTypeDB::bind_method(_MD("_gui_show_tooltip"), &Viewport::_gui_show_tooltip); ObjectTypeDB::bind_method(_MD("_gui_remove_focus"), &Viewport::_gui_remove_focus); @@ -2700,13 +2668,12 @@ void Viewport::_bind_methods() { ADD_PROPERTY( PropertyInfo(Variant::BOOL,"transparent_bg"), _SCS("set_transparent_background"), _SCS("has_transparent_background") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/v_flip"), _SCS("set_vflip"), _SCS("get_vflip") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/clear_on_new_frame"), _SCS("set_clear_on_new_frame"), _SCS("get_clear_on_new_frame") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/filter"), _SCS("set_filter"), _SCS("get_filter") ); - ADD_PROPERTY( PropertyInfo(Variant::BOOL,"render_target/gen_mipmaps"), _SCS("set_gen_mipmaps"), _SCS("get_gen_mipmaps") ); ADD_PROPERTY( PropertyInfo(Variant::INT,"render_target/update_mode",PROPERTY_HINT_ENUM,"Disabled,Once,When Visible,Always"), _SCS("set_update_mode"), _SCS("get_update_mode") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"audio_listener/enable_2d"), _SCS("set_as_audio_listener_2d"), _SCS("is_audio_listener_2d") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"audio_listener/enable_3d"), _SCS("set_as_audio_listener"), _SCS("is_audio_listener") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"physics/object_picking"), _SCS("set_physics_object_picking"), _SCS("get_physics_object_picking") ); ADD_PROPERTY( PropertyInfo(Variant::BOOL,"gui/disable_input"), _SCS("set_disable_input"), _SCS("is_input_disabled") ); + ADD_PROPERTY( PropertyInfo(Variant::BOOL,"3d/disable_3d"), _SCS("set_disable_3d"), _SCS("is_3d_disabled") ); ADD_SIGNAL(MethodInfo("size_changed")); @@ -2727,6 +2694,7 @@ Viewport::Viewport() { world_2d = Ref( memnew( World2D )); viewport = VisualServer::get_singleton()->viewport_create(); + texture_rid=VisualServer::get_singleton()->viewport_get_texture(viewport); internal_listener = SpatialSoundServer::get_singleton()->listener_create(); audio_listener=false; internal_listener_2d = SpatialSound2DServer::get_singleton()->listener_create(); @@ -2744,7 +2712,7 @@ Viewport::Viewport() { clear_on_new_frame=true; //clear=true; update_mode=UPDATE_WHEN_VISIBLE; - texture = Ref( memnew( RenderTargetTexture(this) ) ); + texture = Ref( memnew( ViewportTexture(this) ) ); physics_object_picking=false; physics_object_capture=0; @@ -2759,6 +2727,7 @@ Viewport::Viewport() { unhandled_key_input_group = "_vp_unhandled_key_input"+id; disable_input=false; + disable_3d=false; //window tooltip gui.tooltip_timer = -1; @@ -2773,7 +2742,7 @@ Viewport::Viewport() { gui.canvas_sort_index=0; - parent_control=NULL; + } diff --git a/scene/main/viewport.h b/scene/main/viewport.h index d610237bec8..c5d61469dd4 100644 --- a/scene/main/viewport.h +++ b/scene/main/viewport.h @@ -48,9 +48,9 @@ class Label; class Timer; class Viewport; -class RenderTargetTexture : public Texture { +class ViewportTexture : public Texture { - OBJ_TYPE( RenderTargetTexture, Texture ); + OBJ_TYPE( ViewportTexture, Texture ); int flags; friend class Viewport; @@ -70,7 +70,7 @@ public: virtual void set_flags(uint32_t p_flags); virtual uint32_t get_flags() const; - RenderTargetTexture(Viewport *p_vp=NULL); + ViewportTexture(Viewport *p_vp=NULL); }; @@ -88,10 +88,10 @@ public: private: -friend class RenderTargetTexture; +friend class ViewportTexture; + - Control *parent_control; Viewport *parent; Listener *listener; @@ -101,7 +101,6 @@ friend class RenderTargetTexture; Set cameras; RID viewport; - RID canvas_item; RID current_canvas; bool audio_listener; @@ -171,9 +170,10 @@ friend class RenderTargetTexture; void _update_global_transform(); + bool disable_3d; UpdateMode update_mode; RID texture_rid; - Ref texture; + Ref texture; struct GUI { @@ -342,15 +342,10 @@ public: bool get_clear_on_new_frame() const; void clear(); - void set_filter(bool p_enable); - bool get_filter() const; - - void set_gen_mipmaps(bool p_enable); - bool get_gen_mipmaps() const; void set_update_mode(UpdateMode p_mode); UpdateMode get_update_mode() const; - Ref get_texture() const; + Ref get_texture() const; Vector2 get_camera_coords(const Vector2& p_viewport_coords) const; @@ -368,6 +363,9 @@ public: void set_disable_input(bool p_disable); bool is_input_disabled() const; + void set_disable_3d(bool p_disable); + bool is_3d_disabled() const; + void set_attach_to_screen_rect(const Rect2& p_rect); Rect2 get_attach_to_screen_rect() const; diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp index dcb2edf8724..1bff6801fe5 100644 --- a/scene/register_scene_types.cpp +++ b/scene/register_scene_types.cpp @@ -66,6 +66,7 @@ #include "scene/gui/center_container.h" #include "scene/gui/scroll_container.h" #include "scene/gui/margin_container.h" +#include "scene/gui/viewport_container.h" #include "scene/gui/panel.h" #include "scene/gui/spin_box.h" #include "scene/gui/file_dialog.h" @@ -300,7 +301,7 @@ void register_scene_types() { ObjectTypeDB::register_virtual_type(); ObjectTypeDB::register_type(); - ObjectTypeDB::register_virtual_type(); + ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -390,6 +391,7 @@ void register_scene_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); OS::get_singleton()->yield(); //may take time to init diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 6a6eb7ad20f..865298f64ec 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -233,7 +233,6 @@ public: virtual RID render_target_create()=0; virtual void render_target_set_size(RID p_render_target,int p_width, int p_height)=0; virtual RID render_target_get_texture(RID p_render_target) const=0; - virtual Image render_target_get_image(RID p_render_target) const=0; virtual void render_target_set_flag(RID p_render_target,RenderTargetFlags p_flag,bool p_value)=0; virtual bool render_target_renedered_in_frame(RID p_render_target)=0; diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index cd6d7b38d36..2171998f3fe 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -807,6 +807,7 @@ public: BIND3(viewport_set_size,RID,int ,int ) BIND2(viewport_set_active,RID ,bool ) + BIND2(viewport_set_parent_viewport,RID,RID) BIND2(viewport_set_clear_mode,RID,ViewportClearMode ) @@ -818,11 +819,11 @@ public: BIND1RC(RID,viewport_get_texture,RID ) - BIND1RC(Image,viewport_capture,RID ) BIND2(viewport_set_hide_scenario,RID,bool ) BIND2(viewport_set_hide_canvas,RID,bool ) BIND2(viewport_set_disable_environment,RID,bool ) + BIND2(viewport_set_disable_3d,RID,bool ) BIND2(viewport_attach_camera,RID,RID ) BIND2(viewport_set_scenario,RID,RID ) diff --git a/servers/visual/visual_server_viewport.cpp b/servers/visual/visual_server_viewport.cpp index b30a57a5b84..baf18b7a562 100644 --- a/servers/visual/visual_server_viewport.cpp +++ b/servers/visual/visual_server_viewport.cpp @@ -1,6 +1,7 @@ #include "visual_server_viewport.h" #include "visual_server_global.h" #include "visual_server_canvas.h" +#include "globals.h" void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { @@ -50,7 +51,12 @@ void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { } #endif - VSG::rasterizer->clear_render_target(Color(0.5,0.5,0.5,1.0)); + if (p_viewport->clear_mode!=VS::VIEWPORT_CLEAR_NEVER) { + VSG::rasterizer->clear_render_target(clear_color); + if (p_viewport->clear_mode==VS::VIEWPORT_CLEAR_ONLY_NEXT_FRAME) { + p_viewport->clear_mode=VS::VIEWPORT_CLEAR_NEVER; + } + } if (!p_viewport->hide_canvas) { int i=0; @@ -161,10 +167,10 @@ void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { light=light->shadows_next_ptr; } - VSG::rasterizer->restore_render_target(); // VSG::canvas_render->reset_canvas(); } + VSG::rasterizer->restore_render_target(); #if 0 @@ -175,6 +181,8 @@ void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { } #endif + + for (Map::Element *E=canvas_map.front();E;E=E->next()) { VisualServerCanvas::Canvas *canvas = static_cast(E->get()->canvas); @@ -226,6 +234,10 @@ void VisualServerViewport::draw_viewports() { //draw viewports + clear_color=GLOBAL_DEF("rendering/viewport/default_clear_color",Color(0.5,0.5,0.5)); + + + active_viewports.sort_custom(); for(int i=0;iparent=p_parent_viewport; +} + void VisualServerViewport::viewport_set_clear_mode(RID p_viewport,VS::ViewportClearMode p_clear_mode) { Viewport * viewport = viewport_owner.getornull(p_viewport); @@ -349,13 +369,6 @@ RID VisualServerViewport::viewport_get_texture(RID p_viewport) const{ return VSG::storage->render_target_get_texture(viewport->render_target); -} -Image VisualServerViewport::viewport_capture(RID p_viewport) const{ - - const Viewport * viewport = viewport_owner.getornull(p_viewport); - ERR_FAIL_COND_V(!viewport,Image()); - return VSG::storage->render_target_get_image(viewport->render_target); - } void VisualServerViewport::viewport_set_hide_scenario(RID p_viewport,bool p_hide){ @@ -381,6 +394,16 @@ void VisualServerViewport::viewport_set_disable_environment(RID p_viewport,bool viewport->disable_environment=p_disable; } +void VisualServerViewport::viewport_set_disable_3d(RID p_viewport,bool p_disable){ + + Viewport * viewport = viewport_owner.getornull(p_viewport); + ERR_FAIL_COND(!viewport); + + + viewport->disable_3d=p_disable; + VSG::storage->render_target_set_flag(viewport->render_target,RasterizerStorage::RENDER_TARGET_NO_3D,p_disable); +} + void VisualServerViewport::viewport_attach_camera(RID p_viewport,RID p_camera){ Viewport * viewport = viewport_owner.getornull(p_viewport); diff --git a/servers/visual/visual_server_viewport.h b/servers/visual/visual_server_viewport.h index e5c888fbcb9..cba33a3b2d1 100644 --- a/servers/visual/visual_server_viewport.h +++ b/servers/visual/visual_server_viewport.h @@ -13,6 +13,8 @@ public: }; + + struct Viewport : public RID_Data { RID self; @@ -32,8 +34,8 @@ public: bool hide_scenario; bool hide_canvas; bool disable_environment; + bool disable_3d; - Image capture; VS::ViewportClearMode clear_mode; @@ -70,8 +72,27 @@ public: }; mutable RID_Owner viewport_owner; + + + struct ViewportSort { + _FORCE_INLINE_ bool operator()(const Viewport*p_left,const Viewport* p_right) const { + + bool left_to_screen = p_left->viewport_to_screen_rect.size!=Size2(); + bool right_to_screen = p_right->viewport_to_screen_rect.size!=Size2(); + + if (left_to_screen==right_to_screen) { + + return p_left->parent==p_right->self; + } else { + return right_to_screen; + } + } + }; + + Vector active_viewports; private: + Color clear_color; void _draw_viewport(Viewport *p_viewport); public: @@ -84,7 +105,7 @@ public: void viewport_detach(RID p_viewport); void viewport_set_active(RID p_viewport,bool p_active); - + void viewport_set_parent_viewport(RID p_viewport,RID p_parent_viewport); void viewport_set_update_mode(RID p_viewport,VS::ViewportUpdateMode p_mode); void viewport_set_vflip(RID p_viewport,bool p_enable); @@ -92,11 +113,11 @@ public: void viewport_set_clear_mode(RID p_viewport,VS::ViewportClearMode p_clear_mode); RID viewport_get_texture(RID p_viewport) const; - Image viewport_capture(RID p_viewport) const; void viewport_set_hide_scenario(RID p_viewport,bool p_hide); void viewport_set_hide_canvas(RID p_viewport,bool p_hide); void viewport_set_disable_environment(RID p_viewport,bool p_disable); + void viewport_set_disable_3d(RID p_viewport,bool p_disable); void viewport_attach_camera(RID p_viewport,RID p_camera); void viewport_set_scenario(RID p_viewport,RID p_scenario); diff --git a/servers/visual_server.h b/servers/visual_server.h index 4eb2b2a5fa5..238bcc216da 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -424,6 +424,7 @@ public: virtual void viewport_set_size(RID p_viewport,int p_width,int p_height)=0; virtual void viewport_set_active(RID p_viewport,bool p_active)=0; + virtual void viewport_set_parent_viewport(RID p_viewport,RID p_parent_viewport)=0; virtual void viewport_attach_to_screen(RID p_viewport,const Rect2& p_rect=Rect2(),int p_screen=0)=0; virtual void viewport_detach(RID p_viewport)=0; @@ -442,18 +443,18 @@ public: enum ViewportClearMode { VIEWPORT_CLEAR_ALWAYS, - VIWEPORT_CLEAR_NEVER, + VIEWPORT_CLEAR_NEVER, VIEWPORT_CLEAR_ONLY_NEXT_FRAME }; virtual void viewport_set_clear_mode(RID p_viewport,ViewportClearMode p_clear_mode)=0; virtual RID viewport_get_texture(RID p_viewport) const=0; - virtual Image viewport_capture(RID p_viewport) const=0; virtual void viewport_set_hide_scenario(RID p_viewport,bool p_hide)=0; virtual void viewport_set_hide_canvas(RID p_viewport,bool p_hide)=0; virtual void viewport_set_disable_environment(RID p_viewport,bool p_disable)=0; + virtual void viewport_set_disable_3d(RID p_viewport,bool p_disable)=0; virtual void viewport_attach_camera(RID p_viewport,RID p_camera)=0; virtual void viewport_set_scenario(RID p_viewport,RID p_scenario)=0; diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp index 7f026241074..9b5ed422c5a 100644 --- a/tools/editor/editor_node.cpp +++ b/tools/editor/editor_node.cpp @@ -266,14 +266,12 @@ void EditorNode::_notification(int p_what) { circle_step=0; circle_step_msec=tick; - circle_step_frame=frame+1; + circle_step_frame=frame+1; update_menu->set_icon(gui_base->get_icon("Progress"+itos(circle_step+1),"EditorIcons")); } - scene_root->set_size_override(true,Size2(Globals::get_singleton()->get("display/width"),Globals::get_singleton()->get("display/height"))); - editor_selection->update(); { @@ -300,7 +298,7 @@ void EditorNode::_notification(int p_what) { } if (p_what==NOTIFICATION_ENTER_TREE) { - + get_tree()->get_root()->set_disable_3d(true); //MessageQueue::get_singleton()->push_call(this,"_get_scene_metadata"); get_tree()->set_editor_hint(true); get_tree()->get_root()->set_as_audio_listener(false); @@ -5663,6 +5661,7 @@ EditorNode::EditorNode() { scene_root_parent->set_custom_minimum_size(Size2(0,80)*EDSCALE); + //Ref sp = scene_root_parent->get_stylebox("panel","TabContainer"); //scene_root_parent->add_style_override("panel",sp); @@ -5676,6 +5675,8 @@ EditorNode::EditorNode() { scene_root = memnew( Viewport ); + scene_root->set_disable_3d(true); + //scene_root_base->add_child(scene_root); @@ -5683,7 +5684,7 @@ EditorNode::EditorNode() { VisualServer::get_singleton()->viewport_set_hide_scenario(scene_root->get_viewport(),true); scene_root->set_disable_input(true); scene_root->set_as_audio_listener_2d(true); - scene_root->set_size_override(true,Size2(Globals::get_singleton()->get("display/width"),Globals::get_singleton()->get("display/height"))); + //scene_root->set_size_override(true,Size2(Globals::get_singleton()->get("display/width"),Globals::get_singleton()->get("display/height"))); // scene_root->set_world_2d( Ref( memnew( World2D )) ); @@ -5696,6 +5697,7 @@ EditorNode::EditorNode() { scene_root_parent->add_child(viewport); + PanelContainer *top_region = memnew( PanelContainer ); top_region->add_style_override("panel",gui_base->get_stylebox("hover","Button")); HBoxContainer *left_menu_hb = memnew( HBoxContainer ); @@ -6673,6 +6675,7 @@ EditorNode::EditorNode() { load_error_dialog->set_child_rect(load_errors); gui_base->add_child(load_error_dialog); + //EditorImport::add_importer( Ref( memnew(EditorImporterCollada ))); EditorFileSystem::get_singleton()->connect("sources_changed",this,"_sources_changed"); diff --git a/tools/editor/editor_node.h b/tools/editor/editor_node.h index 0393cd19a94..c5271860eb5 100644 --- a/tools/editor/editor_node.h +++ b/tools/editor/editor_node.h @@ -79,7 +79,7 @@ #include "fileserver/editor_file_server.h" #include "editor_resource_preview.h" - +#include "scene/gui/viewport_container.h" #include "progress_dialog.h" @@ -207,7 +207,7 @@ private: //Ref scene_import_metadata; - Control* scene_root_parent; + PanelContainer* scene_root_parent; Control *gui_base; VBoxContainer *main_vbox; diff --git a/tools/editor/plugins/canvas_item_editor_plugin.cpp b/tools/editor/plugins/canvas_item_editor_plugin.cpp index b0e002ba448..2025fe825e5 100644 --- a/tools/editor/plugins/canvas_item_editor_plugin.cpp +++ b/tools/editor/plugins/canvas_item_editor_plugin.cpp @@ -3320,7 +3320,8 @@ CanvasItemEditor::CanvasItemEditor(EditorNode *p_editor) { vp_base->set_v_size_flags(SIZE_EXPAND_FILL); palette_split->add_child(vp_base); - Control *vp = memnew (Control); + ViewportContainer *vp = memnew (ViewportContainer); + vp->set_stretch(true); vp_base->add_child(vp); vp->set_area_as_parent_rect(); vp->add_child(p_editor->get_scene_root()); diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp index afea6f93dd7..55a4c6c9347 100644 --- a/tools/editor/plugins/spatial_editor_plugin.cpp +++ b/tools/editor/plugins/spatial_editor_plugin.cpp @@ -2348,7 +2348,7 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed message_time=0; spatial_editor=p_spatial_editor; - Control *c=memnew(Control); + ViewportContainer *c=memnew(ViewportContainer); add_child(c); c->set_area_as_parent_rect(); viewport = memnew( Viewport ); @@ -3973,7 +3973,7 @@ SpatialEditor::SpatialEditor(EditorNode *p_editor) { - settings_light_base = memnew( Control ); + settings_light_base = memnew( ViewportContainer ); settings_light_base->set_custom_minimum_size(Size2(128,128)); settings_light_base->connect("input_event",this,"_default_light_angle_input"); settings_vbc->add_margin_child(TTR("Default Light Normal:"),settings_light_base); diff --git a/tools/editor/plugins/spatial_editor_plugin.h b/tools/editor/plugins/spatial_editor_plugin.h index 2a52eab777f..4751cf70717 100644 --- a/tools/editor/plugins/spatial_editor_plugin.h +++ b/tools/editor/plugins/spatial_editor_plugin.h @@ -430,7 +430,7 @@ private: float settings_default_light_rot_x; float settings_default_light_rot_y; - Control *settings_light_base; + ViewportContainer *settings_light_base; Viewport *settings_light_vp; ColorPickerButton *settings_ambient_color; Image settings_light_dir_image; From 850eaf7ed796d2f2d9a35c6bc4ba9a4e69f5ca1d Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Fri, 7 Oct 2016 11:31:18 -0300 Subject: [PATCH 004/223] -the new shader language seems to work -shader editor plugin can edit shaders -code completion in shader editor plugin --- bin/tests/test_shader_lang.cpp | 267 +++-- scene/register_scene_types.cpp | 2 +- scene/resources/shader.cpp | 2 +- scene/resources/shader.h | 8 +- servers/register_server_types.cpp | 8 +- servers/visual/shader_language.cpp | 1053 ++++++++++++++--- servers/visual/shader_language.h | 88 +- servers/visual/shader_types.cpp | 99 ++ servers/visual/shader_types.h | 27 + tools/editor/editor_node.cpp | 3 +- tools/editor/plugins/shader_editor_plugin.cpp | 234 ++-- tools/editor/plugins/shader_editor_plugin.h | 25 +- 12 files changed, 1317 insertions(+), 499 deletions(-) create mode 100644 servers/visual/shader_types.cpp create mode 100644 servers/visual/shader_types.h diff --git a/bin/tests/test_shader_lang.cpp b/bin/tests/test_shader_lang.cpp index 6c8132122ce..1a677bcbe2e 100644 --- a/bin/tests/test_shader_lang.cpp +++ b/bin/tests/test_shader_lang.cpp @@ -39,7 +39,7 @@ #include "servers/visual/shader_language.h" //#include "drivers/gles2/shader_compiler_gles2.h" -#if 0 + typedef ShaderLanguage SL; namespace TestShaderLang { @@ -57,50 +57,54 @@ static String _mktab(int p_level) { static String _typestr(SL::DataType p_type) { - switch(p_type) { - - case SL::TYPE_VOID: return "void"; - case SL::TYPE_BOOL: return "bool"; - case SL::TYPE_FLOAT: return "float"; - case SL::TYPE_VEC2: return "vec2"; - case SL::TYPE_VEC3: return "vec3"; - case SL::TYPE_VEC4: return "vec4"; - case SL::TYPE_MAT3: return "mat3"; - case SL::TYPE_MAT4: return "mat4"; - case SL::TYPE_TEXTURE: return "texture"; - case SL::TYPE_CUBEMAP: return "cubemap"; - default: {} - } + return ShaderLanguage::get_datatype_name(p_type); return ""; } + +static String _prestr(SL::DataPrecision p_pres) { + + + switch(p_pres) { + case SL::PRECISION_LOWP: return "lowp "; + case SL::PRECISION_MEDIUMP: return "mediump "; + case SL::PRECISION_HIGHP: return "highp "; + case SL::PRECISION_DEFAULT: return ""; + } + return ""; +} + + static String _opstr(SL::Operator p_op) { - switch(p_op) { - case SL::OP_ASSIGN: return "="; - case SL::OP_ADD: return "+"; - case SL::OP_SUB: return "-"; - case SL::OP_MUL: return "*"; - case SL::OP_DIV: return "/"; - case SL::OP_ASSIGN_ADD: return "+="; - case SL::OP_ASSIGN_SUB: return "-="; - case SL::OP_ASSIGN_MUL: return "*="; - case SL::OP_ASSIGN_DIV: return "/="; - case SL::OP_NEG: return "-"; - case SL::OP_NOT: return "!"; - case SL::OP_CMP_EQ: return "=="; - case SL::OP_CMP_NEQ: return "!="; - case SL::OP_CMP_LEQ: return "<="; - case SL::OP_CMP_GEQ: return ">="; - case SL::OP_CMP_LESS: return "<"; - case SL::OP_CMP_GREATER: return ">"; - case SL::OP_CMP_OR: return "||"; - case SL::OP_CMP_AND: return "&&"; - default: return ""; - } + return ShaderLanguage::get_operator_text(p_op); - return ""; + +} + + +static String get_constant_text(SL::DataType p_type, const Vector& p_values) { + + switch(p_type) { + case SL::TYPE_BOOL: return p_values[0].boolean?"true":"false"; + case SL::TYPE_BVEC2: return String()+"bvec2("+(p_values[0].boolean?"true":"false")+(p_values[1].boolean?"true":"false")+")"; + case SL::TYPE_BVEC3: return String()+"bvec3("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+")"; + case SL::TYPE_BVEC4: return String()+"bvec4("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+","+(p_values[3].boolean?"true":"false")+")"; + case SL::TYPE_INT: return rtos(p_values[0].sint); + case SL::TYPE_IVEC2: return String()+"ivec2("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+")"; + case SL::TYPE_IVEC3: return String()+"ivec3("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+")"; + case SL::TYPE_IVEC4: return String()+"ivec4("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+","+rtos(p_values[3].sint)+")"; + case SL::TYPE_UINT: return rtos(p_values[0].real); + case SL::TYPE_UVEC2: return String()+"uvec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; + case SL::TYPE_UVEC3: return String()+"uvec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; + case SL::TYPE_UVEC4: return String()+"uvec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; + case SL::TYPE_FLOAT: return rtos(p_values[0].real); + case SL::TYPE_VEC2: return String()+"vec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; + case SL::TYPE_VEC3: return String()+"vec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; + case SL::TYPE_VEC4: return String()+"vec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; + default: ERR_FAIL_V(String()); + } } static String dump_node_code(SL::Node *p_node,int p_level) { @@ -109,18 +113,48 @@ static String dump_node_code(SL::Node *p_node,int p_level) { switch(p_node->type) { - case SL::Node::TYPE_PROGRAM: { + case SL::Node::TYPE_SHADER: { - SL::ProgramNode *pnode=(SL::ProgramNode*)p_node; + SL::ShaderNode *pnode=(SL::ShaderNode*)p_node; - for(Map::Element *E=pnode->uniforms.front();E;E=E->next()) { + for(Map::Element *E=pnode->uniforms.front();E;E=E->next()) { String ucode="uniform "; - ucode+=_typestr(E->get().type)+"="+String(E->get().default_value)+"\n"; - code+=ucode; + ucode+=_prestr(E->get().precission); + ucode+=_typestr(E->get().type); + ucode+=" "+String(E->key()); + + if (E->get().default_value.size()) { + ucode+=" = "+get_constant_text(E->get().type,E->get().default_value); + } + + static const char*hint_name[SL::ShaderNode::Uniform::HINT_MAX]={ + "", + "color", + "range", + "albedo", + "normal", + "black", + "white" + }; + + if (E->get().hint) + ucode+=" : "+String(hint_name[E->get().hint]); + + code+=ucode+"\n"; } + for(Map::Element *E=pnode->varyings.front();E;E=E->next()) { + + String vcode="varying "; + vcode+=_prestr(E->get().precission); + vcode+=_typestr(E->get().type); + vcode+=" "+String(E->key()); + + code+=vcode+"\n"; + + } for(int i=0;ifunctions.size();i++) { SL::FunctionNode *fnode=pnode->functions[i].function; @@ -131,16 +165,15 @@ static String dump_node_code(SL::Node *p_node,int p_level) { if (i>0) header+=", "; - header+=_typestr(fnode->arguments[i].type)+" "+fnode->arguments[i].name; + header+=_prestr(fnode->arguments[i].precision)+_typestr(fnode->arguments[i].type)+" "+fnode->arguments[i].name; } - header+=") {\n"; + header+=")\n"; code+=header; code+=dump_node_code(fnode->body,p_level+1); - code+="}\n"; } - code+=dump_node_code(pnode->body,p_level); + //code+=dump_node_code(pnode->body,p_level); } break; case SL::Node::TYPE_FUNCTION: { @@ -149,15 +182,23 @@ static String dump_node_code(SL::Node *p_node,int p_level) { SL::BlockNode *bnode=(SL::BlockNode*)p_node; //variables - for(Map::Element *E=bnode->variables.front();E;E=E->next()) { + code+=_mktab(p_level-1)+"{\n"; + for(Map::Element *E=bnode->variables.front();E;E=E->next()) { - code+=_mktab(p_level)+_typestr(E->value())+" "+E->key()+";\n"; + code+=_mktab(p_level)+_prestr(E->get().precision)+_typestr(E->get().type)+" "+E->key()+";\n"; } for(int i=0;istatements.size();i++) { - code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";\n"; + String scode = dump_node_code(bnode->statements[i],p_level); + + if (bnode->statements[i]->type==SL::Node::TYPE_CONTROL_FLOW || bnode->statements[i]->type==SL::Node::TYPE_CONTROL_FLOW) { + code+=scode; //use directly + } else { + code+=_mktab(p_level)+scode+";\n"; + } } + code+=_mktab(p_level-1)+"}\n"; } break; @@ -168,18 +209,7 @@ static String dump_node_code(SL::Node *p_node,int p_level) { } break; case SL::Node::TYPE_CONSTANT: { SL::ConstantNode *cnode=(SL::ConstantNode*)p_node; - switch(cnode->datatype) { - - - case SL::TYPE_BOOL: code=cnode->value.operator bool()?"true":"false"; break; - case SL::TYPE_FLOAT: code=cnode->value; break; - case SL::TYPE_VEC2: { Vector2 v = cnode->value; code="vec2("+rtos(v.x)+", "+rtos(v.y)+")"; } break; - case SL::TYPE_VEC3: { Vector3 v = cnode->value; code="vec3("+rtos(v.x)+", "+rtos(v.y)+", "+rtos(v.z)+")"; } break; - case SL::TYPE_VEC4: { Plane v = cnode->value; code="vec4("+rtos(v.normal.x)+", "+rtos(v.normal.y)+", "+rtos(v.normal.z)+", "+rtos(v.d)+")"; } break; - case SL::TYPE_MAT3: { Matrix3 x = cnode->value; code="mat3( vec3("+rtos(x.get_axis(0).x)+", "+rtos(x.get_axis(0).y)+", "+rtos(x.get_axis(0).z)+"), vec3("+rtos(x.get_axis(1).x)+", "+rtos(x.get_axis(1).y)+", "+rtos(x.get_axis(1).z)+"), vec3("+rtos(x.get_axis(2).x)+", "+rtos(x.get_axis(2).y)+", "+rtos(x.get_axis(2).z)+"))"; } break; - case SL::TYPE_MAT4: { Transform x = cnode->value; code="mat4( vec3("+rtos(x.basis.get_axis(0).x)+", "+rtos(x.basis.get_axis(0).y)+", "+rtos(x.basis.get_axis(0).z)+"), vec3("+rtos(x.basis.get_axis(1).x)+", "+rtos(x.basis.get_axis(1).y)+", "+rtos(x.basis.get_axis(1).z)+"), vec3("+rtos(x.basis.get_axis(2).x)+", "+rtos(x.basis.get_axis(2).y)+", "+rtos(x.basis.get_axis(2).z)+"), vec3("+rtos(x.origin.x)+", "+rtos(x.origin.y)+", "+rtos(x.origin.z)+"))"; } break; - default: code="value.get_type())+" ("+itos(cnode->datatype)+">"; - } + return get_constant_text(cnode->datatype,cnode->values); } break; case SL::Node::TYPE_OPERATOR: { @@ -193,28 +223,25 @@ static String dump_node_code(SL::Node *p_node,int p_level) { case SL::OP_ASSIGN_SUB: case SL::OP_ASSIGN_MUL: case SL::OP_ASSIGN_DIV: + case SL::OP_ASSIGN_SHIFT_LEFT: + case SL::OP_ASSIGN_SHIFT_RIGHT: + case SL::OP_ASSIGN_MOD: + case SL::OP_ASSIGN_BIT_AND: + case SL::OP_ASSIGN_BIT_OR: + case SL::OP_ASSIGN_BIT_XOR: code=dump_node_code(onode->arguments[0],p_level)+_opstr(onode->op)+dump_node_code(onode->arguments[1],p_level); break; - - case SL::OP_ADD: - case SL::OP_SUB: - case SL::OP_MUL: - case SL::OP_DIV: - case SL::OP_CMP_EQ: - case SL::OP_CMP_NEQ: - case SL::OP_CMP_LEQ: - case SL::OP_CMP_GEQ: - case SL::OP_CMP_LESS: - case SL::OP_CMP_GREATER: - case SL::OP_CMP_OR: - case SL::OP_CMP_AND: - - code="("+dump_node_code(onode->arguments[0],p_level)+_opstr(onode->op)+dump_node_code(onode->arguments[1],p_level)+")"; - break; - case SL::OP_NEG: + case SL::OP_BIT_INVERT: + case SL::OP_NEGATE: case SL::OP_NOT: + case SL::OP_DECREMENT: + case SL::OP_INCREMENT: code=_opstr(onode->op)+dump_node_code(onode->arguments[0],p_level); break; + case SL::OP_POST_DECREMENT: + case SL::OP_POST_INCREMENT: + code=dump_node_code(onode->arguments[0],p_level)+_opstr(onode->op); + break; case SL::OP_CALL: case SL::OP_CONSTRUCT: code=dump_node_code(onode->arguments[0],p_level)+"("; @@ -225,7 +252,12 @@ static String dump_node_code(SL::Node *p_node,int p_level) { } code+=")"; break; - default: {} + default: { + + code="("+dump_node_code(onode->arguments[0],p_level)+_opstr(onode->op)+dump_node_code(onode->arguments[1],p_level)+")"; + break; + + } } } break; @@ -233,20 +265,19 @@ static String dump_node_code(SL::Node *p_node,int p_level) { SL::ControlFlowNode *cfnode=(SL::ControlFlowNode*)p_node; if (cfnode->flow_op==SL::FLOW_OP_IF) { - code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {\n"; - code+=dump_node_code(cfnode->statements[1],p_level+1); - if (cfnode->statements.size()==3) { + code+=_mktab(p_level)+"if ("+dump_node_code(cfnode->expressions[0],p_level)+")\n"; + code+=dump_node_code(cfnode->blocks[0],p_level+1); + if (cfnode->blocks.size()==2) { - code+="} else {\n"; - code+=dump_node_code(cfnode->statements[2],p_level+1); + code+=_mktab(p_level)+"else\n"; + code+=dump_node_code(cfnode->blocks[1],p_level+1); } - code+="}\n"; } else if (cfnode->flow_op==SL::FLOW_OP_RETURN) { - if (cfnode->statements.size()) { - code="return "+dump_node_code(cfnode->statements[0],p_level); + if (cfnode->blocks.size()) { + code="return "+dump_node_code(cfnode->blocks[0],p_level); } else { code="return"; } @@ -264,16 +295,14 @@ static String dump_node_code(SL::Node *p_node,int p_level) { } -static Error recreate_code(void *p_str,SL::ProgramNode *p_program) { +static Error recreate_code(void *p_str,SL::ShaderNode *p_program) { + - print_line("recr"); String *str=(String*)p_str; *str=dump_node_code(p_program,0); return OK; - - } @@ -283,6 +312,7 @@ MainLoop* test() { if (cmdlargs.empty()) { //try editor! + print_line("usage: godot -test shader_lang "); return NULL; } @@ -303,50 +333,29 @@ MainLoop* test() { code+=c; } - int errline; - int errcol; - String error; - print_line(SL::lex_debug(code)); - Error err = SL::compile(code,ShaderLanguage::SHADER_MATERIAL_FRAGMENT,NULL,NULL,&error,&errline,&errcol); + SL sl; + print_line("tokens:\n\n"+sl.token_debug(code)); + + Map > dt; + dt["fragment"]["ALBEDO"]=SL::TYPE_VEC3; + + Set rm; + rm.insert("popo"); + + Error err = sl.compile(code,dt,rm); if (err) { - print_line("Error: "+itos(errline)+":"+itos(errcol)+" "+error); + print_line("Error at line: "+rtos(sl.get_error_line())+": "+sl.get_error_text()); return NULL; + } else { + String code; + recreate_code(&code,sl.get_shader()); + print_line("code:\n\n"+code); } - print_line("Compile OK! - pretty printing"); - - String rcode; - err = SL::compile(code,ShaderLanguage::SHADER_MATERIAL_FRAGMENT,recreate_code,&rcode,&error,&errline,&errcol); - - if (!err) { - print_line(rcode); - } -#if 0 - ShaderCompilerGLES2 comp; - String codeline,globalsline; - SL::VarInfo vi; - vi.name="mongs"; - vi.type=SL::TYPE_VEC3; - - - ShaderCompilerGLES2::Flags fl; - comp.compile(code,ShaderLanguage::SHADER_MATERIAL_FRAGMENT,codeline,globalsline,fl); -#endif - return NULL; -} - -} -#endif - -typedef ShaderLanguage SL; - -namespace TestShaderLang { - -MainLoop* test() { - return NULL; } + } diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp index 1bff6801fe5..ad845555a46 100644 --- a/scene/register_scene_types.cpp +++ b/scene/register_scene_types.cpp @@ -548,7 +548,7 @@ void register_scene_types() { // ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); // ObjectTypeDB::register_type(); - ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); ObjectTypeDB::add_compatibility_type("Shader","MaterialShader"); ObjectTypeDB::add_compatibility_type("ParticleSystemMaterial","FixedMaterial"); ObjectTypeDB::add_compatibility_type("UnshadedMaterial","FixedMaterial"); diff --git a/scene/resources/shader.cpp b/scene/resources/shader.cpp index b6c8fcf7a17..8889d1b6f87 100644 --- a/scene/resources/shader.cpp +++ b/scene/resources/shader.cpp @@ -131,7 +131,7 @@ void Shader::_bind_methods() { ADD_PROPERTY( PropertyInfo(Variant::STRING, "code",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_NOEDITOR), _SCS("set_code"), _SCS("get_code") ); - BIND_CONSTANT( MODE_MATERIAL ); + BIND_CONSTANT( MODE_SPATIAL); BIND_CONSTANT( MODE_CANVAS_ITEM ); BIND_CONSTANT( MODE_POST_PROCESS ); diff --git a/scene/resources/shader.h b/scene/resources/shader.h index 988305b6540..eddb98333d0 100644 --- a/scene/resources/shader.h +++ b/scene/resources/shader.h @@ -42,7 +42,7 @@ class Shader : public Resource { public: enum Mode { - MODE_MATERIAL, + MODE_SPATIAL, MODE_CANVAS_ITEM, MODE_POST_PROCESS, MODE_MAX @@ -100,13 +100,13 @@ public: VARIANT_ENUM_CAST( Shader::Mode ); -class MaterialShader : public Shader { +class SpatialShader : public Shader { - OBJ_TYPE(MaterialShader,Shader); + OBJ_TYPE(SpatialShader,Shader); public: - MaterialShader() : Shader(MODE_MATERIAL) {}; + SpatialShader() : Shader(MODE_SPATIAL) {}; }; class CanvasItemShader : public Shader { diff --git a/servers/register_server_types.cpp b/servers/register_server_types.cpp index 06eaa4d1224..19ee3a48c79 100644 --- a/servers/register_server_types.cpp +++ b/servers/register_server_types.cpp @@ -36,7 +36,7 @@ #include "spatial_sound_server.h" #include "spatial_sound_2d_server.h" #include "script_debugger_remote.h" - +#include "visual/shader_types.h" static void _debugger_get_resource_usage(List* r_usage) { List tinfo; @@ -55,6 +55,8 @@ static void _debugger_get_resource_usage(Listadd_singleton( Globals::Singleton("VisualServer",VisualServer::get_singleton()) ); @@ -70,6 +72,8 @@ void register_server_types() { Globals::get_singleton()->add_singleton( Globals::Singleton("SpatialSound2DServer",SpatialSound2DServer::get_singleton()) ); Globals::get_singleton()->add_singleton( Globals::Singleton("SS2D",SpatialSound2DServer::get_singleton()) ); + shader_types = memnew( ShaderTypes ); + ObjectTypeDB::register_virtual_type(); ObjectTypeDB::register_virtual_type(); @@ -87,5 +91,5 @@ void register_server_types() { void unregister_server_types(){ - + memdelete( shader_types ); } diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp index 287293a4379..1a9912e3fb7 100644 --- a/servers/visual/shader_language.cpp +++ b/servers/visual/shader_language.cpp @@ -44,14 +44,71 @@ static bool _is_hex(CharType c) { return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); } + +String ShaderLanguage::get_operator_text(Operator p_op) { + + static const char* op_names[OP_MAX]={"==", + "!=", + "<", + "<=", + ">", + ">=", + "&&", + "||", + "!", + "-", + "+", + "-", + "*", + "/", + "%", + "<<", + ">>", + "=", + "+=", + "-=", + "*=", + "/=", + "%=", + "<<=", + ">>=", + "&=", + "|=", + "^=", + "&", + "|", + "^", + "~", + "++" + "--", + "()", + "construct"}; + + return op_names[p_op]; + +} + + const char * ShaderLanguage::token_names[TK_MAX]={ "EMPTY", "IDENTIFIER", "TRUE", "FALSE", "REAL_CONSTANT", + "INT_CONSTANT", "TYPE_VOID", "TYPE_BOOL", + "TYPE_BVEC2", + "TYPE_BVEC3", + "TYPE_BVEC4", + "TYPE_INT", + "TYPE_IVEC2", + "TYPE_IVEC3", + "TYPE_IVEC4", + "TYPE_UINT", + "TYPE_UVEC2", + "TYPE_UVEC3", + "TYPE_UVEC4", "TYPE_FLOAT", "TYPE_VEC2", "TYPE_VEC3", @@ -59,9 +116,13 @@ const char * ShaderLanguage::token_names[TK_MAX]={ "TYPE_MAT2", "TYPE_MAT3", "TYPE_MAT4", - "TYPE_TEXTURE", - "TYPE_CUBEMAP", - "TYPE_COLOR", + "TYPE_SAMPLER2D", + "TYPE_ISAMPLER2D", + "TYPE_USAMPLER2D", + "TYPE_SAMPLERCUBE", + "PRECISION_LOW", + "PRECISION_MID", + "PRECISION_HIGH", "OP_EQUAL", "OP_NOT_EQUAL", "OP_LESS", @@ -75,14 +136,35 @@ const char * ShaderLanguage::token_names[TK_MAX]={ "OP_SUB", "OP_MUL", "OP_DIV", - "OP_NEG", + "OP_MOD", + "OP_SHIFT_LEFT", + "OP_SHIFT_RIGHT", "OP_ASSIGN", "OP_ASSIGN_ADD", "OP_ASSIGN_SUB", "OP_ASSIGN_MUL", "OP_ASSIGN_DIV", + "OP_ASSIGN_MOD", + "OP_ASSIGN_SHIFT_LEFT", + "OP_ASSIGN_SHIFT_RIGHT", + "OP_ASSIGN_BIT_AND", + "OP_ASSIGN_BIT_OR", + "OP_ASSIGN_BIT_XOR", + "OP_BIT_AND", + "OP_BIT_OR", + "OP_BIT_XOR", + "OP_BIT_INVERT", + "OP_INCREMENT", + "OP_DECREMENT", "CF_IF", "CF_ELSE", + "CF_FOR", + "CF_WHILE", + "CF_DO", + "CF_SWITCH", + "CF_CASE", + "CF_BREAK", + "CF_CONTINUE", "CF_RETURN", "BRACKET_OPEN", "BRACKET_CLOSE", @@ -90,11 +172,23 @@ const char * ShaderLanguage::token_names[TK_MAX]={ "CURLY_BRACKET_CLOSE", "PARENTHESIS_OPEN", "PARENTHESIS_CLOSE", + "QUESTION", "COMMA", + "COLON", "SEMICOLON", "PERIOD", "UNIFORM", + "VARYING", + "RENDER_MODE", + "HINT_WHITE_TEXTURE", + "HINT_BLACK_TEXTURE", + "HINT_NORMAL_TEXTURE", + "HINT_ALBEDO_TEXTURE", + "HINT_COLOR", + "HINT_RANGE", + "CURSOR", "ERROR", + "EOF", }; String ShaderLanguage::get_token_text(Token p_token) { @@ -104,6 +198,8 @@ String ShaderLanguage::get_token_text(Token p_token) { name+="("+rtos(p_token.constant)+")"; } else if (p_token.type==TK_IDENTIFIER) { name+="("+String(p_token.text)+")"; + } else if (p_token.type==TK_ERROR) { + name+="("+String(p_token.text)+")"; } return name; @@ -115,18 +211,79 @@ ShaderLanguage::Token ShaderLanguage::_make_token(TokenType p_type,const StringN tk.type=p_type; tk.text=p_text; tk.line=tk_line; + if (tk.type==TK_ERROR) { + _set_error(p_text); + } return tk; } +const ShaderLanguage::KeyWord ShaderLanguage::keyword_list[]={ + {TK_TRUE,"true"}, + {TK_FALSE,"false"}, + {TK_TYPE_VOID,"void"}, + {TK_TYPE_BOOL,"bool"}, + {TK_TYPE_BVEC2,"bvec2"}, + {TK_TYPE_BVEC3,"bvec3"}, + {TK_TYPE_BVEC4,"bvec4"}, + {TK_TYPE_INT,"int"}, + {TK_TYPE_IVEC2,"ivec2"}, + {TK_TYPE_IVEC3,"ivec3"}, + {TK_TYPE_IVEC4,"ivec4"}, + {TK_TYPE_UINT,"uint"}, + {TK_TYPE_UVEC2,"uvec2"}, + {TK_TYPE_UVEC3,"uvec3"}, + {TK_TYPE_UVEC4,"uvec4"}, + {TK_TYPE_FLOAT,"float"}, + {TK_TYPE_VEC2,"vec2"}, + {TK_TYPE_VEC3,"vec3"}, + {TK_TYPE_VEC4,"vec4"}, + {TK_TYPE_MAT2,"mat2"}, + {TK_TYPE_MAT3,"mat3"}, + {TK_TYPE_MAT4,"mat4"}, + {TK_TYPE_SAMPLER2D,"sampler2D"}, + {TK_TYPE_ISAMPLER2D,"isampler2D"}, + {TK_TYPE_USAMPLER2D,"usampler2D"}, + {TK_TYPE_SAMPLERCUBE,"samplerCube"}, + {TK_PRECISION_LOW,"lowp"}, + {TK_PRECISION_MID,"mediump"}, + {TK_PRECISION_HIGH,"highp"}, + {TK_CF_IF,"if"}, + {TK_CF_ELSE,"else"}, + {TK_CF_FOR,"for"}, + {TK_CF_WHILE,"while"}, + {TK_CF_DO,"do"}, + {TK_CF_SWITCH,"switch"}, + {TK_CF_CASE,"case"}, + {TK_CF_BREAK,"break"}, + {TK_CF_CONTINUE,"continue"}, + {TK_CF_RETURN,"return"}, + {TK_UNIFORM,"uniform"}, + {TK_VARYING,"varying"}, + {TK_RENDER_MODE,"render_mode"}, + {TK_HINT_WHITE_TEXTURE,"white"}, + {TK_HINT_BLACK_TEXTURE,"black"}, + {TK_HINT_NORMAL_TEXTURE,"normal"}, + {TK_HINT_ALBEDO_TEXTURE,"albedo"}, + {TK_HINT_COLOR,"color"}, + {TK_HINT_RANGE,"hint_range"}, + + {TK_ERROR,NULL} +}; + + ShaderLanguage::Token ShaderLanguage::_get_token() { -#define GETCHAR(m_idx) ((char_idx tokens; - String error; - int errline,errcol; - if (tokenize(p_code,&tokens,&error,&errline,&errcol)!=OK) - return error; - String ret; - for(int i=0;iarguments[i+1]->type==Node::TYPE_CONSTANT && convert_constant(static_cast(p_func->arguments[i+1]),builtin_func_defs[idx].args[i])) { + //all good + } else if (args[i]!=builtin_func_defs[idx].args[i]) { fail=true; break; } } - if (!fail && argcount<4 && intrinsic_func_defs[idx].args[argcount]!=TYPE_VOID) + if (!fail && argcount<4 && builtin_func_defs[idx].args[argcount]!=TYPE_VOID) fail=true; //make sure the number of arguments matches if (!fail) { - found_intrinsic=true; + if (r_ret_type) - *r_ret_type=intrinsic_func_defs[idx].rettype; + *r_ret_type=builtin_func_defs[idx].rettype; return true; } @@ -1700,8 +1821,24 @@ bool ShaderLanguage::_validate_function_call(BlockNode* p_block, OperatorNode *p } } + if (failed_builtin) { + String err ="Invalid arguments for built-in function: "+String(name)+"("; + for(int i=0;i0) + err+=","; + + if (p_func->arguments[i+1]->type==Node::TYPE_CONSTANT && p_func->arguments[i+1]->get_datatype()==TYPE_INT && static_cast(p_func->arguments[i+1])->values[0].sint<0) { + err+="-"; + } + err+=get_datatype_name(args[i]); + } + err+=")"; + _set_error(err); + return false; + } + #if 0 - if (found_intrinsic) { + if (found_builtin) { if (p_func->op==OP_CONSTRUCT && all_const) { @@ -1769,22 +1906,22 @@ bool ShaderLanguage::_validate_function_call(BlockNode* p_block, OperatorNode *p block=block->parent_block; } + if (name==exclude_function) { + _set_error("Recursion is not allowed"); + return false; + } for(int i=0;ifunctions.size();i++) { - if (shader->functions[i].name==exclude_function) { - _set_error("Recursion is not allowed"); - return false; - } + + if (name != shader->functions[i].name) + continue; if (!shader->functions[i].callable) { _set_error("Function '"+String(name)+" can't be called from source code."); return false; } - if (name != shader->functions[i].name) - continue; - FunctionNode *pfunc = shader->functions[i].function; @@ -1795,7 +1932,10 @@ bool ShaderLanguage::_validate_function_call(BlockNode* p_block, OperatorNode *p for(int i=0;iarguments[i].type) { + + if (get_scalar_type(args[i])==args[i] && p_func->arguments[i+1]->type==Node::TYPE_CONSTANT && convert_constant(static_cast(p_func->arguments[i+1]),pfunc->arguments[i].type)) { + //all good + } else if (args[i]!=pfunc->arguments[i].type) { fail=true; break; } @@ -1811,35 +1951,57 @@ bool ShaderLanguage::_validate_function_call(BlockNode* p_block, OperatorNode *p } -bool ShaderLanguage::_parse_function_arguments(BlockNode* p_block,const Map &p_builtin_types,OperatorNode* p_func) { +bool ShaderLanguage::_parse_function_arguments(BlockNode* p_block,const Map &p_builtin_types,OperatorNode* p_func,int *r_complete_arg) { + TkPos pos = _get_tkpos(); Token tk = _get_token(); + if (tk.type==TK_PARENTHESIS_CLOSE) { + return true; + } + + _set_tkpos(pos);; + while(true) { - if (tk.type==TK_PARENTHESIS_CLOSE) { - return true; + + if (r_complete_arg) { + pos = _get_tkpos(); + tk = _get_token(); + + if (tk.type==TK_CURSOR) { + + *r_complete_arg=p_func->arguments.size()-1; + } else { + + _set_tkpos(pos); + } } - Node *arg= _parse_expression(p_block,p_builtin_types); - if (!arg) + Node *arg= _parse_and_reduce_expression(p_block,p_builtin_types); + + if (!arg) { + return false; + } + p_func->arguments.push_back(arg); tk = _get_token(); + if (tk.type==TK_PARENTHESIS_CLOSE) { - //none - } else if (tk.type==TK_COMMA) { - tk = _get_token(); //next - } else { + + return true; + } else if (tk.type!=TK_COMMA) { // something is broken _set_error("Expected ',' or ')' after argument"); return false; } + } return true; @@ -1886,6 +2048,149 @@ bool ShaderLanguage::is_token_operator(TokenType p_type) { } +bool ShaderLanguage::convert_constant(ConstantNode* p_constant, DataType p_to_type,ConstantNode::Value *p_value) { + + if (p_constant->datatype==p_to_type) { + if (p_value) { + for(int i=0;ivalues.size();i++) { + p_value[i]=p_constant->values[i]; + } + } + return true; + } else if (p_constant->datatype==TYPE_INT && p_to_type==TYPE_FLOAT) { + + if (p_value) { + p_value->real=p_constant->values[0].sint; + } + return true; + } else if (p_constant->datatype==TYPE_UINT && p_to_type==TYPE_FLOAT) { + + if (p_value) { + p_value->real=p_constant->values[0].uint; + } + return true; + } else if (p_constant->datatype==TYPE_INT && p_to_type==TYPE_UINT) { + if (p_constant->values[0].sint<0) { + return false; + } + if (p_value) { + p_value->uint=p_constant->values[0].sint; + } + return true; + } else if (p_constant->datatype==TYPE_UINT && p_to_type==TYPE_INT) { + + if (p_constant->values[0].uint>0x7FFFFFFF) { + return false; + } + if (p_value) { + p_value->sint=p_constant->values[0].uint; + } + return true; + } else + return false; + +} + +bool ShaderLanguage::is_scalar_type(DataType p_type) { + + return p_type==TYPE_BOOL || p_type==TYPE_INT || p_type==TYPE_UINT || p_type==TYPE_FLOAT; +} + +void ShaderLanguage::get_keyword_list(List *r_keywords) { + + Set kws; + + int idx=0; + + while(keyword_list[idx].text) { + + kws.insert(keyword_list[idx].text); + idx++; + } + + idx=0; + + while (builtin_func_defs[idx].name) { + + kws.insert(builtin_func_defs[idx].name); + + idx++; + } + + for(Set::Element *E=kws.front();E;E=E->next()) { + r_keywords->push_back(E->get()); + } +} + +ShaderLanguage::DataType ShaderLanguage::get_scalar_type(DataType p_type) { + + static const DataType scalar_types[]={ + TYPE_VOID, + TYPE_BOOL, + TYPE_BOOL, + TYPE_BOOL, + TYPE_BOOL, + TYPE_INT, + TYPE_INT, + TYPE_INT, + TYPE_INT, + TYPE_UINT, + TYPE_UINT, + TYPE_UINT, + TYPE_UINT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_FLOAT, + TYPE_INT, + TYPE_UINT, + TYPE_FLOAT, + }; + + return scalar_types[p_type]; +} + + +bool ShaderLanguage::_get_completable_identifier(BlockNode *p_block,CompletionType p_type,StringName& identifier) { + + identifier=StringName(); + + TkPos pos; + + Token tk = _get_token(); + + if (tk.type==TK_IDENTIFIER) { + identifier=tk.text; + pos = _get_tkpos(); + tk = _get_token(); + } + + if (tk.type==TK_CURSOR) { + + completion_type=p_type; + completion_line=tk_line; + completion_block=p_block; + + pos = _get_tkpos(); + tk = _get_token(); + + if (tk.type==TK_IDENTIFIER) { + identifier=identifier.operator String() + tk.text.operator String(); + } else { + _set_tkpos(pos); + } + return true; + } else if (identifier!=StringName()){ + _set_tkpos(pos); + } + + return false; +} + ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const Map &p_builtin_types) { @@ -1896,14 +2201,17 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const while(true) { Node *expr=NULL; - int pos = char_idx; + TkPos prepos = _get_tkpos(); Token tk = _get_token(); + TkPos pos = _get_tkpos(); + + print_line("in expr: "+get_token_text(tk)); if (tk.type==TK_PARENTHESIS_OPEN) { //handle subexpression - expr = _parse_expression(p_block,p_builtin_types); + expr = _parse_and_reduce_expression(p_block,p_builtin_types); if (!expr) return NULL; @@ -1919,15 +2227,19 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const ConstantNode *constant = alloc_node(); - constant->value=tk.constant; + ConstantNode::Value v; + v.real=tk.constant; + constant->values.push_back(v); constant->datatype=TYPE_FLOAT; expr=constant; - } else if (tk.type==TK_REAL_CONSTANT) { + } else if (tk.type==TK_INT_CONSTANT) { ConstantNode *constant = alloc_node(); - constant->value=int(tk.constant); + ConstantNode::Value v; + v.sint=tk.constant; + constant->values.push_back(v); constant->datatype=TYPE_INT; expr=constant; @@ -1936,7 +2248,9 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const //handle true constant ConstantNode *constant = alloc_node(); - constant->value=true; + ConstantNode::Value v; + v.boolean=true; + constant->values.push_back(v); constant->datatype=TYPE_BOOL; expr=constant; @@ -1944,7 +2258,9 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const //handle false constant ConstantNode *constant = alloc_node(); - constant->value=false; + ConstantNode::Value v; + v.boolean=false; + constant->values.push_back(v); constant->datatype=TYPE_BOOL; expr=constant; @@ -1953,9 +2269,11 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const //make sure void is not used in expression _set_error("Void value not allowed in Expression"); return NULL; - } else if (is_token_precision(tk.type) || is_token_nonvoid_datatype(tk.type)) { + } else if (is_token_nonvoid_datatype(tk.type)) { //basic type constructor + print_line("parse constructor"); + OperatorNode *func = alloc_node(); func->op=OP_CONSTRUCT; @@ -1976,9 +2294,20 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const return NULL; } - if (!_parse_function_arguments(p_block,p_builtin_types,func)) - return NULL; + int carg=-1; + bool ok = _parse_function_arguments(p_block,p_builtin_types,func,&carg); + + if (carg>=0) { + completion_type=COMPLETION_CALL_ARGUMENTS; + completion_line=tk_line; + completion_block=p_block; + completion_function=funcname->name; + completion_argument=carg; + } + + if (!ok) + return NULL; if (!_validate_function_call(p_block,func,&func->return_cache)) { _set_error("No matching constructor found for: '"+String(funcname->name)+"'"); @@ -1986,16 +2315,22 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const } //validate_Function_call() - expr=func; + expr=_reduce_expression(p_block,func); } else if (tk.type==TK_IDENTIFIER) { + _set_tkpos(prepos); + + StringName identifier; + + _get_completable_identifier(p_block,COMPLETION_IDENTIFIER,identifier); + tk=_get_token(); if (tk.type==TK_PARENTHESIS_OPEN) { //a function - StringName name = tk.text; + StringName name = identifier; OperatorNode *func = alloc_node(); func->op=OP_CALL; @@ -2003,9 +2338,22 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const funcname->name=name; func->arguments.push_back(funcname); - if (!_parse_function_arguments(p_block,p_builtin_types,func)) - return NULL; + int carg=-1; + bool ok =_parse_function_arguments(p_block,p_builtin_types,func,&carg); + + + + if (carg>=0) { + completion_type=COMPLETION_CALL_ARGUMENTS; + completion_line=tk_line; + completion_block=p_block; + completion_function=funcname->name; + completion_argument=carg; + } + + if (!ok) + return NULL; if (!_validate_function_call(p_block,func,&func->return_cache)) { _set_error("No matching function found for: '"+String(funcname->name)+"'"); @@ -2016,9 +2364,8 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const } else { //an identifier - char_idx=pos; //rollback - StringName identifier = tk.text; + _set_tkpos(pos); DataType data_type; IdentifierType ident_type; @@ -2044,7 +2391,7 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const } else if (tk.type==TK_OP_ADD) { continue; //this one does nothing - } if (tk.type==TK_OP_SUB || tk.type==TK_OP_NOT || tk.type==TK_OP_BIT_INVERT || tk.type==TK_OP_INCREMENT || tk.type==TK_OP_DECREMENT) { + } else if (tk.type==TK_OP_SUB || tk.type==TK_OP_NOT || tk.type==TK_OP_BIT_INVERT || tk.type==TK_OP_INCREMENT || tk.type==TK_OP_DECREMENT) { Expression e; @@ -2077,19 +2424,23 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const /* OK now see what's NEXT to the operator.. */ while(true) { - int pos = char_idx; + TkPos pos = _get_tkpos(); tk=_get_token(); if (tk.type==TK_PERIOD) { - tk=_get_token(); - if (tk.type!=TK_IDENTIFIER) { + StringName identifier; + if (_get_completable_identifier(p_block,COMPLETION_INDEX,identifier)) { + completion_base=expr->get_datatype(); + } + + if (identifier==StringName()) { _set_error("Expected identifier as member"); return NULL; } DataType dt = expr->get_datatype(); - String ident = tk.text; + String ident = identifier; bool ok=true; DataType member_type; @@ -2242,7 +2593,8 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const } expr=op; } else { - char_idx=pos; //rollback + + _set_tkpos(pos); break; } } @@ -2252,7 +2604,7 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const e.node=expr; expression.push_back(e); - pos = char_idx; + pos = _get_tkpos(); tk = _get_token(); if (is_token_operator(tk.type)) { @@ -2297,12 +2649,12 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const _set_error("Invalid token for operator: "+get_token_text(tk)); return NULL; } - } break; + } expression.push_back(o); } else { - char_idx=pos; //something else, so rollback and end + _set_tkpos(pos); //something else, so rollback and end break; } } @@ -2419,7 +2771,7 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const at+=get_datatype_name(op->arguments[i]->get_datatype()); } - _set_error("Invalid argument to unary operator: "+at); + _set_error("Invalid arguments to unary operator '"+get_operator_text(op->op)+"' :" +at); return NULL; } expression.remove(i+1); @@ -2506,7 +2858,7 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const at+=get_datatype_name(op->arguments[i]->get_datatype()); } - _set_error("Invalid arguments to operator: "+at); + _set_error("Invalid arguments to operator '"+get_operator_text(op->op)+"' :" +at); return NULL; } @@ -2530,45 +2882,88 @@ ShaderLanguage::Node* ShaderLanguage::_reduce_expression(BlockNode *p_block, Sha if (op->op==OP_CONSTRUCT) { + ERR_FAIL_COND_V(op->arguments[0]->type!=Node::TYPE_VARIABLE,p_node); VariableNode *vn = static_cast(op->arguments[0]); - StringName name=vn->name; + // StringName name=vn->name; - if (name=="vec2" || name=="vec3" || name=="vec4") { - Vector values; + DataType base=get_scalar_type(op->get_datatype()); - for(int i=1;iarguments.size();i++) { + Vector values; - op->arguments[i]=_reduce_expression(p_block,op->arguments[i]); - if (op->arguments[i]->type==Node::TYPE_CONSTANT) { - ConstantNode *cn = static_cast(op->arguments[i]); - values.push_back(cn->value); + for(int i=1;iarguments.size();i++) { + + + op->arguments[i]=_reduce_expression(p_block,op->arguments[i]); + if (op->arguments[i]->type==Node::TYPE_CONSTANT) { + ConstantNode *cn = static_cast(op->arguments[i]); + + if (get_scalar_type(cn->datatype)==base) { + + for(int j=0;jvalues.size();j++) { + values.push_back(cn->values[j]); + } + } else if (get_scalar_type(cn->datatype)==cn->datatype) { + + ConstantNode::Value v; + if (!convert_constant(cn,base,&v)) { + return p_node; + } + values.push_back(v); } else { - return p_node; //do not bother, not reducible + return p_node; } - } - ConstantNode *cn=alloc_node(); - - if (name=="vec2") { - cn->datatype=TYPE_VEC2; - cn->value=Vector2(values[0],values[1]); - } else if (name=="vec3") { - cn->datatype=TYPE_VEC3; - cn->value=Vector3(values[0],values[1],values[2]); - } else if (name=="vec4") { - cn->datatype=TYPE_VEC4; - cn->value=Plane(values[0],values[1],values[2],values[3]); } else { - ERR_FAIL_V(p_node); + return p_node; + } + } + + + ConstantNode *cn=alloc_node(); + cn->datatype=op->get_datatype(); + cn->values=values; + return cn; + } else if (op->op==OP_NEGATE) { + + op->arguments[0]=_reduce_expression(p_block,op->arguments[0]); + if (op->arguments[0]->type==Node::TYPE_CONSTANT) { + + ConstantNode *cn = static_cast(op->arguments[0]); + + DataType base=get_scalar_type(cn->datatype); + + Vector values; + + for(int i=0;ivalues.size();i++) { + + ConstantNode::Value nv; + switch(base) { + case TYPE_BOOL: { + nv.boolean=!cn->values[i].boolean; + } break; + case TYPE_INT: { + nv.sint=-cn->values[i].sint; + } break; + case TYPE_UINT: { + nv.uint=-cn->values[i].uint; + } break; + case TYPE_FLOAT: { + nv.real=-cn->values[i].real; + } break; + default: {} + } + + values.push_back(nv); } + + cn->values=values; return cn; } } - return p_node; @@ -2579,6 +2974,8 @@ ShaderLanguage::Node* ShaderLanguage::_parse_and_reduce_expression(BlockNode *p_ ShaderLanguage::Node* expr = _parse_expression(p_block,p_builtin_types); + if (!expr) //errored + return NULL; expr = _reduce_expression(p_block,expr); @@ -2592,7 +2989,7 @@ Error ShaderLanguage::_parse_block(BlockNode* p_block,const Mapvariables[name]=var; tk = _get_token(); @@ -2670,6 +3068,7 @@ Error ShaderLanguage::_parse_block(BlockNode* p_block,const Map(); block->parent_block=p_block; _parse_block(block,p_builtin_types,false,p_can_break,p_can_continue); + p_block->statements.push_back(block); } else if (tk.type==TK_CF_IF) { //if () {} tk = _get_token(); @@ -2692,29 +3091,34 @@ Error ShaderLanguage::_parse_block(BlockNode* p_block,const Map(); block->parent_block=p_block; + cf->expressions.push_back(n); cf->blocks.push_back(block); + p_block->statements.push_back(cf); - Error err=_parse_block(p_block,p_builtin_types,true,p_can_break,p_can_continue); + Error err=_parse_block(block,p_builtin_types,true,p_can_break,p_can_continue); - pos=char_idx; + pos=_get_tkpos(); tk = _get_token(); if (tk.type==TK_CF_ELSE) { block = alloc_node(); block->parent_block=p_block; cf->blocks.push_back(block); - err=_parse_block(p_block,p_builtin_types,true,p_can_break,p_can_continue); + err=_parse_block(block,p_builtin_types,true,p_can_break,p_can_continue); } else { - char_idx=pos; //rollback + _set_tkpos(pos); //rollback } } else { //nothng else, so expression - char_idx=pos; //rollback - _parse_and_reduce_expression(p_block,p_builtin_types); + _set_tkpos(pos); //rollback + Node*expr = _parse_and_reduce_expression(p_block,p_builtin_types); + if (!expr) + return ERR_PARSE_ERROR; + p_block->statements.push_back(expr); tk = _get_token(); if (tk.type!=TK_SEMICOLON) { @@ -2742,8 +3146,11 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Mapuniforms[name]=uniform; //todo parse default value tk = _get_token(); @@ -2835,19 +3241,35 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Map(expr)->value; + ConstantNode* cn = static_cast(expr); + + uniform.default_value.resize(cn->values.size()); + + if (!convert_constant(cn,uniform.type,uniform.default_value.ptr())) { + _set_error("Can't convert constant to "+get_datatype_name(uniform.type)); + return ERR_PARSE_ERROR; + } tk = _get_token(); } if (tk.type==TK_COLON) { //hint + tk = _get_token(); if (tk.type==TK_HINT_WHITE_TEXTURE) { - uniform.hint=ShaderNode::Uniform::HINT_WHITE_TEXTURE; + uniform.hint=ShaderNode::Uniform::HINT_WHITE; } else if (tk.type==TK_HINT_BLACK_TEXTURE) { - uniform.hint=ShaderNode::Uniform::HINT_BLACK_TEXTURE; + uniform.hint=ShaderNode::Uniform::HINT_BLACK; } else if (tk.type==TK_HINT_NORMAL_TEXTURE) { - uniform.hint=ShaderNode::Uniform::HINT_NORMAL_TEXTURE; + uniform.hint=ShaderNode::Uniform::HINT_NORMAL; + } else if (tk.type==TK_HINT_ALBEDO_TEXTURE) { + uniform.hint=ShaderNode::Uniform::HINT_ALBEDO; + } else if (tk.type==TK_HINT_COLOR) { + if (type!=TYPE_VEC4) { + _set_error("Color hint is for vec4 only"); + return ERR_PARSE_ERROR; + } + uniform.hint=ShaderNode::Uniform::HINT_COLOR; } else if (tk.type==TK_HINT_RANGE) { uniform.hint=ShaderNode::Uniform::HINT_RANGE; @@ -2913,17 +3335,23 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Mapuniforms[name]=uniform; + if (tk.type!=TK_SEMICOLON) { _set_error("Expected ';'"); return ERR_PARSE_ERROR; @@ -2965,15 +3393,14 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Map(),name)) { _set_error("Redefinition of '"+String(name)+"'"); @@ -3113,7 +3540,6 @@ Error ShaderLanguage::compile(const String& p_code, const Map< StringName, Map(); Error err = _parse_shader(p_functions,p_render_modes); @@ -3122,6 +3548,268 @@ Error ShaderLanguage::compile(const String& p_code, const Map< StringName, Map > &p_functions,const Set& p_render_modes,List* r_options,String& r_call_hint) { + + clear(); + + code=p_code; + + nodes=NULL; + + shader = alloc_node(); + Error err = _parse_shader(p_functions,p_render_modes); + + switch(completion_type) { + + case COMPLETION_NONE: { + //do none + return ERR_PARSE_ERROR; + } break; + case COMPLETION_RENDER_MODE: { + for(const Set::Element *E=p_render_modes.front();E;E=E->next()) { + + r_options->push_back(E->get()); + } + + return OK; + } break; + case COMPLETION_MAIN_FUNCTION: { + print_line("complete main func"); + for(const Map< StringName, Map >::Element *E=p_functions.front();E;E=E->next()) { + + r_options->push_back(E->key()); + } + + return OK; + } break; + case COMPLETION_IDENTIFIER: + case COMPLETION_FUNCTION_CALL: { + + print_line("complete identifier"); + bool comp_ident=completion_type==COMPLETION_IDENTIFIER; + Set matches; + + StringName skip_function; + + BlockNode *block=completion_block; + + + while(block) { + + if (comp_ident) { + for (const Map::Element *E=block->variables.front();E;E=E->next()) { + + if (E->get().linekey()); + } + } + } + + + if (block->parent_function) { + if (comp_ident) { + for(int i=0;iparent_function->arguments.size();i++) { + matches.insert(block->parent_function->arguments[i].name); + } + } + skip_function=block->parent_function->name; + } + block=block->parent_block; + } + + if (comp_ident && skip_function!=StringName() && p_functions.has(skip_function)) { + + for (Map::Element *E=p_functions[skip_function].front();E;E=E->next()) { + matches.insert(E->key()); + } + } + + if (comp_ident) { + for (const Map::Element *E=shader->varyings.front();E;E=E->next()) { + matches.insert(E->key()); + + } + for (const Map::Element *E=shader->uniforms.front();E;E=E->next()) { + matches.insert(E->key()); + } + + + } + + for(int i=0;ifunctions.size();i++) { + if (!shader->functions[i].callable || shader->functions[i].name==skip_function) + continue; + matches.insert(String(shader->functions[i].name)+"("); + } + + int idx=0; + + while (builtin_func_defs[idx].name) { + + matches.insert(String(builtin_func_defs[idx].name)+"("); + idx++; + } + + for(Set::Element *E=matches.front();E;E=E->next()) { + r_options->push_back(E->get()); + } + + return OK; + + } break; + case COMPLETION_CALL_ARGUMENTS: { + + print_line("complete callargs"); + for(int i=0;ifunctions.size();i++) { + if (!shader->functions[i].callable) + continue; + if (shader->functions[i].name==completion_function) { + + String calltip; + + calltip+=get_datatype_name( shader->functions[i].function->return_type ); + calltip+=" "; + calltip+=shader->functions[i].name; + calltip+="("; + + for(int j=0;jfunctions[i].function->arguments.size();j++) { + + if (j>0) + calltip+=", "; + else + calltip+=" "; + + if (j==completion_argument) { + calltip+=CharType(0xFFFF); + } + + calltip+=get_datatype_name(shader->functions[i].function->arguments[j].type); + calltip+=" "; + calltip+=shader->functions[i].function->arguments[j].name; + + if (j==completion_argument) { + calltip+=CharType(0xFFFF); + } + + } + + if (shader->functions[i].function->arguments.size()) + calltip+=" "; + calltip+=")"; + + r_call_hint=calltip; + return OK; + } + + } + + int idx=0; + + String calltip; + + while (builtin_func_defs[idx].name) { + + if (completion_function==builtin_func_defs[idx].name) { + + if (calltip.length()) + calltip+="\n"; + + calltip+=get_datatype_name( builtin_func_defs[idx].rettype ); + calltip+=" "; + calltip+=builtin_func_defs[idx].name; + calltip+="("; + + bool found_arg=false; + for(int i=0;i<4;i++) { + + if (builtin_func_defs[idx].args[i]==TYPE_VOID) + break; + + if (i>0) + calltip+=", "; + else + calltip+=" "; + + if (i==completion_argument) { + calltip+=CharType(0xFFFF); + } + + calltip+=get_datatype_name(builtin_func_defs[idx].args[i]); + + if (i==completion_argument) { + calltip+=CharType(0xFFFF); + } + + found_arg=true; + + } + + if (found_arg) + calltip+=" "; + calltip+=")"; + + + } + idx++; + } + + r_call_hint=calltip; + + return OK; + + } break; + case COMPLETION_INDEX: { + + const char colv[4]={'r','g','b','a'}; + const char coordv[4]={'x','y','z','w'}; + + + int limit=0; + + switch(completion_base) { + case TYPE_BVEC2: + case TYPE_IVEC2: + case TYPE_UVEC2: + case TYPE_VEC2: { + limit=2; + + } break; + case TYPE_BVEC3: + case TYPE_IVEC3: + case TYPE_UVEC3: + case TYPE_VEC3: { + + limit=3; + + } break; + case TYPE_BVEC4: + case TYPE_IVEC4: + case TYPE_UVEC4: + case TYPE_VEC4: { + + limit=4; + + } break; + case TYPE_MAT2: limit=2; break; + case TYPE_MAT3: limit=3; break; + case TYPE_MAT4: limit=4; break; + default: {} + } + + for(int i=0;ipush_back(String::chr(colv[i])); + r_options->push_back(String::chr(coordv[i])); + } + + } break; + + + } + + return ERR_PARSE_ERROR; +} + String ShaderLanguage::get_error_text() { return error_str; @@ -3132,6 +3820,11 @@ int ShaderLanguage::get_error_line() { return error_line; } +ShaderLanguage::ShaderNode *ShaderLanguage::get_shader() { + + return shader; +} + ShaderLanguage::ShaderLanguage() { nodes=NULL; diff --git a/servers/visual/shader_language.h b/servers/visual/shader_language.h index b37853c77ee..45dc5561159 100644 --- a/servers/visual/shader_language.h +++ b/servers/visual/shader_language.h @@ -138,7 +138,10 @@ public: TK_HINT_WHITE_TEXTURE, TK_HINT_BLACK_TEXTURE, TK_HINT_NORMAL_TEXTURE, + TK_HINT_ALBEDO_TEXTURE, + TK_HINT_COLOR, TK_HINT_RANGE, + TK_CURSOR, TK_ERROR, TK_EOF, TK_MAX @@ -291,7 +294,15 @@ public: struct ConstantNode : public Node { DataType datatype; - Variant value; + + union Value { + bool boolean; + float real; + int32_t sint; + uint32_t uint; + }; + + Vector values; virtual DataType get_datatype() const { return datatype; } ConstantNode() { type=TYPE_CONSTANT; } @@ -306,6 +317,7 @@ public: struct Variable { DataType type; DataPrecision precision; + int line; //for completion }; Map variables; @@ -316,6 +328,7 @@ public: struct ControlFlowNode : public Node { FlowOperation flow_op; + Vector expressions; Vector blocks; ControlFlowNode() { type=TYPE_CONTROL_FLOW; flow_op=FLOW_OP_IF;} }; @@ -368,16 +381,19 @@ public: struct Uniform { enum Hint { HINT_NONE, - HINT_WHITE_TEXTURE, - HINT_BLACK_TEXTURE, - HINT_NORMAL_TEXTURE, + HINT_COLOR, HINT_RANGE, + HINT_ALBEDO, + HINT_NORMAL, + HINT_BLACK, + HINT_WHITE, + HINT_MAX }; int order; DataType type; DataPrecision precission; - Variant default_value; + Vector default_value; Hint hint; float hint_range[3]; @@ -414,15 +430,12 @@ public: enum CompletionType { COMPLETION_NONE, - COMPLETION_BUILT_IN_TYPE_CONSTANT, - COMPLETION_FUNCTION, + COMPLETION_RENDER_MODE, + COMPLETION_MAIN_FUNCTION, COMPLETION_IDENTIFIER, - COMPLETION_PARENT_FUNCTION, - COMPLETION_METHOD, + COMPLETION_FUNCTION_CALL, COMPLETION_CALL_ARGUMENTS, COMPLETION_INDEX, - COMPLETION_VIRTUAL_FUNC, - COMPLETION_YIELD, }; struct Token { @@ -433,7 +446,10 @@ public: uint16_t line; }; + + static String get_operator_text(Operator p_op); static String get_token_text(Token p_token); + static bool is_token_datatype(TokenType p_type); static DataType get_token_datatype(TokenType p_type); static bool is_token_precision(TokenType p_type); @@ -442,8 +458,16 @@ public: static bool is_token_nonvoid_datatype(TokenType p_type); static bool is_token_operator(TokenType p_type); + static bool convert_constant(ConstantNode* p_constant, DataType p_to_type,ConstantNode::Value *p_value=NULL); + static DataType get_scalar_type(DataType p_type); + static bool is_scalar_type(DataType p_type); + + static void get_keyword_list(List *r_keywords); private: + struct KeyWord { TokenType token; const char *text;}; + static const KeyWord keyword_list[]; + bool error_set; String error_str; int error_line; @@ -452,10 +476,29 @@ private: int char_idx; int tk_line; + struct TkPos { + int char_idx; + int tk_line; + }; + + TkPos _get_tkpos() { + TkPos tkp; + tkp.char_idx=char_idx; + tkp.tk_line=tk_line; + return tkp; + } + + + void _set_tkpos(TkPos p_pos) { + char_idx=p_pos.char_idx; + tk_line=p_pos.tk_line; + } void _set_error(const String& p_str) { if (error_set) return; + + error_line=tk_line; error_set=true; error_str=p_str; } @@ -485,7 +528,7 @@ private: bool _validate_operator(OperatorNode *p_op,DataType *r_ret_type=NULL); - struct IntrinsicFuncDef { + struct BuiltinFuncDef { enum { MAX_ARGS=5 }; const char* name; @@ -494,11 +537,20 @@ private: }; + CompletionType completion_type; + int completion_line; + BlockNode *completion_block; + DataType completion_base; + StringName completion_function; + int completion_argument; - static const IntrinsicFuncDef intrinsic_func_defs[]; + + bool _get_completable_identifier(BlockNode *p_block, CompletionType p_type, StringName& identifier); + + static const BuiltinFuncDef builtin_func_defs[]; bool _validate_function_call(BlockNode* p_block, OperatorNode *p_func,DataType *r_ret_type); - bool _parse_function_arguments(BlockNode *p_block, const Map &p_builtin_types, OperatorNode* p_func); + bool _parse_function_arguments(BlockNode *p_block, const Map &p_builtin_types, OperatorNode* p_func, int *r_complete_arg=NULL); Node* _parse_expression(BlockNode *p_block, const Map &p_builtin_types); @@ -516,10 +568,16 @@ public: void clear(); Error compile(const String& p_code,const Map< StringName, Map > &p_functions,const Set& p_render_modes); + Error complete(const String& p_code,const Map< StringName, Map > &p_functions,const Set& p_render_modes,List* r_options,String& r_call_hint); + + + String get_error_text(); int get_error_line(); - static String lex_debug(const String& p_code); + ShaderNode *get_shader(); + + String token_debug(const String& p_code); ShaderLanguage(); ~ShaderLanguage(); diff --git a/servers/visual/shader_types.cpp b/servers/visual/shader_types.cpp new file mode 100644 index 00000000000..675be3458c1 --- /dev/null +++ b/servers/visual/shader_types.cpp @@ -0,0 +1,99 @@ +#include "shader_types.h" + + +const Map< StringName, Map >& ShaderTypes::get_functions(VS::ShaderMode p_mode) { + + return shader_modes[p_mode].functions; +} + +const Set& ShaderTypes::get_modes(VS::ShaderMode p_mode) { + + return shader_modes[p_mode].modes; +} + + +ShaderTypes *ShaderTypes::singleton=NULL; + +ShaderTypes::ShaderTypes() +{ + singleton=this; + + + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_VERTEX"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_TANGENT"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_BONES"]=ShaderLanguage::TYPE_IVEC4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_WEIGHTS"]=ShaderLanguage::TYPE_VEC4; + + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["POSITION"]=ShaderLanguage::TYPE_VEC4 ; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["VERTEX"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["TANGENT"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["BINORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["UV2"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["POINT_SIZE"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["INSTANCE_ID"]=ShaderLanguage::TYPE_INT; + + //builtins + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["WORLD_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["INV_CAMERA_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["PROJECTION_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["TIME"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["VIEWPORT_SIZE"]=ShaderLanguage::TYPE_VEC2; + + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["VERTEX"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["FRAGCOORD"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["FRONT_FACING"]=ShaderLanguage::TYPE_BOOL; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["TANGENT"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["BINORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["NORMALMAP"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["NORMALMAP_DEPTH"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["UV2"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ALBEDO"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ALPHA"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["METAL"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ROUGH"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["EMISSION"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["SPECIAL"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["DISCARD"]=ShaderLanguage::TYPE_BOOL; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["SCREEN_UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["POINT_COORD"]=ShaderLanguage::TYPE_VEC2; + + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["WORLD_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["INV_CAMERA_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["PROJECTION_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["TIME"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["VIEWPORT_SIZE"]=ShaderLanguage::TYPE_VEC2; + + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mix"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_add"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_sub"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mul"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("special_glow"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("special_subsurf"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("special_specular"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_opaque"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_always"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_never"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_alpha_prepass"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("cull_front"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("cull_back"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("cull_disable"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("lightmap_on_uv2"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("unshaded"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("ontop"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_model_space"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_camera_space"); + +} diff --git a/servers/visual/shader_types.h b/servers/visual/shader_types.h new file mode 100644 index 00000000000..411d5790a94 --- /dev/null +++ b/servers/visual/shader_types.h @@ -0,0 +1,27 @@ +#ifndef SHADERTYPES_H +#define SHADERTYPES_H + +#include "shader_language.h" +#include "servers/visual_server.h" +class ShaderTypes { + + + struct Type { + + Map< StringName, Map > functions; + Set modes; + }; + + Map shader_modes; + + static ShaderTypes *singleton; +public: + static ShaderTypes *get_singleton() { return singleton; } + + const Map< StringName, Map >& get_functions(VS::ShaderMode p_mode); + const Set& get_modes(VS::ShaderMode p_mode); + + ShaderTypes(); +}; + +#endif // SHADERTYPES_H diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp index 9b5ed422c5a..a4ee102aed5 100644 --- a/tools/editor/editor_node.cpp +++ b/tools/editor/editor_node.cpp @@ -6540,9 +6540,10 @@ EditorNode::EditorNode() { //more visually meaningful to have this later raise_bottom_panel_item(AnimationPlayerEditor::singleton); + add_editor_plugin( memnew( ShaderEditorPlugin(this) ) ); /* add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,true) ) ); add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,false) ) ); - add_editor_plugin( memnew( ShaderEditorPlugin(this,true) ) ); + add_editor_plugin( memnew( ShaderEditorPlugin(this,false) ) );*/ add_editor_plugin( memnew( CameraEditorPlugin(this) ) ); add_editor_plugin( memnew( SampleEditorPlugin(this) ) ); diff --git a/tools/editor/plugins/shader_editor_plugin.cpp b/tools/editor/plugins/shader_editor_plugin.cpp index 5541ae987f9..9c8aa3c85d9 100644 --- a/tools/editor/plugins/shader_editor_plugin.cpp +++ b/tools/editor/plugins/shader_editor_plugin.cpp @@ -37,8 +37,8 @@ #include "tools/editor/editor_node.h" #include "tools/editor/property_editor.h" #include "os/os.h" +#include "servers/visual/shader_types.h" -#if 0 /*** SETTINGS EDITOR ****/ @@ -51,19 +51,14 @@ Ref ShaderTextEditor::get_edited_shader() const { return shader; } -void ShaderTextEditor::set_edited_shader(const Ref& p_shader,ShaderLanguage::ShaderType p_type) { +void ShaderTextEditor::set_edited_shader(const Ref& p_shader) { shader=p_shader; - type=p_type; + _load_theme_settings(); - if (p_type==ShaderLanguage::SHADER_MATERIAL_LIGHT || p_type==ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT) - get_text_edit()->set_text(shader->get_light_code()); - else if (p_type==ShaderLanguage::SHADER_MATERIAL_VERTEX || p_type==ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX) - get_text_edit()->set_text(shader->get_vertex_code()); - else - get_text_edit()->set_text(shader->get_fragment_code()); + get_text_edit()->set_text(p_shader->get_code()); _line_col_changed(); @@ -104,7 +99,25 @@ void ShaderTextEditor::_load_theme_settings() { List keywords; - ShaderLanguage::get_keyword_list(type,&keywords); + ShaderLanguage::get_keyword_list(&keywords); + + if (shader.is_valid()) { + + + for(const Map< StringName, Map >::Element *E=ShaderTypes::get_singleton()->get_functions(VisualServer::ShaderMode(shader->get_mode())).front();E;E=E->next()) { + + for (const Map::Element *F=E->get().front();F;F=F->next()) { + keywords.push_back(F->key()); + } + + } + + for(const Set::Element *E =ShaderTypes::get_singleton()->get_modes(VisualServer::ShaderMode(shader->get_mode())).front();E;E=E->next()) { + + keywords.push_back(E->get()); + + } + } for(List::Element *E=keywords.front();E;E=E->next()) { @@ -142,22 +155,34 @@ void ShaderTextEditor::_load_theme_settings() { } +void ShaderTextEditor::_code_complete_script(const String& p_code, List* r_options) { + + print_line("code complete"); + + ShaderLanguage sl; + String calltip; + + Error err = sl.complete(p_code,ShaderTypes::get_singleton()->get_functions(VisualServer::ShaderMode(shader->get_mode())),ShaderTypes::get_singleton()->get_modes(VisualServer::ShaderMode(shader->get_mode())),r_options,calltip); + + if (calltip!="") { + get_text_edit()->set_code_hint(calltip); + } +} void ShaderTextEditor::_validate_script() { - String errortxt; - int line,col; - String code=get_text_edit()->get_text(); //List params; //shader->get_param_list(¶ms); - Error err = ShaderLanguage::compile(code,type,NULL,NULL,&errortxt,&line,&col); + ShaderLanguage sl; + + Error err = sl.compile(code,ShaderTypes::get_singleton()->get_functions(VisualServer::ShaderMode(shader->get_mode())),ShaderTypes::get_singleton()->get_modes(VisualServer::ShaderMode(shader->get_mode()))); if (err!=OK) { - String error_text="error("+itos(line+1)+","+itos(col)+"): "+errortxt; + String error_text="error("+itos(sl.get_error_line())+"): "+sl.get_error_text(); set_error(error_text); - get_text_edit()->set_line_as_marked(line,true); + get_text_edit()->set_line_as_marked(sl.get_error_line(),true); } else { for(int i=0;iget_line_count();i++) @@ -187,9 +212,7 @@ ShaderTextEditor::ShaderTextEditor() { void ShaderEditor::_menu_option(int p_option) { - ShaderTextEditor *current = tab_container->get_current_tab_control()->cast_to(); - if (!current) - return; + ShaderTextEditor *current = shader_editor; switch(p_option) { case EDIT_UNDO: { @@ -245,24 +268,11 @@ void ShaderEditor::_menu_option(int p_option) { } } -void ShaderEditor::_tab_changed(int p_which) { - - ShaderTextEditor *shader_editor = tab_container->get_tab_control(p_which)->cast_to(); - - if (shader_editor && is_inside_tree()) - shader_editor->get_text_edit()->grab_focus(); - - ensure_select_current(); -} void ShaderEditor::_notification(int p_what) { if (p_what==NOTIFICATION_ENTER_TREE) { - close->set_normal_texture( get_icon("Close","EditorIcons")); - close->set_hover_texture( get_icon("CloseHover","EditorIcons")); - close->set_pressed_texture( get_icon("Close","EditorIcons")); - close->connect("pressed",this,"_close_callback"); } if (p_what==NOTIFICATION_DRAW) { @@ -365,57 +375,31 @@ void ShaderEditor::clear() { void ShaderEditor::_params_changed() { - fragment_editor->_validate_script(); - vertex_editor->_validate_script(); - light_editor->_validate_script(); + shader_editor->_validate_script(); } void ShaderEditor::_editor_settings_changed() { - vertex_editor->get_text_edit()->set_auto_brace_completion(EditorSettings::get_singleton()->get("text_editor/auto_brace_complete")); - vertex_editor->get_text_edit()->set_scroll_pass_end_of_file(EditorSettings::get_singleton()->get("text_editor/scroll_past_end_of_file")); - vertex_editor->get_text_edit()->set_tab_size(EditorSettings::get_singleton()->get("text_editor/tab_size")); - vertex_editor->get_text_edit()->set_draw_tabs(EditorSettings::get_singleton()->get("text_editor/draw_tabs")); - vertex_editor->get_text_edit()->set_show_line_numbers(EditorSettings::get_singleton()->get("text_editor/show_line_numbers")); - vertex_editor->get_text_edit()->set_syntax_coloring(EditorSettings::get_singleton()->get("text_editor/syntax_highlighting")); - vertex_editor->get_text_edit()->set_highlight_all_occurrences(EditorSettings::get_singleton()->get("text_editor/highlight_all_occurrences")); - vertex_editor->get_text_edit()->cursor_set_blink_enabled(EditorSettings::get_singleton()->get("text_editor/caret_blink")); - vertex_editor->get_text_edit()->cursor_set_blink_speed(EditorSettings::get_singleton()->get("text_editor/caret_blink_speed")); - vertex_editor->get_text_edit()->add_constant_override("line_spacing", EditorSettings::get_singleton()->get("text_editor/line_spacing")); - vertex_editor->get_text_edit()->cursor_set_block_mode(EditorSettings::get_singleton()->get("text_editor/block_caret")); + shader_editor->get_text_edit()->set_auto_brace_completion(EditorSettings::get_singleton()->get("text_editor/auto_brace_complete")); + shader_editor->get_text_edit()->set_scroll_pass_end_of_file(EditorSettings::get_singleton()->get("text_editor/scroll_past_end_of_file")); + shader_editor->get_text_edit()->set_tab_size(EditorSettings::get_singleton()->get("text_editor/tab_size")); + shader_editor->get_text_edit()->set_draw_tabs(EditorSettings::get_singleton()->get("text_editor/draw_tabs")); + shader_editor->get_text_edit()->set_show_line_numbers(EditorSettings::get_singleton()->get("text_editor/show_line_numbers")); + shader_editor->get_text_edit()->set_syntax_coloring(EditorSettings::get_singleton()->get("text_editor/syntax_highlighting")); + shader_editor->get_text_edit()->set_highlight_all_occurrences(EditorSettings::get_singleton()->get("text_editor/highlight_all_occurrences")); + shader_editor->get_text_edit()->cursor_set_blink_enabled(EditorSettings::get_singleton()->get("text_editor/caret_blink")); + shader_editor->get_text_edit()->cursor_set_blink_speed(EditorSettings::get_singleton()->get("text_editor/caret_blink_speed")); + shader_editor->get_text_edit()->add_constant_override("line_spacing", EditorSettings::get_singleton()->get("text_editor/line_spacing")); + shader_editor->get_text_edit()->cursor_set_block_mode(EditorSettings::get_singleton()->get("text_editor/block_caret")); - fragment_editor->get_text_edit()->set_auto_brace_completion(EditorSettings::get_singleton()->get("text_editor/auto_brace_complete")); - fragment_editor->get_text_edit()->set_scroll_pass_end_of_file(EditorSettings::get_singleton()->get("text_editor/scroll_past_end_of_file")); - fragment_editor->get_text_edit()->set_tab_size(EditorSettings::get_singleton()->get("text_editor/tab_size")); - fragment_editor->get_text_edit()->set_draw_tabs(EditorSettings::get_singleton()->get("text_editor/draw_tabs")); - fragment_editor->get_text_edit()->set_show_line_numbers(EditorSettings::get_singleton()->get("text_editor/show_line_numbers")); - fragment_editor->get_text_edit()->set_syntax_coloring(EditorSettings::get_singleton()->get("text_editor/syntax_highlighting")); - fragment_editor->get_text_edit()->set_highlight_all_occurrences(EditorSettings::get_singleton()->get("text_editor/highlight_all_occurrences")); - fragment_editor->get_text_edit()->cursor_set_blink_enabled(EditorSettings::get_singleton()->get("text_editor/caret_blink")); - fragment_editor->get_text_edit()->cursor_set_blink_speed(EditorSettings::get_singleton()->get("text_editor/caret_blink_speed")); - fragment_editor->get_text_edit()->add_constant_override("line_spacing", EditorSettings::get_singleton()->get("text_editor/line_spacing")); - fragment_editor->get_text_edit()->cursor_set_block_mode(EditorSettings::get_singleton()->get("text_editor/block_caret")); - - light_editor->get_text_edit()->set_auto_brace_completion(EditorSettings::get_singleton()->get("text_editor/auto_brace_complete")); - light_editor->get_text_edit()->set_scroll_pass_end_of_file(EditorSettings::get_singleton()->get("text_editor/scroll_past_end_of_file")); - light_editor->get_text_edit()->set_tab_size(EditorSettings::get_singleton()->get("text_editor/tab_size")); - light_editor->get_text_edit()->set_draw_tabs(EditorSettings::get_singleton()->get("text_editor/draw_tabs")); - light_editor->get_text_edit()->set_show_line_numbers(EditorSettings::get_singleton()->get("text_editor/show_line_numbers")); - light_editor->get_text_edit()->set_syntax_coloring(EditorSettings::get_singleton()->get("text_editor/syntax_highlighting")); - light_editor->get_text_edit()->set_highlight_all_occurrences(EditorSettings::get_singleton()->get("text_editor/highlight_all_occurrences")); - light_editor->get_text_edit()->cursor_set_blink_enabled(EditorSettings::get_singleton()->get("text_editor/caret_blink")); - light_editor->get_text_edit()->cursor_set_blink_speed(EditorSettings::get_singleton()->get("text_editor/caret_blink_speed")); - light_editor->get_text_edit()->add_constant_override("line_spacing", EditorSettings::get_singleton()->get("text_editor/line_spacing")); - light_editor->get_text_edit()->cursor_set_block_mode(EditorSettings::get_singleton()->get("text_editor/block_caret")); } void ShaderEditor::_bind_methods() { ObjectTypeDB::bind_method("_editor_settings_changed",&ShaderEditor::_editor_settings_changed); - ObjectTypeDB::bind_method("_tab_changed",&ShaderEditor::_tab_changed); + ObjectTypeDB::bind_method("_menu_option",&ShaderEditor::_menu_option); ObjectTypeDB::bind_method("_params_changed",&ShaderEditor::_params_changed); - ObjectTypeDB::bind_method("_close_callback",&ShaderEditor::_close_callback); ObjectTypeDB::bind_method("apply_shaders",&ShaderEditor::apply_shaders); // ObjectTypeDB::bind_method("_close_current_tab",&ShaderEditor::_close_current_tab); } @@ -441,16 +425,7 @@ void ShaderEditor::edit(const Ref& p_shader) { shader=p_shader; - if (shader->get_mode()==Shader::MODE_MATERIAL) { - vertex_editor->set_edited_shader(p_shader,ShaderLanguage::SHADER_MATERIAL_VERTEX); - fragment_editor->set_edited_shader(p_shader,ShaderLanguage::SHADER_MATERIAL_FRAGMENT); - light_editor->set_edited_shader(shader,ShaderLanguage::SHADER_MATERIAL_LIGHT); - } else if (shader->get_mode()==Shader::MODE_CANVAS_ITEM) { - - vertex_editor->set_edited_shader(p_shader,ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX); - fragment_editor->set_edited_shader(p_shader,ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT); - light_editor->set_edited_shader(shader,ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT); - } + shader_editor->set_edited_shader(p_shader); //vertex_editor->set_edited_shader(shader,ShaderLanguage::SHADER_MATERIAL_VERTEX); // see if already has it @@ -474,35 +449,21 @@ void ShaderEditor::apply_shaders() { if (shader.is_valid()) { - shader->set_code(vertex_editor->get_text_edit()->get_text(),fragment_editor->get_text_edit()->get_text(),light_editor->get_text_edit()->get_text(),0,0); + shader->set_code(shader_editor->get_text_edit()->get_text()); shader->set_edited(true); } } -void ShaderEditor::_close_callback() { - - hide(); -} - ShaderEditor::ShaderEditor() { - tab_container = memnew( TabContainer ); - add_child(tab_container); - tab_container->set_area_as_parent_rect(); - tab_container->set_begin(Point2(0,0)); - //tab_container->set_begin(Point2(0,0)); - - close = memnew( TextureButton ); - close->set_anchor_and_margin(MARGIN_LEFT,ANCHOR_END,20); - close->set_anchor_and_margin(MARGIN_RIGHT,ANCHOR_END,4); - close->set_anchor_and_margin(MARGIN_TOP,ANCHOR_BEGIN,2); - add_child(close); + HBoxContainer *hbc = memnew( HBoxContainer); + add_child(hbc); edit_menu = memnew( MenuButton ); - add_child(edit_menu); + hbc->add_child(edit_menu); edit_menu->set_pos(Point2(5,-1)); edit_menu->set_text(TTR("Edit")); edit_menu->get_popup()->add_shortcut(ED_SHORTCUT("script_editor/undo", TTR("Undo"), KEY_MASK_CMD|KEY_Z), EDIT_UNDO); @@ -517,7 +478,7 @@ ShaderEditor::ShaderEditor() { search_menu = memnew( MenuButton ); - add_child(search_menu); + hbc->add_child(search_menu); search_menu->set_pos(Point2(38,-1)); search_menu->set_text(TTR("Search")); search_menu->get_popup()->add_shortcut(ED_SHORTCUT("script_editor/find", TTR("Find.."), KEY_MASK_CMD|KEY_F), SEARCH_FIND); @@ -530,34 +491,15 @@ ShaderEditor::ShaderEditor() { search_menu->get_popup()->connect("item_pressed", this,"_menu_option"); - tab_container->connect("tab_changed", this,"_tab_changed"); - - erase_tab_confirm = memnew( ConfirmationDialog ); - add_child(erase_tab_confirm); - erase_tab_confirm->connect("confirmed", this,"_close_current_tab"); - - goto_line_dialog = memnew(GotoLineDialog); add_child(goto_line_dialog); - vertex_editor = memnew( ShaderTextEditor ); - tab_container->add_child(vertex_editor); - vertex_editor->set_name(TTR("Vertex")); - - fragment_editor = memnew( ShaderTextEditor ); - tab_container->add_child(fragment_editor); - fragment_editor->set_name(TTR("Fragment")); - - light_editor = memnew( ShaderTextEditor ); - tab_container->add_child(light_editor); - light_editor->set_name(TTR("Lighting")); - - tab_container->set_current_tab(1); + shader_editor = memnew( ShaderTextEditor ); + add_child(shader_editor); + shader_editor->set_v_size_flags(SIZE_EXPAND_FILL); - vertex_editor->connect("script_changed", this,"apply_shaders"); - fragment_editor->connect("script_changed", this,"apply_shaders"); - light_editor->connect("script_changed", this,"apply_shaders"); + shader_editor->connect("script_changed", this,"apply_shaders"); EditorSettings::get_singleton()->connect("settings_changed",this,"_editor_settings_changed"); _editor_settings_changed(); @@ -567,15 +509,7 @@ ShaderEditor::ShaderEditor() { void ShaderEditorPlugin::edit(Object *p_object) { Shader* s = p_object->cast_to(); - if (!s || s->cast_to()) { - shader_editor->hide(); //Dont edit ShaderGraph - return; - } - - if (_2d && s->get_mode()==Shader::MODE_CANVAS_ITEM) - shader_editor->edit(s); - else if (!_2d && s->get_mode()==Shader::MODE_MATERIAL) - shader_editor->edit(s); + shader_editor->edit(s); } @@ -583,24 +517,25 @@ bool ShaderEditorPlugin::handles(Object *p_object) const { bool handles = true; Shader *shader=p_object->cast_to(); - if (!shader || shader->cast_to()) // Dont handle ShaderGraph's - handles = false; - if (handles && _2d) - handles = shader->get_mode()==Shader::MODE_CANVAS_ITEM; - else if (handles && !_2d) - return shader->get_mode()==Shader::MODE_MATERIAL; + //if (!shader || shader->cast_to()) // Dont handle ShaderGraph's + // handles = false; - if (!handles) - shader_editor->hide(); - return handles; + return shader!=NULL; } void ShaderEditorPlugin::make_visible(bool p_visible) { if (p_visible) { - shader_editor->show(); + button->show(); + editor->make_bottom_panel_item_visible(shader_editor); + } else { + + button->hide(); + if (shader_editor->is_visible()) + editor->hide_bottom_panel(); shader_editor->apply_shaders(); + } } @@ -634,19 +569,14 @@ void ShaderEditorPlugin::apply_changes() { shader_editor->apply_shaders(); } -ShaderEditorPlugin::ShaderEditorPlugin(EditorNode *p_node, bool p_2d) { +ShaderEditorPlugin::ShaderEditorPlugin(EditorNode *p_node) { + editor=p_node; shader_editor = memnew( ShaderEditor ); - _2d=p_2d; - if (p_2d) - add_control_to_container(CONTAINER_CANVAS_EDITOR_BOTTOM,shader_editor); - else - add_control_to_container(CONTAINER_SPATIAL_EDITOR_BOTTOM,shader_editor); -// editor->get_viewport()->add_child(shader_editor); -// shader_editor->set_area_as_parent_rect(); - shader_editor->hide(); + shader_editor->set_custom_minimum_size(Size2(0,300)); + button=editor->add_bottom_panel_item("Shader",shader_editor); } @@ -654,4 +584,4 @@ ShaderEditorPlugin::ShaderEditorPlugin(EditorNode *p_node, bool p_2d) { ShaderEditorPlugin::~ShaderEditorPlugin() { } -#endif + diff --git a/tools/editor/plugins/shader_editor_plugin.h b/tools/editor/plugins/shader_editor_plugin.h index ef7cc6772ec..ffe6a42e888 100644 --- a/tools/editor/plugins/shader_editor_plugin.h +++ b/tools/editor/plugins/shader_editor_plugin.h @@ -38,33 +38,34 @@ #include "scene/resources/shader.h" #include "servers/visual/shader_language.h" -#if 0 class ShaderTextEditor : public CodeTextEditor { OBJ_TYPE( ShaderTextEditor, CodeTextEditor ); Ref shader; - ShaderLanguage::ShaderType type; protected: static void _bind_methods(); virtual void _load_theme_settings(); + + virtual void _code_complete_script(const String& p_code, List* r_options); + public: virtual void _validate_script(); Ref get_edited_shader() const; - void set_edited_shader(const Ref& p_shader,ShaderLanguage::ShaderType p_type); + void set_edited_shader(const Ref& p_shader); ShaderTextEditor(); }; -class ShaderEditor : public Control { +class ShaderEditor : public VBoxContainer { - OBJ_TYPE(ShaderEditor, Control ); + OBJ_TYPE(ShaderEditor, VBoxContainer ); enum { @@ -88,22 +89,17 @@ class ShaderEditor : public Control { MenuButton *settings_menu; uint64_t idle; - TabContainer *tab_container; GotoLineDialog *goto_line_dialog; ConfirmationDialog *erase_tab_confirm; - TextureButton *close; - ShaderTextEditor *vertex_editor; - ShaderTextEditor *fragment_editor; - ShaderTextEditor *light_editor; + ShaderTextEditor *shader_editor; + - void _tab_changed(int p_which); void _menu_option(int p_optin); void _params_changed(); mutable Ref shader; - void _close_callback(); void _editor_settings_changed(); @@ -134,6 +130,8 @@ class ShaderEditorPlugin : public EditorPlugin { bool _2d; ShaderEditor *shader_editor; EditorNode *editor; + Button *button; + public: virtual String get_name() const { return "Shader"; } @@ -150,10 +148,9 @@ public: virtual void save_external_data(); virtual void apply_changes(); - ShaderEditorPlugin(EditorNode *p_node,bool p_2d); + ShaderEditorPlugin(EditorNode *p_node); ~ShaderEditorPlugin(); }; #endif -#endif From 1527cf8c0d17891dd0ebf99d484f83daa46eba3c Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Mon, 10 Oct 2016 18:31:01 -0300 Subject: [PATCH 005/223] 2D Shaders are working again using the new syntax, though all is buggy in general --- core/error_macros.h | 3 +- core/os/os.cpp | 1 + core/os/os.h | 3 +- core/pair.h | 3 + drivers/gles3/rasterizer_canvas_gles3.cpp | 145 ++-- drivers/gles3/rasterizer_canvas_gles3.h | 2 + drivers/gles3/rasterizer_gles3.cpp | 10 + drivers/gles3/rasterizer_storage_gles3.cpp | 899 ++++++++++++++++++++- drivers/gles3/rasterizer_storage_gles3.h | 109 ++- drivers/gles3/shader_compiler_gles3.cpp | 528 ++++++++++++ drivers/gles3/shader_compiler_gles3.h | 67 ++ drivers/gles3/shader_gles3.cpp | 83 +- drivers/gles3/shader_gles3.h | 27 +- drivers/gles3/shaders/canvas.glsl | 72 +- drivers/unix/os_unix.cpp | 4 + platform/windows/os_windows.cpp | 9 + scene/2d/canvas_item.cpp | 14 +- scene/2d/canvas_item.h | 4 +- scene/gui/color_picker.cpp | 37 - scene/gui/color_picker.h | 2 - scene/resources/material.h | 3 + servers/visual/shader_language.cpp | 53 +- servers/visual/shader_language.h | 8 +- servers/visual/shader_types.cpp | 62 ++ servers/visual_server.h | 1 + tools/editor/editor_log.cpp | 4 + 26 files changed, 1980 insertions(+), 173 deletions(-) create mode 100644 drivers/gles3/shader_compiler_gles3.cpp create mode 100644 drivers/gles3/shader_compiler_gles3.h diff --git a/core/error_macros.h b/core/error_macros.h index 6d931e899ee..b656827463e 100644 --- a/core/error_macros.h +++ b/core/error_macros.h @@ -49,7 +49,8 @@ enum ErrorHandlerType { ERR_HANDLER_ERROR, ERR_HANDLER_WARNING, - ERR_HANDLER_SCRIPT + ERR_HANDLER_SCRIPT, + ERR_HANDLER_SHADER, }; typedef void (*ErrorHandlerFunc)(void*,const char*,const char*,int p_line,const char *, const char *,ErrorHandlerType p_type); diff --git a/core/os/os.cpp b/core/os/os.cpp index ee324762344..43956809e2c 100644 --- a/core/os/os.cpp +++ b/core/os/os.cpp @@ -68,6 +68,7 @@ void OS::print_error(const char* p_function,const char* p_file,int p_line,const case ERR_ERROR: err_type="**ERROR**"; break; case ERR_WARNING: err_type="**WARNING**"; break; case ERR_SCRIPT: err_type="**SCRIPT ERROR**"; break; + case ERR_SHADER: err_type="**SHADER ERROR**"; break; } if (p_rationale && *p_rationale) diff --git a/core/os/os.h b/core/os/os.h index c2b46a5420e..d939198bfb6 100644 --- a/core/os/os.h +++ b/core/os/os.h @@ -120,7 +120,8 @@ public: enum ErrorType { ERR_ERROR, ERR_WARNING, - ERR_SCRIPT + ERR_SCRIPT, + ERR_SHADER }; virtual void print_error(const char* p_function,const char* p_file,int p_line,const char *p_code,const char*p_rationale,ErrorType p_type=ERR_ERROR); diff --git a/core/pair.h b/core/pair.h index 9bffc37f492..b5208016eb7 100644 --- a/core/pair.h +++ b/core/pair.h @@ -34,6 +34,9 @@ struct Pair { F first; S second; + + Pair() {} + Pair( F p_first, S p_second) { first=p_first; second=p_second; } }; #endif // PAIR_H diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index a1e755cb4ae..b2228a6cfa0 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -138,6 +138,7 @@ void RasterizerCanvasGLES3::canvas_begin(){ state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_DISTANCE_FIELD,false); + state.canvas_shader.set_custom_shader(0); state.canvas_shader.bind(); state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,Color(1,1,1,1)); state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,Matrix32()); @@ -520,6 +521,8 @@ void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *curr //err.. } + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE,texpixel_size); + glVertexAttrib4f(1,rect->rect.pos.x,rect->rect.pos.y,rect->rect.size.x,rect->rect.size.y); glVertexAttrib4f(2,src_rect.pos.x,src_rect.pos.y,src_rect.size.x,src_rect.size.y); @@ -567,6 +570,8 @@ void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *curr Size2 texpixel_size( 1.0/texture->width, 1.0/texture->height ); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE,texpixel_size); + #define DSTRECT(m_x,m_y,m_w,m_h) glVertexAttrib4f(1,m_x,m_y,m_w,m_h) #define SRCRECT(m_x,m_y,m_w,m_h) glVertexAttrib4f(2,(m_x)*texpixel_size.x,(m_y)*texpixel_size.y,(m_w)*texpixel_size.x,(m_h)*texpixel_size.y) @@ -634,8 +639,13 @@ void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *curr ERR_CONTINUE( primitive->points.size()<1); - _bind_canvas_texture(primitive->texture); + RasterizerStorageGLES3::Texture* texture = _bind_canvas_texture(primitive->texture); + if (texture ) { + Size2 texpixel_size( 1.0/texture->width, 1.0/texture->height ); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE,texpixel_size); + + } if (primitive->colors.size()==1 && primitive->points.size()>1) { Color c = primitive->colors[0]; @@ -652,6 +662,14 @@ void RasterizerCanvasGLES3::_canvas_item_render_commands(Item *p_item,Item *curr Item::CommandPolygon* polygon = static_cast(c); _set_texture_rect_mode(false); + + RasterizerStorageGLES3::Texture* texture = _bind_canvas_texture(polygon->texture); + + if (texture ) { + Size2 texpixel_size( 1.0/texture->width, 1.0/texture->height ); + state.canvas_shader.set_uniform(CanvasShaderGLES3::COLOR_TEXPIXEL_SIZE,texpixel_size); + + } _draw_polygon(polygon->count,polygon->indices.ptr(),polygon->points.ptr(),polygon->uvs.ptr(),polygon->colors.ptr(),polygon->texture,polygon->colors.size()==1); } break; @@ -800,10 +818,14 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + int last_blend_mode=-1; + RID canvas_last_material; bool prev_distance_field=false; + + while(p_item_list) { Item *ci=p_item_list; @@ -878,53 +900,71 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const RID material = material_owner->material; if (material!=canvas_last_material || rebind_shader) { -#if 0 - Shader *shader = NULL; - if (material && material->shader.is_valid()) { - shader = shader_owner.get(material->shader); - if (shader && !shader->valid) { - shader=NULL; + + RasterizerStorageGLES3::Material *material_ptr = storage->material_owner.getornull(material); + RasterizerStorageGLES3::Shader *shader_ptr = NULL; + + if (material_ptr) { + + shader_ptr = material_ptr->shader; + + if (shader_ptr && shader_ptr->mode!=VS::SHADER_CANVAS_ITEM) { + shader_ptr=NULL; //do not use non canvasitem shader } } - shader_cache=shader; - if (shader) { - canvas_shader.set_custom_shader(shader->custom_code_id); - _canvas_item_setup_shader_params(material,shader); - } else { - shader_cache=NULL; - canvas_shader.set_custom_shader(0); - canvas_shader.bind(); - uses_texpixel_size=false; + if (shader_ptr && shader_ptr!=shader_cache) { + + state.canvas_shader.set_custom_shader(shader_ptr->custom_code_id); + state.canvas_shader.bind(); + + if (material_ptr->ubo_id) { + glBindBufferBase(GL_UNIFORM_BUFFER,2,material_ptr->ubo_id); + } + + int tc = material_ptr->textures.size(); + RID* textures = material_ptr->textures.ptr(); + + for(int i=0;itexture_owner.getornull( textures[i] ); + if (!t) { + //check hints + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + continue; + } + + glBindTexture(t->target,t->tex_id); + } + + + } else if (!shader_ptr) { + state.canvas_shader.set_custom_shader(0); + state.canvas_shader.bind(); } + shader_cache=shader_ptr; - canvas_shader.set_uniform(CanvasShaderGLES3::PROJECTION_MATRIX,canvas_transform); - if (canvas_use_modulate) - reset_modulate=true; canvas_last_material=material; rebind_shader=false; -#endif + } - if (material.is_valid() && shader_cache) { -#if 0 - _canvas_item_setup_shader_uniforms(material,shader_cache); -#endif - } - - bool unshaded = false; //(material && material->shading_mode==VS::CANVAS_ITEM_SHADING_UNSHADED) || ci->blend_mode!=VS::MATERIAL_BLEND_MODE_MIX; + int blend_mode = shader_cache ? shader_cache->canvas_item.blend_mode : RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_MIX; + bool unshaded = shader_cache && (shader_cache->canvas_item.light_mode==RasterizerStorageGLES3::Shader::CanvasItem::LIGHT_MODE_UNSHADED || blend_mode!=RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_MIX); bool reclip=false; -#if 0 - if (ci==p_item_list || ci->blend_mode!=canvas_blend_mode) { - switch(ci->blend_mode) { + if (last_blend_mode!=blend_mode) { - case VS::MATERIAL_BLEND_MODE_MIX: { + switch(blend_mode) { + + case RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_MIX: { glBlendEquation(GL_FUNC_ADD); - if (current_rt && current_rt_transparent) { + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); } else { @@ -932,33 +972,30 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const } } break; - case VS::MATERIAL_BLEND_MODE_ADD: { + case RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_ADD: { glBlendEquation(GL_FUNC_ADD); glBlendFunc(GL_SRC_ALPHA,GL_ONE); } break; - case VS::MATERIAL_BLEND_MODE_SUB: { + case RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_SUB: { glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); glBlendFunc(GL_SRC_ALPHA,GL_ONE); } break; - case VS::MATERIAL_BLEND_MODE_MUL: { + case RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_MUL: { glBlendEquation(GL_FUNC_ADD); glBlendFunc(GL_DST_COLOR,GL_ZERO); } break; - case VS::MATERIAL_BLEND_MODE_PREMULT_ALPHA: { + case RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_PMALPHA: { glBlendEquation(GL_FUNC_ADD); glBlendFunc(GL_ONE,GL_ONE_MINUS_SRC_ALPHA); } break; } - canvas_blend_mode=ci->blend_mode; + last_blend_mode=blend_mode; } -#endif - -// canvas_shader.set_uniform(CanvasShaderGLES3::CANVAS_MODULATE,unshaded ? Color(1,1,1,1) : p_modulate); state.canvas_item_modulate = unshaded ? ci->final_modulate : Color( ci->final_modulate.r * p_modulate.r, @@ -974,10 +1011,10 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,state.extra_matrix); - if (unshaded || (state.canvas_item_modulate.a>0.001 && (!material.is_valid() /*|| material->shading_mode!=VS::CANVAS_ITEM_SHADING_ONLY_LIGHT*/) && !ci->light_masked )) + if (unshaded || (state.canvas_item_modulate.a>0.001 && (!shader_cache || shader_cache->canvas_item.light_mode!=RasterizerStorageGLES3::Shader::CanvasItem::LIGHT_MODE_LIGHT_ONLY) && !ci->light_masked )) _canvas_item_render_commands(ci,current_clip,reclip); - if (/*canvas_blend_mode==VS::MATERIAL_BLEND_MODE_MIX &&*/ p_light && !unshaded) { + if ((blend_mode==RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_MIX || RasterizerStorageGLES3::Shader::CanvasItem::BLEND_MODE_PMALPHA) && p_light && !unshaded) { Light *light = p_light; bool light_used=false; @@ -1046,12 +1083,7 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const bool light_rebind = state.canvas_shader.bind(); if (light_rebind) { -#if 0 - if (material && shader_cache) { - _canvas_item_setup_shader_params(material,shader_cache); - _canvas_item_setup_shader_uniforms(material,shader_cache); - } -#endif + state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); state.canvas_shader.set_uniform(CanvasShaderGLES3::MODELVIEW_MATRIX,state.final_transform); state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Matrix32()); @@ -1100,15 +1132,12 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF9,false); state.canvas_shader.set_conditional(CanvasShaderGLES3::SHADOW_FILTER_PCF13,false); - state.canvas_shader.bind(); -#if 0 - if (material && shader_cache) { - _canvas_item_setup_shader_params(material,shader_cache); - _canvas_item_setup_shader_uniforms(material,shader_cache); - } -#endif + last_blend_mode=-1; + + /* + //this is set again, so it should not be needed anyway? state.canvas_item_modulate = unshaded ? ci->final_modulate : Color( ci->final_modulate.r * p_modulate.r, ci->final_modulate.g * p_modulate.g, @@ -1120,7 +1149,6 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const state.canvas_shader.set_uniform(CanvasShaderGLES3::EXTRA_MATRIX,Matrix32()); state.canvas_shader.set_uniform(CanvasShaderGLES3::FINAL_MODULATE,state.canvas_item_modulate); - glBlendEquation(GL_FUNC_ADD); if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { @@ -1130,6 +1158,7 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const } //@TODO RESET canvas_blend_mode + */ } @@ -1380,6 +1409,9 @@ void RasterizerCanvasGLES3::reset_canvas() { state.vp=canvas_transform; store_transform(canvas_transform,state.canvas_item_ubo_data.projection_matrix); + for(int i=0;i<4;i++) { + state.canvas_item_ubo_data.time[i]=storage->frame.time[i]; + } glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data); @@ -1442,6 +1474,7 @@ void RasterizerCanvasGLES3::initialize() { glBindBuffer(GL_UNIFORM_BUFFER, 0); state.canvas_shader.init(); + state.canvas_shader.set_base_material_tex_index(1); state.canvas_shadow_shader.init(); state.canvas_shader.set_conditional(CanvasShaderGLES3::USE_RGBA_SHADOWS,storage->config.use_rgba_2d_shadows); diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 1f72a8dbcfb..8f34f07d064 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -5,12 +5,14 @@ #include "rasterizer_storage_gles3.h" #include "shaders/canvas_shadow.glsl.h" + class RasterizerCanvasGLES3 : public RasterizerCanvas { public: struct CanvasItemUBO { float projection_matrix[16]; + float time[4]; }; diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index 27b06906457..eaa9825605d 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -115,8 +115,17 @@ void RasterizerGLES3::initialize() { void RasterizerGLES3::begin_frame(){ + double time_total = double(OS::get_singleton()->get_ticks_usec())/1000000.0; + storage->frame.time[0]=time_total; + storage->frame.time[1]=Math::fmod(time_total,3600); + storage->frame.time[2]=Math::fmod(time_total,900); + storage->frame.time[3]=Math::fmod(time_total,60); + + storage->update_dirty_shaders(); + storage->update_dirty_materials(); } + void RasterizerGLES3::set_current_render_target(RID p_render_target){ if (!p_render_target.is_valid() && storage->frame.current_rt && storage->frame.clear_request) { @@ -257,6 +266,7 @@ RasterizerGLES3::RasterizerGLES3() storage = memnew( RasterizerStorageGLES3 ); canvas = memnew( RasterizerCanvasGLES3 ); canvas->storage=storage; + storage->canvas=canvas; } diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index b4d65080a47..74b7d72652d 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -1,4 +1,5 @@ #include "rasterizer_storage_gles3.h" +#include "rasterizer_canvas_gles3.h" #include "globals.h" /* TEXTURE API */ @@ -1000,65 +1001,906 @@ void RasterizerStorageGLES3::texture_set_shrink_all_x2_on_set_data(bool p_enable RID RasterizerStorageGLES3::shader_create(VS::ShaderMode p_mode){ - return RID(); + Shader *shader = memnew( Shader ); + shader->mode=p_mode; + RID rid = shader_owner.make_rid(shader); + shader_set_mode(rid,p_mode); + _shader_make_dirty(shader); + shader->self=rid; + + return rid; +} + +void RasterizerStorageGLES3::_shader_make_dirty(Shader* p_shader) { + + if (p_shader->dirty_list.in_list()) + return; + + _shader_dirty_list.add(&p_shader->dirty_list); } void RasterizerStorageGLES3::shader_set_mode(RID p_shader,VS::ShaderMode p_mode){ + ERR_FAIL_INDEX(p_mode,VS::SHADER_MAX); + Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND(!shader); + + if (shader->custom_code_id && p_mode==shader->mode) + return; + + + if (shader->custom_code_id) { + + shader->shader->free_custom_shader(shader->custom_code_id); + shader->custom_code_id=0; + } + + shader->mode=p_mode; + + ShaderGLES3* shaders[VS::SHADER_MAX]={ + &canvas->state.canvas_shader, + &canvas->state.canvas_shader, + &canvas->state.canvas_shader, + + }; + + shader->shader=shaders[p_mode]; + + shader->custom_code_id = shader->shader->create_custom_shader(); + + _shader_make_dirty(shader); } VS::ShaderMode RasterizerStorageGLES3::shader_get_mode(RID p_shader) const { - return VS::SHADER_SPATIAL; + const Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND_V(!shader,VS::SHADER_MAX); + + return shader->mode; } void RasterizerStorageGLES3::shader_set_code(RID p_shader, const String& p_code){ + Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND(!shader); + shader->code=p_code; + _shader_make_dirty(shader); } String RasterizerStorageGLES3::shader_get_code(RID p_shader) const{ - return String(); + const Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND_V(!shader,String()); + + + return shader->code; } + +void RasterizerStorageGLES3::_update_shader(Shader* p_shader) const { + + + _shader_dirty_list.remove( &p_shader->dirty_list ); + + p_shader->valid=false; + + p_shader->uniforms.clear(); + + ShaderCompilerGLES3::GeneratedCode gen_code; + ShaderCompilerGLES3::IdentifierActions *actions=NULL; + + + + switch(p_shader->mode) { + case VS::SHADER_CANVAS_ITEM: { + + p_shader->canvas_item.light_mode=Shader::CanvasItem::LIGHT_MODE_NORMAL; + p_shader->canvas_item.blend_mode=Shader::CanvasItem::BLEND_MODE_MIX; + + shaders.actions_canvas.render_mode_values["blend_add"]=Pair(&p_shader->canvas_item.blend_mode,Shader::CanvasItem::BLEND_MODE_ADD); + shaders.actions_canvas.render_mode_values["blend_mix"]=Pair(&p_shader->canvas_item.blend_mode,Shader::CanvasItem::BLEND_MODE_MIX); + shaders.actions_canvas.render_mode_values["blend_sub"]=Pair(&p_shader->canvas_item.blend_mode,Shader::CanvasItem::BLEND_MODE_SUB); + shaders.actions_canvas.render_mode_values["blend_mul"]=Pair(&p_shader->canvas_item.blend_mode,Shader::CanvasItem::BLEND_MODE_MUL); + shaders.actions_canvas.render_mode_values["blend_premul_alpha"]=Pair(&p_shader->canvas_item.blend_mode,Shader::CanvasItem::BLEND_MODE_PMALPHA); + + shaders.actions_canvas.render_mode_values["unshaded"]=Pair(&p_shader->canvas_item.light_mode,Shader::CanvasItem::LIGHT_MODE_UNSHADED); + shaders.actions_canvas.render_mode_values["light_only"]=Pair(&p_shader->canvas_item.light_mode,Shader::CanvasItem::LIGHT_MODE_LIGHT_ONLY); + + actions=&shaders.actions_canvas; + actions->uniforms=&p_shader->uniforms; + + } break; + } + + + Error err = shaders.compiler.compile(p_shader->mode,p_shader->code,actions,p_shader->path,gen_code); + + ERR_FAIL_COND(err!=OK); + + p_shader->shader->set_custom_shader_code(p_shader->custom_code_id,gen_code.vertex,gen_code.vertex_global,gen_code.fragment,gen_code.light,gen_code.fragment_global,gen_code.uniforms,gen_code.texture_uniforms,gen_code.defines); + + p_shader->ubo_size=gen_code.uniform_total_size; + p_shader->ubo_offsets=gen_code.uniform_offsets; + p_shader->texture_count=gen_code.texture_uniforms.size(); + + //all materials using this shader will have to be invalidated, unfortunately + + for (SelfList* E = p_shader->materials.first();E;E=E->next() ) { + + _material_make_dirty(E->self()); + } + + p_shader->valid=true; + p_shader->version++; + +} + +void RasterizerStorageGLES3::update_dirty_shaders() { + + while( _shader_dirty_list.first() ) { + _update_shader(_shader_dirty_list.first()->self() ); + } +} + void RasterizerStorageGLES3::shader_get_param_list(RID p_shader, List *p_param_list) const{ + Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND(!shader); + + if (shader->dirty_list.in_list()) + _update_shader(shader); // ok should be not anymore dirty + + + Map order; + + + for(Map::Element *E=shader->uniforms.front();E;E=E->next()) { + + + order[E->get().order]=E->key(); + } + + + for(Map::Element *E=order.front();E;E=E->next()) { + + PropertyInfo pi; + ShaderLanguage::ShaderNode::Uniform &u=shader->uniforms[E->get()]; + pi.name=E->get(); + switch(u.type) { + case ShaderLanguage::TYPE_VOID: pi.type=Variant::NIL; break; + case ShaderLanguage::TYPE_BOOL: pi.type=Variant::BOOL; break; + case ShaderLanguage::TYPE_BVEC2: pi.type=Variant::INT; pi.hint=PROPERTY_HINT_FLAGS; pi.hint_string="x,y"; break; + case ShaderLanguage::TYPE_BVEC3: pi.type=Variant::INT; pi.hint=PROPERTY_HINT_FLAGS; pi.hint_string="x,y,z"; break; + case ShaderLanguage::TYPE_BVEC4: pi.type=Variant::INT; pi.hint=PROPERTY_HINT_FLAGS; pi.hint_string="x,y,z,w"; break; + case ShaderLanguage::TYPE_UINT: + case ShaderLanguage::TYPE_INT: { + pi.type=Variant::INT; + if (u.hint==ShaderLanguage::ShaderNode::Uniform::HINT_RANGE) { + pi.hint=PROPERTY_HINT_RANGE; + pi.hint_string=rtos(u.hint_range[0])+","+rtos(u.hint_range[1]); + } + + } break; + case ShaderLanguage::TYPE_IVEC2: + case ShaderLanguage::TYPE_IVEC3: + case ShaderLanguage::TYPE_IVEC4: + case ShaderLanguage::TYPE_UVEC2: + case ShaderLanguage::TYPE_UVEC3: + case ShaderLanguage::TYPE_UVEC4: { + + pi.type=Variant::INT_ARRAY; + } break; + case ShaderLanguage::TYPE_FLOAT: { + pi.type=Variant::REAL; + if (u.hint==ShaderLanguage::ShaderNode::Uniform::HINT_RANGE) { + pi.hint=PROPERTY_HINT_RANGE; + pi.hint_string=rtos(u.hint_range[0])+","+rtos(u.hint_range[1])+","+rtos(u.hint_range[2]); + } + + } break; + case ShaderLanguage::TYPE_VEC2: pi.type=Variant::VECTOR2; break; + case ShaderLanguage::TYPE_VEC3: pi.type=Variant::VECTOR3; break; + case ShaderLanguage::TYPE_VEC4: { + if (u.hint==ShaderLanguage::ShaderNode::Uniform::HINT_COLOR) { + pi.type=Variant::COLOR; + } else { + pi.type=Variant::PLANE; + } + } break; + case ShaderLanguage::TYPE_MAT2: pi.type=Variant::MATRIX32; break; + case ShaderLanguage::TYPE_MAT3: pi.type=Variant::MATRIX3; break; + case ShaderLanguage::TYPE_MAT4: pi.type=Variant::TRANSFORM; break; + case ShaderLanguage::TYPE_SAMPLER2D: + case ShaderLanguage::TYPE_ISAMPLER2D: + case ShaderLanguage::TYPE_USAMPLER2D: { + + pi.type=Variant::OBJECT; + pi.hint=PROPERTY_HINT_RESOURCE_TYPE; + pi.hint_string="Texture"; + } break; + case ShaderLanguage::TYPE_SAMPLERCUBE: { + + pi.type=Variant::OBJECT; + pi.hint=PROPERTY_HINT_RESOURCE_TYPE; + pi.hint_string="CubeMap"; + } break; + }; + + p_param_list->push_back(pi); + + } } void RasterizerStorageGLES3::shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture){ + Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND(!shader); + ERR_FAIL_COND(p_texture.is_valid() && !texture_owner.owns(p_texture)); + if (p_texture.is_valid()) + shader->default_textures[p_name]=p_texture; + else + shader->default_textures.erase(p_name); + + _shader_make_dirty(shader); } RID RasterizerStorageGLES3::shader_get_default_texture_param(RID p_shader, const StringName& p_name) const{ - return RID(); + const Shader *shader=shader_owner.get(p_shader); + ERR_FAIL_COND_V(!shader,RID()); + + const Map::Element *E=shader->default_textures.find(p_name); + if (!E) + return RID(); + return E->get(); } /* COMMON MATERIAL API */ +void RasterizerStorageGLES3::_material_make_dirty(Material* p_material) const { + + if (p_material->dirty_list.in_list()) + return; + + _material_dirty_list.add(&p_material->dirty_list); +} + RID RasterizerStorageGLES3::material_create(){ - return RID(); + Material *material = memnew( Material ); + + return material_owner.make_rid(material); } -void RasterizerStorageGLES3::material_set_shader(RID p_shader_material, RID p_shader){ +void RasterizerStorageGLES3::material_set_shader(RID p_material, RID p_shader){ + Material *material = material_owner.get( p_material ); + ERR_FAIL_COND(!material); + + Shader *shader=shader_owner.getornull(p_shader); + + if (material->shader) { + //if shader, remove from previous shader material list + material->shader->materials.remove( &material->list ); + } + material->shader=shader; + + if (shader) { + shader->materials.add(&material->list); + } + + _material_make_dirty(material); } -RID RasterizerStorageGLES3::material_get_shader(RID p_shader_material) const{ + +RID RasterizerStorageGLES3::material_get_shader(RID p_material) const{ + + const Material *material = material_owner.get( p_material ); + ERR_FAIL_COND_V(!material,RID()); + + if (material->shader) + return material->shader->self; return RID(); } void RasterizerStorageGLES3::material_set_param(RID p_material, const StringName& p_param, const Variant& p_value){ + Material *material = material_owner.get( p_material ); + ERR_FAIL_COND(!material); + + if (p_value.get_type()==Variant::NIL) + material->params.erase(p_param); + else + material->params[p_param]=p_value; + + _material_make_dirty(material); } Variant RasterizerStorageGLES3::material_get_param(RID p_material, const StringName& p_param) const{ + const Material *material = material_owner.get( p_material ); + ERR_FAIL_COND_V(!material,RID()); + + if (material->params.has(p_param)) + return material->params[p_param]; + return Variant(); } +_FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataType type, const Variant& value, uint8_t *data) { + switch(type) { + case ShaderLanguage::TYPE_BOOL: { + + bool v = value; + + GLuint *gui = (GLuint*)data; + *gui = v ? GL_TRUE : GL_FALSE; + } break; + case ShaderLanguage::TYPE_BVEC2: { + + int v = value; + GLuint *gui = (GLuint*)data; + gui[0]=v&1 ? GL_TRUE : GL_FALSE; + gui[1]=v&2 ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_BVEC3: { + + int v = value; + GLuint *gui = (GLuint*)data; + gui[0]=v&1 ? GL_TRUE : GL_FALSE; + gui[1]=v&2 ? GL_TRUE : GL_FALSE; + gui[2]=v&4 ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_BVEC4: { + + int v = value; + GLuint *gui = (GLuint*)data; + gui[0]=v&1 ? GL_TRUE : GL_FALSE; + gui[1]=v&2 ? GL_TRUE : GL_FALSE; + gui[2]=v&4 ? GL_TRUE : GL_FALSE; + gui[3]=v&8 ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_INT: { + + int v = value; + GLint *gui = (GLint*)data; + gui[0]=v; + + } break; + case ShaderLanguage::TYPE_IVEC2: { + + DVector iv = value; + int s = iv.size(); + GLint *gui = (GLint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<2;i++) { + if (i iv = value; + int s = iv.size(); + GLint *gui = (GLint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<3;i++) { + if (i iv = value; + int s = iv.size(); + GLint *gui = (GLint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<4;i++) { + if (i iv = value; + int s = iv.size(); + GLuint *gui = (GLuint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<2;i++) { + if (i iv = value; + int s = iv.size(); + GLuint *gui = (GLuint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<3;i++) { + if (i iv = value; + int s = iv.size(); + GLuint *gui = (GLuint*)data; + + DVector::Read r = iv.read(); + + for(int i=0;i<4;i++) { + if (i& value, uint8_t *data) { + + switch(type) { + case ShaderLanguage::TYPE_BOOL: { + + GLuint *gui = (GLuint*)data; + *gui = value[0].boolean ? GL_TRUE : GL_FALSE; + } break; + case ShaderLanguage::TYPE_BVEC2: { + + GLuint *gui = (GLuint*)data; + gui[0]=value[0].boolean ? GL_TRUE : GL_FALSE; + gui[1]=value[1].boolean ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_BVEC3: { + + GLuint *gui = (GLuint*)data; + gui[0]=value[0].boolean ? GL_TRUE : GL_FALSE; + gui[1]=value[1].boolean ? GL_TRUE : GL_FALSE; + gui[2]=value[2].boolean ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_BVEC4: { + + GLuint *gui = (GLuint*)data; + gui[0]=value[0].boolean ? GL_TRUE : GL_FALSE; + gui[1]=value[1].boolean ? GL_TRUE : GL_FALSE; + gui[2]=value[2].boolean ? GL_TRUE : GL_FALSE; + gui[3]=value[3].boolean ? GL_TRUE : GL_FALSE; + + } break; + case ShaderLanguage::TYPE_INT: { + + GLint *gui = (GLint*)data; + gui[0]=value[0].sint; + + } break; + case ShaderLanguage::TYPE_IVEC2: { + + GLint *gui = (GLint*)data; + + for(int i=0;i<2;i++) { + gui[i]=value[i].sint; + + } + + } break; + case ShaderLanguage::TYPE_IVEC3: { + + GLint *gui = (GLint*)data; + + for(int i=0;i<3;i++) { + gui[i]=value[i].sint; + + } + + } break; + case ShaderLanguage::TYPE_IVEC4: { + + GLint *gui = (GLint*)data; + + for(int i=0;i<4;i++) { + gui[i]=value[i].sint; + + } + + } break; + case ShaderLanguage::TYPE_UINT: { + + + GLuint *gui = (GLuint*)data; + gui[0]=value[0].uint; + + } break; + case ShaderLanguage::TYPE_UVEC2: { + + GLint *gui = (GLint*)data; + + for(int i=0;i<2;i++) { + gui[i]=value[i].uint; + } + } break; + case ShaderLanguage::TYPE_UVEC3: { + GLint *gui = (GLint*)data; + + for(int i=0;i<3;i++) { + gui[i]=value[i].uint; + } + + } break; + case ShaderLanguage::TYPE_UVEC4: { + GLint *gui = (GLint*)data; + + for(int i=0;i<4;i++) { + gui[i]=value[i].uint; + } + } break; + case ShaderLanguage::TYPE_FLOAT: { + + GLfloat *gui = (GLfloat*)data; + gui[0]=value[0].real; + + } break; + case ShaderLanguage::TYPE_VEC2: { + + GLfloat *gui = (GLfloat*)data; + + for(int i=0;i<2;i++) { + gui[i]=value[i].real; + } + + } break; + case ShaderLanguage::TYPE_VEC3: { + + GLfloat *gui = (GLfloat*)data; + + for(int i=0;i<3;i++) { + gui[i]=value[i].real; + } + + } break; + case ShaderLanguage::TYPE_VEC4: { + + GLfloat *gui = (GLfloat*)data; + + for(int i=0;i<4;i++) { + gui[i]=value[i].real; + } + } break; + case ShaderLanguage::TYPE_MAT2: { + GLfloat *gui = (GLfloat*)data; + + for(int i=0;i<2;i++) { + gui[i]=value[i].real; + } + } break; + case ShaderLanguage::TYPE_MAT3: { + + + + GLfloat *gui = (GLfloat*)data; + + gui[ 0]=value[0].real; + gui[ 1]=value[1].real; + gui[ 2]=value[2].real; + gui[ 3]=0; + gui[ 4]=value[3].real; + gui[ 5]=value[4].real; + gui[ 6]=value[5].real; + gui[ 7]=0; + gui[ 8]=value[6].real; + gui[ 9]=value[7].real; + gui[10]=value[8].real; + gui[11]=0; + } break; + case ShaderLanguage::TYPE_MAT4: { + + GLfloat *gui = (GLfloat*)data; + + for(int i=0;i<16;i++) { + gui[i]=value[i].real; + } + } break; + default: {} + } + +} + + +_FORCE_INLINE_ static void _fill_std140_ubo_empty(ShaderLanguage::DataType type, uint8_t *data) { + + switch(type) { + + case ShaderLanguage::TYPE_BOOL: + case ShaderLanguage::TYPE_INT: + case ShaderLanguage::TYPE_UINT: + case ShaderLanguage::TYPE_FLOAT: { + zeromem(data,4); + } break; + case ShaderLanguage::TYPE_BVEC2: + case ShaderLanguage::TYPE_IVEC2: + case ShaderLanguage::TYPE_UVEC2: + case ShaderLanguage::TYPE_VEC2: { + zeromem(data,8); + } break; + case ShaderLanguage::TYPE_BVEC3: + case ShaderLanguage::TYPE_IVEC3: + case ShaderLanguage::TYPE_UVEC3: + case ShaderLanguage::TYPE_VEC3: + case ShaderLanguage::TYPE_BVEC4: + case ShaderLanguage::TYPE_IVEC4: + case ShaderLanguage::TYPE_UVEC4: + case ShaderLanguage::TYPE_VEC4: + case ShaderLanguage::TYPE_MAT2:{ + + zeromem(data,16); + } break; + case ShaderLanguage::TYPE_MAT3:{ + + zeromem(data,48); + } break; + case ShaderLanguage::TYPE_MAT4:{ + zeromem(data,64); + } break; + + default: {} + } + +} + +void RasterizerStorageGLES3::_update_material(Material* material) { + + if (material->dirty_list.in_list()) + _material_dirty_list.remove( &material->dirty_list ); + + //clear ubo if it needs to be cleared + if (material->ubo_size) { + + if (!material->shader || material->shader->ubo_size!=material->ubo_size) { + //by by ubo + glDeleteBuffers(1,&material->ubo_id); + material->ubo_id=0; + material->ubo_size=0; + } + } + + //create ubo if it needs to be created + if (material->ubo_size==0 && material->shader && material->shader->ubo_size) { + + glGenBuffers(1, &material->ubo_id); + glBindBuffer(GL_UNIFORM_BUFFER, material->ubo_id); + glBufferData(GL_UNIFORM_BUFFER, material->shader->ubo_size, NULL, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + material->ubo_size=material->shader->ubo_size; + } + + //fill up the UBO if it needs to be filled + if (material->shader && material->ubo_size) { + uint8_t* local_ubo = (uint8_t*)alloca(material->ubo_size); + + for(Map::Element *E=material->shader->uniforms.front();E;E=E->next()) { + + if (E->get().order<0) + continue; // texture, does not go here + + //regular uniform + uint8_t *data = &local_ubo[ material->shader->ubo_offsets[E->get().order] ]; + + Map::Element *V = material->params.find(E->key()); + + if (V) { + //user provided + _fill_std140_variant_ubo_value(E->get().type,V->get(),data); + } else if (E->get().default_value.size()){ + //default value + _fill_std140_ubo_value(E->get().type,E->get().default_value,data); + //value=E->get().default_value; + } else { + //zero because it was not provided + _fill_std140_ubo_empty(E->get().type,data); + } + + + } + + glBindBuffer(GL_UNIFORM_BUFFER,material->ubo_id); + glBufferSubData(GL_UNIFORM_BUFFER, 0, material->ubo_size, local_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + } + + //set up the texture array, for easy access when it needs to be drawn + if (material->shader && material->shader->texture_count) { + + material->textures.resize(material->shader->texture_count); + + for(Map::Element *E=material->shader->uniforms.front();E;E=E->next()) { + + if (E->get().texture_order<0) + continue; // not a texture, does not go here + + RID texture; + + Map::Element *V = material->params.find(E->key()); + if (V) { + texture=V->get(); + } + + if (!texture.is_valid()) { + Map::Element *W = material->shader->default_textures.find(E->key()); + if (W) { + texture=W->get(); + } + } + + material->textures[ E->get().texture_order ]=texture; + + + } + + + } else { + material->textures.clear(); + } + +} + +void RasterizerStorageGLES3::update_dirty_materials() { + + while( _material_dirty_list.first() ) { + + Material *material = _material_dirty_list.first()->self(); + + _update_material(material); + } +} + /* MESH API */ RID RasterizerStorageGLES3::mesh_create(){ @@ -1950,6 +2792,49 @@ bool RasterizerStorageGLES3::free(RID p_rid){ info.texture_mem-=texture->total_data_size; texture_owner.free(p_rid); memdelete(texture); + + } else if (shader_owner.owns(p_rid)) { + + // delete the texture + Shader *shader = shader_owner.get(p_rid); + + if (shader->shader) + shader->shader->free_custom_shader(shader->custom_code_id); + + if (shader->dirty_list.in_list()) + _shader_dirty_list.remove(&shader->dirty_list); + + while (shader->materials.first()) { + + Material *mat = shader->materials.first()->self(); + + mat->shader=NULL; + _material_make_dirty(mat); + + shader->materials.remove( shader->materials.first() ); + } + + //material_shader.free_custom_shader(shader->custom_code_id); + shader_owner.free(p_rid); + memdelete(shader); + + } else if (material_owner.owns(p_rid)) { + + // delete the texture + Material *material = material_owner.get(p_rid); + + if (material->shader) { + material->shader->materials.remove( & material->list ); + } + + if (material->ubo_id) { + glDeleteBuffers(1,&material->ubo_id); + } + + material_owner.free(p_rid); + memdelete(material); + + } else if (canvas_occluder_owner.owns(p_rid)) { diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index 9820fbc941a..b7b3e607c6a 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -2,16 +2,21 @@ #define RASTERIZERSTORAGEGLES3_H #include "servers/visual/rasterizer.h" +#include "servers/visual/shader_language.h" #include "shader_gles3.h" #include "shaders/copy.glsl.h" #include "shaders/canvas.glsl.h" +#include "self_list.h" +#include "shader_compiler_gles3.h" - +class RasterizerCanvasGLES3; class RasterizerStorageGLES3 : public RasterizerStorage { public: + RasterizerCanvasGLES3 *canvas; + enum FBOFormat { FBO_FORMAT_16_BITS, FBO_FORMAT_32_BITS, @@ -47,9 +52,13 @@ public: Set extensions; } config; - struct Shaders { + mutable struct Shaders { CopyShaderGLES3 copy; + + ShaderCompilerGLES3 compiler; + + ShaderCompilerGLES3::IdentifierActions actions_canvas; } shaders; struct Resources { @@ -169,11 +178,71 @@ public: /* SHADER API */ + struct Material; + struct Shader : public RID_Data { + RID self; + VS::ShaderMode mode; + ShaderGLES3 *shader; + String code; + SelfList::List materials; + + + + Map uniforms; + Vector ubo_offsets; + uint32_t ubo_size; + + uint32_t texture_count; + + uint32_t custom_code_id; + uint32_t version; + + SelfList dirty_list; + + Map default_textures; + + bool valid; + + String path; + + struct CanvasItem { + + enum BlendMode { + BLEND_MODE_MIX, + BLEND_MODE_ADD, + BLEND_MODE_SUB, + BLEND_MODE_MUL, + BLEND_MODE_PMALPHA, + }; + + int blend_mode; + + enum LightMode { + LIGHT_MODE_NORMAL, + LIGHT_MODE_UNSHADED, + LIGHT_MODE_LIGHT_ONLY + }; + + int light_mode; + + } canvas_item; + + Shader() : dirty_list(this) { + + shader=NULL; + valid=false; + custom_code_id=0; + version=1; + } }; + mutable SelfList::List _shader_dirty_list; + void _shader_make_dirty(Shader* p_shader); + + mutable RID_Owner shader_owner; virtual RID shader_create(VS::ShaderMode p_mode=VS::SHADER_SPATIAL); @@ -187,17 +256,48 @@ public: virtual void shader_set_default_texture_param(RID p_shader, const StringName& p_name, RID p_texture); virtual RID shader_get_default_texture_param(RID p_shader, const StringName& p_name) const; + void _update_shader(Shader* p_shader) const; + + void update_dirty_shaders(); /* COMMON MATERIAL API */ + struct Material : public RID_Data { + + Shader *shader; + GLuint ubo_id; + uint32_t ubo_size; + Map params; + SelfList list; + SelfList dirty_list; + Vector textures; + + Material() : list(this), dirty_list(this) { + shader=NULL; + ubo_id=0; + ubo_size=0; + } + + }; + + mutable SelfList::List _material_dirty_list; + void _material_make_dirty(Material *p_material) const; + + + mutable RID_Owner material_owner; + virtual RID material_create(); - virtual void material_set_shader(RID p_shader_material, RID p_shader); - virtual RID material_get_shader(RID p_shader_material) const; + virtual void material_set_shader(RID p_material, RID p_shader); + virtual RID material_get_shader(RID p_material) const; virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); virtual Variant material_get_param(RID p_material, const StringName& p_param) const; + void _update_material(Material* material); + + void update_dirty_materials(); + /* MESH API */ virtual RID mesh_create(); @@ -432,6 +532,7 @@ public: bool clear_request; Color clear_request_color; int canvas_draw_commands; + float time[4]; } frame; void initialize(); diff --git a/drivers/gles3/shader_compiler_gles3.cpp b/drivers/gles3/shader_compiler_gles3.cpp new file mode 100644 index 00000000000..49707400ca1 --- /dev/null +++ b/drivers/gles3/shader_compiler_gles3.cpp @@ -0,0 +1,528 @@ +#include "shader_compiler_gles3.h" +#include "os/os.h" + +#define SL ShaderLanguage + +static String _mktab(int p_level) { + + String tb; + for(int i=0;i& p_values) { + + switch(p_type) { + case SL::TYPE_BOOL: return p_values[0].boolean?"true":"false"; + case SL::TYPE_BVEC2: return String()+"bvec2("+(p_values[0].boolean?"true":"false")+(p_values[1].boolean?"true":"false")+")"; + case SL::TYPE_BVEC3: return String()+"bvec3("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+")"; + case SL::TYPE_BVEC4: return String()+"bvec4("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+","+(p_values[3].boolean?"true":"false")+")"; + case SL::TYPE_INT: return rtos(p_values[0].sint); + case SL::TYPE_IVEC2: return String()+"ivec2("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+")"; + case SL::TYPE_IVEC3: return String()+"ivec3("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+")"; + case SL::TYPE_IVEC4: return String()+"ivec4("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+","+rtos(p_values[3].sint)+")"; + case SL::TYPE_UINT: return rtos(p_values[0].real); + case SL::TYPE_UVEC2: return String()+"uvec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; + case SL::TYPE_UVEC3: return String()+"uvec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; + case SL::TYPE_UVEC4: return String()+"uvec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; + case SL::TYPE_FLOAT: return rtos(p_values[0].real); + case SL::TYPE_VEC2: return String()+"vec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; + case SL::TYPE_VEC3: return String()+"vec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; + case SL::TYPE_VEC4: return String()+"vec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; + default: ERR_FAIL_V(String()); + } +} + +void ShaderCompilerGLES3::_dump_function_deps(SL::ShaderNode* p_node, const StringName& p_for_func, const Map& p_func_code, String& r_to_add, Set &added) { + + int fidx=-1; + + for(int i=0;ifunctions.size();i++) { + if (p_node->functions[i].name==p_for_func) { + fidx=i; + break; + } + } + + ERR_FAIL_COND(fidx==-1); + + for (Set::Element *E=p_node->functions[fidx].uses_function.front();E;E=E->next()) { + + if (added.has(E->get())) { + continue; //was added already + } + + _dump_function_deps(p_node,E->get(),p_func_code,r_to_add,added); + + SL::FunctionNode *fnode=NULL; + + for(int i=0;ifunctions.size();i++) { + if (p_node->functions[i].name==E->get()) { + fnode=p_node->functions[i].function; + break; + } + } + + ERR_FAIL_COND(!fnode); + + r_to_add+="\n"; + + String header; + header=_typestr(fnode->return_type)+" "+_mkid(fnode->name)+"("; + for(int i=0;iarguments.size();i++) { + + if (i>0) + header+=", "; + header+=_prestr(fnode->arguments[i].precision)+_typestr(fnode->arguments[i].type)+" "+_mkid(fnode->arguments[i].name); + } + + header+=")\n"; + r_to_add+=header; + r_to_add+=p_func_code[E->get()]; + + added.insert(E->get()); + } +} + +String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, GeneratedCode& r_gen_code, IdentifierActions &p_actions, const DefaultIdentifierActions &p_default_actions) { + + String code; + + switch(p_node->type) { + + case SL::Node::TYPE_SHADER: { + + SL::ShaderNode *pnode=(SL::ShaderNode*)p_node; + + for(int i=0;irender_modes.size();i++) { + + if (p_default_actions.render_mode_defines.has(pnode->render_modes[i]) && !used_rmode_defines.has(pnode->render_modes[i])) { + + r_gen_code.defines.push_back(p_default_actions.render_mode_defines[pnode->render_modes[i]].utf8()); + used_rmode_defines.insert(pnode->render_modes[i]); + } + + if (p_actions.render_mode_flags.has(pnode->render_modes[i])) { + *p_actions.render_mode_flags[pnode->render_modes[i]]=true; + } + + if (p_actions.render_mode_values.has(pnode->render_modes[i])) { + Pair &p = p_actions.render_mode_values[pnode->render_modes[i]]; + *p.first=p.second; + } + } + + + int max_texture_uniforms=0; + int max_uniforms=0; + + for(Map::Element *E=pnode->uniforms.front();E;E=E->next()) { + if (SL::is_sampler_type(E->get().type)) + max_texture_uniforms++; + else + max_uniforms++; + } + + r_gen_code.texture_uniforms.resize(max_texture_uniforms); + + Vector uniform_sizes; + uniform_sizes.resize(max_uniforms); + + for(Map::Element *E=pnode->uniforms.front();E;E=E->next()) { + + String ucode="uniform "; + ucode+=_prestr(E->get().precission); + ucode+=_typestr(E->get().type); + ucode+=" "+_mkid(E->key()); + ucode+=";\n"; + if (SL::is_sampler_type(E->get().type)) { + r_gen_code.vertex_global+=ucode; + r_gen_code.fragment_global+=ucode; + r_gen_code.texture_uniforms[E->get().texture_order]=_mkid(E->key()); + } else { + if (r_gen_code.uniforms.empty()) { + + r_gen_code.defines.push_back(String("#define USE_MATERIAL\n").ascii()); + } + r_gen_code.uniforms+=ucode; + uniform_sizes[E->get().order]=_get_datatype_size(E->get().type); + } + + p_actions.uniforms->insert(E->key(),E->get()); + + } + + // add up + for(int i=0;i0) + uniform_sizes[i]=uniform_sizes[i]+uniform_sizes[i-1]; + } + //offset + r_gen_code.uniform_offsets.resize(uniform_sizes.size()); + for(int i=0;i0) + r_gen_code.uniform_offsets[i]=uniform_sizes[i]-1; + else + r_gen_code.uniform_offsets[i]=0; + } + + if (uniform_sizes.size()) { + r_gen_code.uniform_total_size=uniform_sizes[ uniform_sizes.size() -1 ]; + } else { + r_gen_code.uniform_total_size=0; + } + + for(Map::Element *E=pnode->varyings.front();E;E=E->next()) { + + String vcode; + vcode+=_prestr(E->get().precission); + vcode+=_typestr(E->get().type); + vcode+=" "+String(E->key()); + vcode+=";\n"; + r_gen_code.vertex_global+="out "+vcode; + r_gen_code.fragment_global+="in "+vcode; + } + + Map function_code; + + //code for functions + for(int i=0;ifunctions.size();i++) { + SL::FunctionNode *fnode=pnode->functions[i].function; + function_code[fnode->name]=_dump_node_code(fnode->body,p_level+1,r_gen_code,p_actions,p_default_actions); + } + + //place functions in actual code + + Set added_vtx; + Set added_fragment; //share for light + + for(int i=0;ifunctions.size();i++) { + + SL::FunctionNode *fnode=pnode->functions[i].function; + + + if (fnode->name=="vertex") { + + _dump_function_deps(pnode,fnode->name,function_code,r_gen_code.vertex_global,added_vtx); + r_gen_code.vertex=function_code["vertex"]; + } + + if (fnode->name=="fragment") { + + _dump_function_deps(pnode,fnode->name,function_code,r_gen_code.fragment_global,added_fragment); + r_gen_code.fragment=function_code["fragment"]; + } + + if (fnode->name=="light") { + + _dump_function_deps(pnode,fnode->name,function_code,r_gen_code.fragment_global,added_fragment); + r_gen_code.light=function_code["light"]; + } + } + + //code+=dump_node_code(pnode->body,p_level); + } break; + case SL::Node::TYPE_FUNCTION: { + + } break; + case SL::Node::TYPE_BLOCK: { + SL::BlockNode *bnode=(SL::BlockNode*)p_node; + + //variables + code+=_mktab(p_level-1)+"{\n"; + for(Map::Element *E=bnode->variables.front();E;E=E->next()) { + + code+=_mktab(p_level)+_prestr(E->get().precision)+_typestr(E->get().type)+" "+_mkid(E->key())+";\n"; + } + + for(int i=0;istatements.size();i++) { + + String scode = _dump_node_code(bnode->statements[i],p_level,r_gen_code,p_actions,p_default_actions); + + if (bnode->statements[i]->type==SL::Node::TYPE_CONTROL_FLOW || bnode->statements[i]->type==SL::Node::TYPE_CONTROL_FLOW) { + code+=scode; //use directly + } else { + code+=_mktab(p_level)+scode+";\n"; + } + } + code+=_mktab(p_level-1)+"}\n"; + + + } break; + case SL::Node::TYPE_VARIABLE: { + SL::VariableNode *vnode=(SL::VariableNode*)p_node; + + if (p_default_actions.usage_defines.has(vnode->name) && !used_name_defines.has(vnode->name)) { + r_gen_code.defines.push_back(p_default_actions.usage_defines[vnode->name].utf8()); + used_name_defines.insert(vnode->name); + } + + if (p_actions.usage_flag_pointers.has(vnode->name) && !used_name_defines.has(vnode->name)) { + *p_actions.usage_flag_pointers[vnode->name]=true; + used_name_defines.insert(vnode->name); + } + + if (p_default_actions.renames.has(vnode->name)) + code=p_default_actions.renames[vnode->name]; + else + code=_mkid(vnode->name); + + + } break; + case SL::Node::TYPE_CONSTANT: { + SL::ConstantNode *cnode=(SL::ConstantNode*)p_node; + return get_constant_text(cnode->datatype,cnode->values); + + } break; + case SL::Node::TYPE_OPERATOR: { + SL::OperatorNode *onode=(SL::OperatorNode*)p_node; + + + switch(onode->op) { + + case SL::OP_ASSIGN: + case SL::OP_ASSIGN_ADD: + case SL::OP_ASSIGN_SUB: + case SL::OP_ASSIGN_MUL: + case SL::OP_ASSIGN_DIV: + case SL::OP_ASSIGN_SHIFT_LEFT: + case SL::OP_ASSIGN_SHIFT_RIGHT: + case SL::OP_ASSIGN_MOD: + case SL::OP_ASSIGN_BIT_AND: + case SL::OP_ASSIGN_BIT_OR: + case SL::OP_ASSIGN_BIT_XOR: + code=_dump_node_code(onode->arguments[0],p_level,r_gen_code,p_actions,p_default_actions)+_opstr(onode->op)+_dump_node_code(onode->arguments[1],p_level,r_gen_code,p_actions,p_default_actions); + break; + case SL::OP_BIT_INVERT: + case SL::OP_NEGATE: + case SL::OP_NOT: + case SL::OP_DECREMENT: + case SL::OP_INCREMENT: + code=_opstr(onode->op)+_dump_node_code(onode->arguments[0],p_level,r_gen_code,p_actions,p_default_actions); + break; + case SL::OP_POST_DECREMENT: + case SL::OP_POST_INCREMENT: + code=_dump_node_code(onode->arguments[0],p_level,r_gen_code,p_actions,p_default_actions)+_opstr(onode->op); + break; + case SL::OP_CALL: + case SL::OP_CONSTRUCT: { + + ERR_FAIL_COND_V(onode->arguments[0]->type!=SL::Node::TYPE_VARIABLE,String()); + + SL::VariableNode *vnode=(SL::VariableNode*)onode->arguments[0]; + + if (onode->op==SL::OP_CONSTRUCT) { + code+=String(vnode->name); + } else { + + if (internal_functions.has(vnode->name)) { + code+=vnode->name; + } else if (p_default_actions.renames.has(vnode->name)) { + code+=p_default_actions.renames[vnode->name]; + } else { + code+=_mkid(vnode->name); + } + } + + code+="("; + + for(int i=1;iarguments.size();i++) { + if (i>1) + code+=", "; + code+=_dump_node_code(onode->arguments[i],p_level,r_gen_code,p_actions,p_default_actions); + } + code+=")"; + } break; + default: { + + code="("+_dump_node_code(onode->arguments[0],p_level,r_gen_code,p_actions,p_default_actions)+_opstr(onode->op)+_dump_node_code(onode->arguments[1],p_level,r_gen_code,p_actions,p_default_actions)+")"; + break; + + } + } + + } break; + case SL::Node::TYPE_CONTROL_FLOW: { + SL::ControlFlowNode *cfnode=(SL::ControlFlowNode*)p_node; + if (cfnode->flow_op==SL::FLOW_OP_IF) { + + code+=_mktab(p_level)+"if ("+_dump_node_code(cfnode->expressions[0],p_level,r_gen_code,p_actions,p_default_actions)+")\n"; + code+=_dump_node_code(cfnode->blocks[0],p_level+1,r_gen_code,p_actions,p_default_actions); + if (cfnode->blocks.size()==2) { + + code+=_mktab(p_level)+"else\n"; + code+=_dump_node_code(cfnode->blocks[1],p_level+1,r_gen_code,p_actions,p_default_actions); + } + + + } else if (cfnode->flow_op==SL::FLOW_OP_RETURN) { + + if (cfnode->blocks.size()) { + code="return "+_dump_node_code(cfnode->blocks[0],p_level,r_gen_code,p_actions,p_default_actions); + } else { + code="return"; + } + } + + } break; + case SL::Node::TYPE_MEMBER: { + SL::MemberNode *mnode=(SL::MemberNode*)p_node; + code=_dump_node_code(mnode->owner,p_level,r_gen_code,p_actions,p_default_actions)+"."+mnode->name; + + } break; + } + + return code; + +} + + +Error ShaderCompilerGLES3::compile(VS::ShaderMode p_mode, const String& p_code, IdentifierActions* p_actions, const String &p_path,GeneratedCode& r_gen_code) { + + + + Error err = parser.compile(p_code,ShaderTypes::get_singleton()->get_functions(p_mode),ShaderTypes::get_singleton()->get_modes(p_mode)); + + if (err!=OK) { + _err_print_error(NULL,p_path.utf8().get_data(),parser.get_error_line(),parser.get_error_text().utf8().get_data(),ERR_HANDLER_SHADER); + return err; + } + + r_gen_code.defines.clear(); + r_gen_code.vertex=String(); + r_gen_code.vertex_global=String(); + r_gen_code.fragment=String(); + r_gen_code.fragment_global=String(); + r_gen_code.light=String(); + + + + used_name_defines.clear(); + used_rmode_defines.clear(); + + _dump_node_code(parser.get_shader(),1,r_gen_code,*p_actions,actions[p_mode]); + + return OK; + +} + + +ShaderCompilerGLES3::ShaderCompilerGLES3() { + + /** CANVAS ITEM SHADER **/ + + actions[VS::SHADER_CANVAS_ITEM].renames["SRC_VERTEX"]="vertex"; + actions[VS::SHADER_CANVAS_ITEM].renames["VERTEX"]="outvec.xy"; + actions[VS::SHADER_CANVAS_ITEM].renames["VERTEX_COLOR"]="vertex_color"; + actions[VS::SHADER_CANVAS_ITEM].renames["UV"]="uv_interp"; + actions[VS::SHADER_CANVAS_ITEM].renames["POINT_SIZE"]="gl_PointSize"; + + actions[VS::SHADER_CANVAS_ITEM].renames["WORLD_MATRIX"]="modelview_matrix"; + actions[VS::SHADER_CANVAS_ITEM].renames["PROJECTION_MATRIX"]="projection_matrix"; + actions[VS::SHADER_CANVAS_ITEM].renames["EXTRA_MATRIX"]=="extra_matrix"; + actions[VS::SHADER_CANVAS_ITEM].renames["TIME"]="time"; + + actions[VS::SHADER_CANVAS_ITEM].renames["COLOR"]="color"; + actions[VS::SHADER_CANVAS_ITEM].renames["NORMAL"]="normal"; + actions[VS::SHADER_CANVAS_ITEM].renames["NORMALMAP"]="normal_map"; + actions[VS::SHADER_CANVAS_ITEM].renames["NORMALMAP_DEPTH"]="normal_depth"; + actions[VS::SHADER_CANVAS_ITEM].renames["UV"]="uv_interp"; + actions[VS::SHADER_CANVAS_ITEM].renames["COLOR"]="color"; + actions[VS::SHADER_CANVAS_ITEM].renames["TEXTURE"]="color_texture"; + actions[VS::SHADER_CANVAS_ITEM].renames["TEXTURE_PIXEL_SIZE"]="color_texpixel_size"; + actions[VS::SHADER_CANVAS_ITEM].renames["SCREEN_UV"]="screen_uv"; + actions[VS::SHADER_CANVAS_ITEM].renames["SCREEN_TEXTURE"]="screen_texture"; + actions[VS::SHADER_CANVAS_ITEM].renames["POINT_COORD"]="gl_PointCoord"; + + actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT_VEC"]="light_vec"; + actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT_HEIGHT"]="light_height"; + actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT_COLOR"]="light_color"; + actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT_UV"]="light_uv"; + //actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT_SHADOW_COLOR"]="light_shadow_color"; + actions[VS::SHADER_CANVAS_ITEM].renames["LIGHT"]="light"; + actions[VS::SHADER_CANVAS_ITEM].renames["SHADOW_COLOR"]="shadow_color"; + + actions[VS::SHADER_CANVAS_ITEM].usage_defines["COLOR"]="#define COLOR_USED\n"; + actions[VS::SHADER_CANVAS_ITEM].usage_defines["SCREEN_TEXTURE"]="#define SCREEN_TEXTURE_USED\n"; + actions[VS::SHADER_CANVAS_ITEM].usage_defines["SCREEN_UV"]="#define SCREEN_UV_USED\n"; + actions[VS::SHADER_CANVAS_ITEM].usage_defines["NORMAL"]="#define NORMAL_USED\n"; + actions[VS::SHADER_CANVAS_ITEM].usage_defines["NORMALMAP"]="#define NORMALMAP_USED\n"; + actions[VS::SHADER_CANVAS_ITEM].usage_defines["SHADOW_COLOR"]="#define SHADOW_COLOR_USED\n"; + + actions[VS::SHADER_CANVAS_ITEM].render_mode_defines["skip_transform"]="#define SKIP_TRANSFORM_USED\n"; + + List func_list; + + ShaderLanguage::get_builtin_funcs(&func_list); + + for (List::Element *E=func_list.front();E;E=E->next()) { + internal_functions.insert(E->get()); + } +} diff --git a/drivers/gles3/shader_compiler_gles3.h b/drivers/gles3/shader_compiler_gles3.h new file mode 100644 index 00000000000..ad51b999272 --- /dev/null +++ b/drivers/gles3/shader_compiler_gles3.h @@ -0,0 +1,67 @@ +#ifndef SHADERCOMPILERGLES3_H +#define SHADERCOMPILERGLES3_H + +#include "servers/visual/shader_language.h" +#include "servers/visual/shader_types.h" +#include "servers/visual_server.h" +#include "pair.h" + +class ShaderCompilerGLES3 { +public: + struct IdentifierActions { + + Map > render_mode_values; + Map render_mode_flags; + Map usage_flag_pointers; + + Map *uniforms; + }; + + struct GeneratedCode { + + Vector defines; + Vector texture_uniforms; + Vector uniform_offsets; + uint32_t uniform_total_size; + String uniforms; + String vertex_global; + String vertex; + String fragment_global; + String fragment; + String light; + + }; + +private: + + ShaderLanguage parser; + + struct DefaultIdentifierActions { + + Map renames; + Map render_mode_defines; + Map usage_defines; + }; + + void _dump_function_deps(ShaderLanguage::ShaderNode *p_node, const StringName& p_for_func, const Map &p_func_code, String& r_to_add,Set &added); + String _dump_node_code(ShaderLanguage::Node *p_node, int p_level, GeneratedCode &r_gen_code, IdentifierActions& p_actions, const DefaultIdentifierActions& p_default_actions); + + + + Set used_name_defines; + Set used_rmode_defines; + Set internal_functions; + + + DefaultIdentifierActions actions[VS::SHADER_MAX]; + +public: + + + Error compile(VS::ShaderMode p_mode, const String& p_code, IdentifierActions* p_actions, const String& p_path, GeneratedCode& r_gen_code); + + + ShaderCompilerGLES3(); +}; + +#endif // SHADERCOMPILERGLES3_H diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index f8c0234943b..35191fecf79 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -214,6 +214,8 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { } + + v.ok=false; /* SETUP CONDITIONALS */ @@ -245,6 +247,7 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { CharString code_string; CharString code_string2; CharString code_globals; + CharString material_string; //print_line("code version? "+itos(conditional_version.code_version)); @@ -258,6 +261,7 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { cc=&custom_code_map[conditional_version.code_version]; v.code_version=cc->version; define_line_ofs+=2; + } @@ -273,7 +277,7 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { if (cc) { for(int i=0;icustom_defines.size();i++) { - strings.push_back(cc->custom_defines[i]); + strings.push_back(cc->custom_defines[i].get_data()); DEBUG_PRINT("CD #"+itos(i)+": "+String(cc->custom_defines[i])); } } @@ -305,14 +309,22 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { code_globals=cc->vertex_globals.ascii(); strings.push_back(code_globals.get_data()); } + strings.push_back(vertex_code1.get_data()); + if (cc) { + material_string=cc->uniforms.ascii(); + strings.push_back(material_string.get_data()); + } + + strings.push_back(vertex_code2.get_data()); + if (cc) { code_string=cc->vertex.ascii(); strings.push_back(code_string.get_data()); } - strings.push_back(vertex_code2.get_data()); + strings.push_back(vertex_code3.get_data()); #ifdef DEBUG_SHADER DEBUG_PRINT("\nVertex Code:\n\n"+String(code_string.get_data())); @@ -367,7 +379,8 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { ERR_FAIL_V(NULL); } - + + /* FRAGMENT SHADER */ strings.resize(strings_base_size); @@ -396,21 +409,29 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { code_globals=cc->fragment_globals.ascii(); strings.push_back(code_globals.get_data()); } + strings.push_back(fragment_code1.get_data()); + if (cc) { + material_string=cc->uniforms.ascii(); + strings.push_back(material_string.get_data()); + } + + strings.push_back(fragment_code2.get_data()); + if (cc) { code_string=cc->fragment.ascii(); strings.push_back(code_string.get_data()); } - strings.push_back(fragment_code2.get_data()); + strings.push_back(fragment_code3.get_data()); if (cc) { code_string2=cc->light.ascii(); strings.push_back(code_string2.get_data()); } - strings.push_back(fragment_code3.get_data()); + strings.push_back(fragment_code4.get_data()); #ifdef DEBUG_SHADER DEBUG_PRINT("\nFragment Code:\n\n"+String(code_string.get_data())); @@ -463,7 +484,7 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { ERR_FAIL_V( NULL ); } - + glAttachShader(v.id,v.frag_id); glAttachShader(v.id,v.vert_id); @@ -552,10 +573,11 @@ ShaderGLES3::Version* ShaderGLES3::get_current_version() { if ( cc ) { - v.custom_uniform_locations.resize(cc->custom_uniforms.size()); - for(int i=0;icustom_uniforms.size();i++) { + v.texture_uniform_locations.resize(cc->texture_uniforms.size()); + for(int i=0;itexture_uniforms.size();i++) { - v.custom_uniform_locations[i]=glGetUniformLocation(v.id,String(cc->custom_uniforms[i]).ascii().get_data()); + v.texture_uniform_locations[i]=glGetUniformLocation(v.id,String(cc->texture_uniforms[i]).ascii().get_data()); + glUniform1i(v.texture_uniform_locations[i],i+base_material_tex_index); } } @@ -597,6 +619,7 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co //split vertex and shader code (thank you, retarded shader compiler programmers from you know what company). { String globals_tag="\nVERTEX_SHADER_GLOBALS"; + String material_tag="\nMATERIAL_UNIFORMS"; String code_tag="\nVERTEX_SHADER_CODE"; String code = vertex_code; int cpos = code.find(globals_tag); @@ -606,20 +629,31 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co vertex_code0=code.substr(0,cpos).ascii(); code = code.substr(cpos+globals_tag.length(),code.length()); - cpos = code.find(code_tag); + cpos = code.find(material_tag); if (cpos==-1) { vertex_code1=code.ascii(); } else { vertex_code1=code.substr(0,cpos).ascii(); - vertex_code2=code.substr(cpos+code_tag.length(),code.length()).ascii(); + String code2 = code.substr(cpos+material_tag.length(),code.length()); + + cpos = code2.find(code_tag); + if (cpos==-1) { + vertex_code2=code2.ascii(); + } else { + + vertex_code2=code2.substr(0,cpos).ascii(); + vertex_code3 = code2.substr(cpos+code_tag.length(),code2.length()).ascii(); + } + } } } { String globals_tag="\nFRAGMENT_SHADER_GLOBALS"; + String material_tag="\nMATERIAL_UNIFORMS"; String code_tag="\nFRAGMENT_SHADER_CODE"; String light_code_tag="\nLIGHT_SHADER_CODE"; String code = fragment_code; @@ -630,22 +664,31 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co fragment_code0=code.substr(0,cpos).ascii(); code = code.substr(cpos+globals_tag.length(),code.length()); - cpos = code.find(code_tag); + cpos = code.find(material_tag); if (cpos==-1) { fragment_code1=code.ascii(); } else { fragment_code1=code.substr(0,cpos).ascii(); - String code2 = code.substr(cpos+code_tag.length(),code.length()); + String code2 = code.substr(cpos+material_tag.length(),code.length()); - cpos = code2.find(light_code_tag); + cpos = code2.find(code_tag); if (cpos==-1) { fragment_code2=code2.ascii(); } else { fragment_code2=code2.substr(0,cpos).ascii(); - fragment_code3 = code2.substr(cpos+light_code_tag.length(),code2.length()).ascii(); + String code3 = code2.substr(cpos+code_tag.length(),code2.length()); + + cpos = code3.find(light_code_tag); + if (cpos==-1) { + fragment_code3=code3.ascii(); + } else { + + fragment_code3=code3.substr(0,cpos).ascii(); + fragment_code4 = code3.substr(cpos+light_code_tag.length(),code3.length()).ascii(); + } } } } @@ -697,7 +740,7 @@ uint32_t ShaderGLES3::create_custom_shader() { return last_custom_code++; } -void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String& p_vertex, const String& p_vertex_globals,const String& p_fragment,const String& p_light, const String& p_fragment_globals,const Vector& p_uniforms,const Vector &p_custom_defines) { +void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String& p_vertex, const String& p_vertex_globals, const String& p_fragment, const String& p_light, const String& p_fragment_globals, const String &p_uniforms, const Vector &p_texture_uniforms, const Vector &p_custom_defines) { ERR_FAIL_COND(!custom_code_map.has(p_code_id)); CustomCode *cc=&custom_code_map[p_code_id]; @@ -707,7 +750,8 @@ void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String& p_ver cc->fragment=p_fragment; cc->fragment_globals=p_fragment_globals; cc->light=p_light; - cc->custom_uniforms=p_uniforms; + cc->texture_uniforms=p_texture_uniforms; + cc->uniforms=p_uniforms; cc->custom_defines=p_custom_defines; cc->version++; } @@ -734,13 +778,16 @@ void ShaderGLES3::free_custom_shader(uint32_t p_code_id) { } +void ShaderGLES3::set_base_material_tex_index(int p_idx) { + base_material_tex_index=p_idx; +} ShaderGLES3::ShaderGLES3() { version=NULL; last_custom_code=1; uniforms_dirty = true; - + base_material_tex_index=0; glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS,&max_image_units); } diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 7aaf65d4509..176a2282fdf 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -105,9 +105,10 @@ private: String fragment; String fragment_globals; String light; + String uniforms; uint32_t version; - Vector custom_uniforms; - Vector custom_defines; + Vector texture_uniforms; + Vector custom_defines; }; @@ -118,7 +119,7 @@ private: GLuint vert_id; GLuint frag_id; GLint *uniform_location; - Vector custom_uniform_locations; + Vector texture_uniform_locations; uint32_t code_version; bool ok; Version() { code_version=0; ok=false; uniform_location=NULL; } @@ -166,10 +167,14 @@ private: CharString fragment_code1; CharString fragment_code2; CharString fragment_code3; + CharString fragment_code4; CharString vertex_code0; CharString vertex_code1; CharString vertex_code2; + CharString vertex_code3; + + int base_material_tex_index; Version * get_current_version(); @@ -308,7 +313,7 @@ public: void clear_caches(); uint32_t create_custom_shader(); - void set_custom_shader_code(uint32_t p_id,const String& p_vertex, const String& p_vertex_globals,const String& p_fragment,const String& p_p_light,const String& p_fragment_globals,const Vector& p_uniforms,const Vector &p_custom_defines); + void set_custom_shader_code(uint32_t p_id,const String& p_vertex, const String& p_vertex_globals,const String& p_fragment,const String& p_p_light,const String& p_fragment_globals,const String& p_uniforms,const Vector& p_texture_uniforms,const Vector &p_custom_defines); void set_custom_shader(uint32_t p_id); void free_custom_shader(uint32_t p_id); @@ -332,23 +337,25 @@ public: uniforms_dirty = true; }; - _FORCE_INLINE_ void set_custom_uniform(int p_idx, const Variant& p_value) { + _FORCE_INLINE_ void set_texture_uniform(int p_idx, const Variant& p_value) { ERR_FAIL_COND(!version); - ERR_FAIL_INDEX(p_idx,version->custom_uniform_locations.size()); - _set_uniform_variant( version->custom_uniform_locations[p_idx], p_value ); + ERR_FAIL_INDEX(p_idx,version->texture_uniform_locations.size()); + _set_uniform_variant( version->texture_uniform_locations[p_idx], p_value ); } - _FORCE_INLINE_ GLint get_custom_uniform_location(int p_idx) { + _FORCE_INLINE_ GLint get_texture_uniform_location(int p_idx) { ERR_FAIL_COND_V(!version,-1); - ERR_FAIL_INDEX_V(p_idx,version->custom_uniform_locations.size(),-1); - return version->custom_uniform_locations[p_idx]; + ERR_FAIL_INDEX_V(p_idx,version->texture_uniform_locations.size(),-1); + return version->texture_uniform_locations[p_idx]; } virtual void init()=0; void finish(); + void set_base_material_tex_index(int p_idx); + virtual ~ShaderGLES3(); }; diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index eed2239ec97..cf2e0f776f6 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -1,7 +1,7 @@ [vertex] -layout(location=0) in highp vec3 vertex; +layout(location=0) in highp vec2 vertex; layout(location=3) in vec4 color_attrib; #ifdef USE_TEXTURE_RECT @@ -20,6 +20,7 @@ layout(location=4) in highp vec2 uv_attrib; layout(std140) uniform CanvasItemData { //ubo:0 highp mat4 projection_matrix; + highp vec4 time; }; uniform highp mat4 modelview_matrix; @@ -29,10 +30,6 @@ uniform highp mat4 extra_matrix; out mediump vec2 uv_interp; out mediump vec4 color_interp; -#if defined(USE_TIME) -uniform float time; -#endif - #ifdef USE_LIGHTING layout(std140) uniform LightData { //ubo:1 @@ -51,6 +48,7 @@ layout(std140) uniform LightData { //ubo:1 highp float shadow_distance_mult; }; + out vec4 light_uv_interp; #if defined(NORMAL_USED) @@ -66,20 +64,30 @@ out highp vec2 pos; VERTEX_SHADER_GLOBALS +#if defined(USE_MATERIAL) + +layout(std140) uniform UniformData { //ubo:2 + +MATERIAL_UNIFORMS + +}; + +#endif + void main() { - color_interp = color_attrib; + vec4 vertex_color = color_attrib; #ifdef USE_TEXTURE_RECT - uv_interp = src_rect.xy + abs(src_rect.zw) * vertex.xy; - highp vec4 outvec = vec4(dst_rect.xy + dst_rect.zw * mix(vertex.xy,vec2(1.0,1.0)-vertex.xy,lessThan(src_rect.zw,vec2(0.0,0.0))),0.0,1.0); + uv_interp = src_rect.xy + abs(src_rect.zw) * vertex; + highp vec4 outvec = vec4(dst_rect.xy + dst_rect.zw * mix(vertex,vec2(1.0,1.0)-vertex,lessThan(src_rect.zw,vec2(0.0,0.0))),0.0,1.0); #else uv_interp = uv_attrib; - highp vec4 outvec = vec4(vertex, 1.0); + highp vec4 outvec = vec4(vertex,0.0,1.0); #endif @@ -90,16 +98,16 @@ VERTEX_SHADER_CODE } -#if !defined(USE_WORLD_VEC) +#if !defined(SKIP_TRANSFORM_USED) outvec = extra_matrix * outvec; outvec = modelview_matrix * outvec; #endif - + color_interp = vertex_color; #ifdef USE_PIXEL_SNAP - outvec.xy=floor(outvec.xy+0.5); + outvec.xy=floor(outvec+0.5); #endif @@ -132,20 +140,24 @@ VERTEX_SHADER_CODE uniform mediump sampler2D color_texture; // texunit:0 +uniform highp vec2 color_texpixel_size; in mediump vec2 uv_interp; in mediump vec4 color_interp; -#if defined(ENABLE_TEXSCREEN) +#if defined(SCREEN_TEXTURE_USED) -uniform sampler2D texscreen_tex; // texunit:-3 +uniform sampler2D screen_texture; // texunit:-3 #endif -#if defined(USE_TIME) -uniform float time; -#endif +layout(std140) uniform CanvasItemData { + + highp mat4 projection_matrix; + highp vec4 time; +}; + #ifdef USE_LIGHTING @@ -188,6 +200,17 @@ FRAGMENT_SHADER_GLOBALS layout(location=0) out mediump vec4 frag_color; + +#if defined(USE_MATERIAL) + +layout(std140) uniform UniformData { + +MATERIAL_UNIFORMS + +}; + +#endif + void main() { vec4 color = color_interp; @@ -195,6 +218,9 @@ void main() { vec3 normal = vec3(0.0,0.0,1.0); #endif +#if !defined(COLOR_USED) +//default behavior, texture by color + #ifdef USE_DISTANCE_FIELD const float smoothing = 1.0/32.0; float distance = texture(color_texture, uv_interp).a; @@ -204,6 +230,7 @@ void main() { #endif +#endif #if defined(ENABLE_SCREEN_UV) vec2 screen_uv = gl_FragCoord.xy*screen_uv_mult; @@ -211,14 +238,15 @@ void main() { { -#if defined(USE_NORMALMAP) - vec3 normal_map=vec3(0.0,0.0,1.0); float normal_depth=1.0; + +#if defined(NORMALMAP_USED) + vec3 normal_map=vec3(0.0,0.0,1.0); #endif FRAGMENT_SHADER_CODE -#if defined(USE_NORMALMAP) +#if defined(NORMALMAP_USED) normal = mix(vec3(0.0,0.0,1.0), normal_map * vec3(2.0,-2.0,1.0) - vec3( 1.0, -1.0, 0.0 ), normal_depth ); #endif @@ -246,7 +274,7 @@ FRAGMENT_SHADER_CODE vec2 light_uv = light_uv_interp.xy; vec4 light = texture(light_texture,light_uv) * light_color; -#if defined(USE_OUTPUT_SHADOW_COLOR) +#if defined(SHADOW_COLOR_USED) vec4 shadow_color=vec4(0.0,0.0,0.0,0.0); #endif @@ -409,7 +437,7 @@ LIGHT_SHADER_CODE #endif -#if defined(USE_OUTPUT_SHADOW_COLOR) +#if defined(SHADOW_COLOR_USED) color=mix(shadow_color,color,shadow_attenuation); #else //color*=shadow_attenuation; diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 271cf302ef2..b9ee6178f05 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -88,6 +88,10 @@ void OS_Unix::print_error(const char* p_function,const char* p_file,int p_line,c print("\E[1;35mSCRIPT ERROR: %s: \E[0m\E[1m%s\n",p_function,err_details); print("\E[0;35m At: %s:%i.\E[0m\n",p_file,p_line); break; + case ERR_SHADER: + print("\E[1;36mSHADER ERROR: %s: \E[0m\E[1m%s\n",p_function,err_details); + print("\E[0;36m At: %s:%i.\E[0m\n",p_file,p_line); + break; } } diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp index 1af7cb2a498..92359892a8b 100644 --- a/platform/windows/os_windows.cpp +++ b/platform/windows/os_windows.cpp @@ -1727,6 +1727,10 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l print("SCRIPT ERROR: %s: %s\n", p_function, err_details); print(" At: %s:%i\n", p_file, p_line); break; + case ERR_SHADER: + print("SHADER ERROR: %s: %s\n", p_function, err_details); + print(" At: %s:%i\n", p_file, p_line); + break; } } else { @@ -1742,6 +1746,7 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l case ERR_ERROR: basecol = FOREGROUND_RED; break; case ERR_WARNING: basecol = FOREGROUND_RED | FOREGROUND_GREEN; break; case ERR_SCRIPT: basecol = FOREGROUND_RED | FOREGROUND_BLUE; break; + case ERR_SHADER: basecol = FOREGROUND_GREEN | FOREGROUND_BLUE; break; } basecol |= current_bg; @@ -1753,6 +1758,7 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l case ERR_ERROR: print("ERROR: "); break; case ERR_WARNING: print("WARNING: "); break; case ERR_SCRIPT: print("SCRIPT ERROR: "); break; + case ERR_SCRIPT: print("SHADER ERROR: "); break; } SetConsoleTextAttribute(hCon, current_fg | current_bg | FOREGROUND_INTENSITY); @@ -1763,6 +1769,7 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l case ERR_ERROR: print(" At: "); break; case ERR_WARNING: print(" At: "); break; case ERR_SCRIPT: print(" At: "); break; + case ERR_SHADER: print(" At: "); break; } SetConsoleTextAttribute(hCon, current_fg | current_bg); @@ -1775,6 +1782,7 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l case ERR_ERROR: print("ERROR: %s: ", p_function); break; case ERR_WARNING: print("WARNING: %s: ", p_function); break; case ERR_SCRIPT: print("SCRIPT ERROR: %s: ", p_function); break; + case ERR_SHADER: print("SCRIPT ERROR: %s: ", p_function); break; } SetConsoleTextAttribute(hCon, current_fg | current_bg | FOREGROUND_INTENSITY); @@ -1785,6 +1793,7 @@ void OS_Windows::print_error(const char* p_function, const char* p_file, int p_l case ERR_ERROR: print(" At: "); break; case ERR_WARNING: print(" At: "); break; case ERR_SCRIPT: print(" At: "); break; + case ERR_SHADER: print(" At: "); break; } SetConsoleTextAttribute(hCon, current_fg | current_bg); diff --git a/scene/2d/canvas_item.cpp b/scene/2d/canvas_item.cpp index 07ee1ff7530..06962614018 100644 --- a/scene/2d/canvas_item.cpp +++ b/scene/2d/canvas_item.cpp @@ -55,7 +55,7 @@ bool CanvasItemMaterial::_set(const StringName& p_name, const Variant& p_value) } } if (pr) { - VisualServer::get_singleton()->material_set_param(material,pr,p_value); + VisualServer::get_singleton()->material_set_param(_get_material(),pr,p_value); return true; } } @@ -78,7 +78,7 @@ bool CanvasItemMaterial::_get(const StringName& p_name,Variant &r_ret) const { StringName pr = shader->remap_param(p_name); if (pr) { - r_ret=VisualServer::get_singleton()->material_get_param(material,pr); + r_ret=VisualServer::get_singleton()->material_get_param(_get_material(),pr); return true; } } @@ -111,7 +111,7 @@ void CanvasItemMaterial::set_shader(const Ref& p_shader) { if (shader.is_valid()) rid=shader->get_rid(); - VS::get_singleton()->material_set_shader(material,rid); + VS::get_singleton()->material_set_shader(_get_material(),rid); _change_notify(); //properties for shader exposed emit_changed(); } @@ -123,18 +123,14 @@ Ref CanvasItemMaterial::get_shader() const{ void CanvasItemMaterial::set_shader_param(const StringName& p_param,const Variant& p_value){ - VS::get_singleton()->material_set_param(material,p_param,p_value); + VS::get_singleton()->material_set_param(_get_material(),p_param,p_value); } Variant CanvasItemMaterial::get_shader_param(const StringName& p_param) const{ - return VS::get_singleton()->material_get_param(material,p_param); + return VS::get_singleton()->material_get_param(_get_material(),p_param); } -RID CanvasItemMaterial::get_rid() const { - - return material; -} void CanvasItemMaterial::_bind_methods() { diff --git a/scene/2d/canvas_item.h b/scene/2d/canvas_item.h index b65c1a60b44..9cc7982aa3a 100644 --- a/scene/2d/canvas_item.h +++ b/scene/2d/canvas_item.h @@ -41,10 +41,9 @@ class Font; class StyleBox; -class CanvasItemMaterial : public Material{ +class CanvasItemMaterial : public Material { OBJ_TYPE(CanvasItemMaterial,Material); - RID material; Ref shader; public: /*enum ShadingMode { @@ -70,7 +69,6 @@ public: void set_shader_param(const StringName& p_param,const Variant& p_value); Variant get_shader_param(const StringName& p_param) const; - virtual RID get_rid() const; CanvasItemMaterial(); ~CanvasItemMaterial(); }; diff --git a/scene/gui/color_picker.cpp b/scene/gui/color_picker.cpp index 27bdb581e1c..3d9c07ac86a 100644 --- a/scene/gui/color_picker.cpp +++ b/scene/gui/color_picker.cpp @@ -34,31 +34,12 @@ #include "os/input.h" #include "os/keyboard.h" -void update_material(Refmat,const Color& p_color,float h,float s,float v) { - if (!mat.is_valid()) - return; - Ref sdr = mat->get_shader(); - if (!sdr.is_valid()) - return; - - mat->set_shader_param("R",p_color.r); - mat->set_shader_param("G",p_color.g); - mat->set_shader_param("B",p_color.b); - mat->set_shader_param("H",h); - mat->set_shader_param("S",s); - mat->set_shader_param("V",v); - mat->set_shader_param("A",p_color.a); -} void ColorPicker::_notification(int p_what) { switch(p_what) { case NOTIFICATION_THEME_CHANGED: { - uv_material->set_shader(get_shader("uv_editor")); - w_material->set_shader(get_shader("w_editor")); - update_material(uv_material,color,h,s,v); - update_material(w_material,color,h,s,v); uv_edit->set_texture(get_icon("color_main")); w_edit->set_texture(get_icon("color_hue")); sample->set_texture(get_icon("color_sample")); @@ -68,8 +49,6 @@ void ColorPicker::_notification(int p_what) { case NOTIFICATION_ENTER_TREE: { btn_pick->set_icon(get_icon("screen_picker", "ColorPicker")); - update_material(uv_material, color,h,s,v); - update_material(w_material, color,h,s,v); uv_edit->get_child(0)->cast_to()->update(); w_edit->get_child(0)->cast_to()->update(); @@ -113,8 +92,6 @@ void ColorPicker::set_color(const Color& p_color) { if (!is_inside_tree()) return; - update_material(uv_material, color,h,s,v); - update_material(w_material, color,h,s,v); uv_edit->get_child(0)->cast_to()->update(); w_edit->get_child(0)->cast_to()->update(); @@ -509,7 +486,6 @@ ColorPicker::ColorPicker() : uv_edit->add_child(c); c->set_area_as_parent_rect(); c->set_stop_mouse(false); - c->set_material(memnew ( CanvasItemMaterial )); Vector args=Vector(); args.push_back(0); args.push_back(c); @@ -525,7 +501,6 @@ ColorPicker::ColorPicker() : w_edit->add_child(c); c->set_area_as_parent_rect(); c->set_stop_mouse(false); - c->set_material(memnew ( CanvasItemMaterial )); args.clear(); args.push_back(1); args.push_back(c); @@ -593,18 +568,6 @@ ColorPicker::ColorPicker() : //_update_color(); updating=false; - uv_material.instance(); - Ref s_uv = get_shader("uv_editor"); - uv_material->set_shader(s_uv); - - w_material.instance(); - - Ref s_w = get_shader("w_editor"); - w_material->set_shader(s_w); - - uv_edit->set_material(uv_material); - w_edit->set_material(w_material); - set_color(Color(1,1,1)); diff --git a/scene/gui/color_picker.h b/scene/gui/color_picker.h index 5e2cc572742..b7424543610 100644 --- a/scene/gui/color_picker.h +++ b/scene/gui/color_picker.h @@ -57,8 +57,6 @@ private: List presets; ToolButton *btn_pick; CheckButton *btn_mode; - Ref uv_material; - Ref w_material; HSlider *scroll[4]; SpinBox *values[4]; Label *labels[4]; diff --git a/scene/resources/material.h b/scene/resources/material.h index 87afa60ce9b..1acc031641b 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -46,6 +46,9 @@ class Material : public Resource { OBJ_SAVE_TYPE( Material ); RID material; +protected: + + _FORCE_INLINE_ RID _get_material() const { return material; } public: virtual RID get_rid() const; diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp index 1a9912e3fb7..065bb4d31ad 100644 --- a/servers/visual/shader_language.cpp +++ b/servers/visual/shader_language.cpp @@ -712,6 +712,8 @@ bool ShaderLanguage::is_token_nonvoid_datatype(TokenType p_type) { void ShaderLanguage::clear() { + current_function=StringName(); + completion_type=COMPLETION_NONE; completion_block=NULL; completion_function=StringName(); @@ -2096,6 +2098,12 @@ bool ShaderLanguage::is_scalar_type(DataType p_type) { return p_type==TYPE_BOOL || p_type==TYPE_INT || p_type==TYPE_UINT || p_type==TYPE_FLOAT; } +bool ShaderLanguage::is_sampler_type(DataType p_type) { + + return p_type==TYPE_SAMPLER2D || p_type==TYPE_ISAMPLER2D || p_type==TYPE_USAMPLER2D || p_type==TYPE_SAMPLERCUBE; + +} + void ShaderLanguage::get_keyword_list(List *r_keywords) { Set kws; @@ -2122,6 +2130,27 @@ void ShaderLanguage::get_keyword_list(List *r_keywords) { } } +void ShaderLanguage::get_builtin_funcs(List *r_keywords) { + + + Set kws; + + int idx=0; + + while (builtin_func_defs[idx].name) { + + kws.insert(builtin_func_defs[idx].name); + + idx++; + } + + for(Set::Element *E=kws.front();E;E=E->next()) { + r_keywords->push_back(E->get()); + } +} + + + ShaderLanguage::DataType ShaderLanguage::get_scalar_type(DataType p_type) { static const DataType scalar_types[]={ @@ -2342,6 +2371,12 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const bool ok =_parse_function_arguments(p_block,p_builtin_types,func,&carg); + for(int i=0;ifunctions.size();i++) { + if (shader->functions[i].name==name) { + shader->functions[i].uses_function.insert(name); + } + } + if (carg>=0) { @@ -3140,6 +3175,9 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Maprender_modes.has(tk.text)) { + if (shader->render_modes.find(tk.text)!=-1) { _set_error("Duplicate render mode: '"+String(tk.text)+"'"); return ERR_PARSE_ERROR; } - shader->render_modes.insert(tk.text); + shader->render_modes.push_back(tk.text); tk = _get_token(); if (tk.type==TK_COMMA) { @@ -3225,6 +3263,13 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Mapuniforms.size(); + if (is_sampler_type(type)) { + uniform.texture_order=texture_uniforms++; + uniform.order=-1; + } else { + uniform.texture_order=-1; + uniform.order=uniforms++; + } uniform.type=type; uniform.precission=precision; @@ -3520,9 +3565,13 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Mapbody,builtin_types); if (err) return err; + + current_function=StringName(); } } diff --git a/servers/visual/shader_language.h b/servers/visual/shader_language.h index 45dc5561159..dc1279efd39 100644 --- a/servers/visual/shader_language.h +++ b/servers/visual/shader_language.h @@ -370,6 +370,7 @@ public: struct Function { StringName name; FunctionNode*function; + Set uses_function; bool callable; }; @@ -391,6 +392,7 @@ public: }; int order; + int texture_order; DataType type; DataPrecision precission; Vector default_value; @@ -403,7 +405,7 @@ public: Map varyings; Map uniforms; - Set render_modes; + Vector render_modes; Vector functions; @@ -461,8 +463,10 @@ public: static bool convert_constant(ConstantNode* p_constant, DataType p_to_type,ConstantNode::Value *p_value=NULL); static DataType get_scalar_type(DataType p_type); static bool is_scalar_type(DataType p_type); + static bool is_sampler_type(DataType p_type); static void get_keyword_list(List *r_keywords); + static void get_builtin_funcs(List *r_keywords); private: struct KeyWord { TokenType token; const char *text;}; @@ -476,6 +480,8 @@ private: int char_idx; int tk_line; + StringName current_function; + struct TkPos { int char_idx; int tk_line; diff --git a/servers/visual/shader_types.cpp b/servers/visual/shader_types.cpp index 675be3458c1..1a01a19021b 100644 --- a/servers/visual/shader_types.cpp +++ b/servers/visual/shader_types.cpp @@ -18,6 +18,7 @@ ShaderTypes::ShaderTypes() { singleton=this; + /*************** SPATIAL ***********************/ shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_VERTEX"]=ShaderLanguage::TYPE_VEC3; shader_modes[VS::SHADER_SPATIAL].functions["vertex"]["SRC_NORMAL"]=ShaderLanguage::TYPE_VEC3; @@ -96,4 +97,65 @@ ShaderTypes::ShaderTypes() shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_model_space"); shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_camera_space"); + /************ CANVAS ITEM **************************/ + + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["SRC_VERTEX"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["VERTEX"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["VERTEX_COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["POINT_SIZE"]=ShaderLanguage::TYPE_FLOAT; + + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["WORLD_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["PROJECTION_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["EXTRA_MATRIX"]=ShaderLanguage::TYPE_MAT4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["vertex"]["TIME"]=ShaderLanguage::TYPE_FLOAT; + + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["SRC_COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["POSITION"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["NORMALMAP"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["NORMALMAP_DEPTH"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["TEXTURE"]=ShaderLanguage::TYPE_SAMPLER2D; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["TEXTURE_PIXEL_SIZE"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["SCREEN_UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["POINT_COORD"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["fragment"]["TIME"]=ShaderLanguage::TYPE_FLOAT; + + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["POSITION"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["TEXTURE"]=ShaderLanguage::TYPE_SAMPLER2D; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["TEXTURE_PIXEL_SIZE"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["VAR1"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["VAR2"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["SCREEN_UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT_VEC"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT_HEIGHT"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT_COLOR"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT_UV"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT_SHADOW"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["LIGHT"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["SHADOW"]=ShaderLanguage::TYPE_VEC4; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["POINT_COORD"]=ShaderLanguage::TYPE_VEC2; + shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["TIME"]=ShaderLanguage::TYPE_FLOAT; + + shader_modes[VS::SHADER_SPATIAL].modes.insert("skip_transform"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mix"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_add"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_sub"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mul"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_premul_alpha"); + + shader_modes[VS::SHADER_SPATIAL].modes.insert("unshaded"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("light_only"); + + + + + + } diff --git a/servers/visual_server.h b/servers/visual_server.h index 238bcc216da..90e99710fd8 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -140,6 +140,7 @@ public: SHADER_SPATIAL, SHADER_CANVAS_ITEM, SHADER_LIGHT, + SHADER_MAX }; diff --git a/tools/editor/editor_log.cpp b/tools/editor/editor_log.cpp index 02af9712a86..f69b7ba3e13 100644 --- a/tools/editor/editor_log.cpp +++ b/tools/editor/editor_log.cpp @@ -67,6 +67,10 @@ void EditorLog::_error_handler(void *p_self, const char*p_func, const char*p_fil icon = self->get_icon("ScriptError","EditorIcons"); } break; + case ERR_HANDLER_SHADER: { + + icon = self->get_icon("Shader","EditorIcons"); + } break; } From 0955371447181a0fc20eb68dc6bc5aae67b73d0d Mon Sep 17 00:00:00 2001 From: Elia Argentieri Date: Sat, 15 Oct 2016 18:20:27 +0200 Subject: [PATCH 006/223] Make the step property useful for sliders as described in #5773 --- scene/gui/range.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scene/gui/range.cpp b/scene/gui/range.cpp index e056c55f718..ed58c4eb49b 100644 --- a/scene/gui/range.cpp +++ b/scene/gui/range.cpp @@ -136,16 +136,25 @@ double Range::get_page() const { } void Range::set_unit_value(double p_value) { + + double v; + if (shared->exp_unit_value && get_min()>0) { double exp_min = Math::log(get_min())/Math::log(2); double exp_max = Math::log(get_max())/Math::log(2); - double v = Math::pow(2,exp_min+(exp_max-exp_min)*p_value); - - set_val( v ); + v = Math::pow(2,exp_min+(exp_max-exp_min)*p_value); } else { - set_val( (get_max() - get_min()) * p_value + get_min() ); + + double percent = (get_max() - get_min()) * p_value; + if (get_step() > 0) { + double steps = round(percent / get_step()); + v = steps * get_step() + get_min(); + } else { + v = percent + get_min(); + } } + set_val( v ); } double Range::get_unit_value() const { From 2d77a6f5d3beae3b341e4a7f331202bd1a010508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= Date: Mon, 17 Oct 2016 17:13:34 +0200 Subject: [PATCH 007/223] Add libsimplewebm and libwebm thirdparty libraries --- thirdparty/README.md | 6 + thirdparty/libsimplewebm/LICENSE | 21 + .../libsimplewebm/OpusVorbisDecoder.cpp | 224 + .../libsimplewebm/OpusVorbisDecoder.hpp | 63 + thirdparty/libsimplewebm/VPXDecoder.cpp | 142 + thirdparty/libsimplewebm/VPXDecoder.hpp | 80 + thirdparty/libsimplewebm/WebMDemuxer.cpp | 241 + thirdparty/libsimplewebm/WebMDemuxer.hpp | 125 + thirdparty/libsimplewebm/libwebm/AUTHORS.TXT | 4 + thirdparty/libsimplewebm/libwebm/LICENSE.TXT | 30 + thirdparty/libsimplewebm/libwebm/PATENTS.TXT | 23 + .../libsimplewebm/libwebm/README.libvpx | 11 + .../libsimplewebm/libwebm/common/webmids.h | 184 + .../libwebm/mkvmuxer/mkvmuxertypes.h | 28 + .../libwebm/mkvparser/mkvparser.cc | 7831 +++++++++++++++++ .../libwebm/mkvparser/mkvparser.h | 1111 +++ 16 files changed, 10124 insertions(+) create mode 100644 thirdparty/libsimplewebm/LICENSE create mode 100644 thirdparty/libsimplewebm/OpusVorbisDecoder.cpp create mode 100644 thirdparty/libsimplewebm/OpusVorbisDecoder.hpp create mode 100644 thirdparty/libsimplewebm/VPXDecoder.cpp create mode 100644 thirdparty/libsimplewebm/VPXDecoder.hpp create mode 100644 thirdparty/libsimplewebm/WebMDemuxer.cpp create mode 100644 thirdparty/libsimplewebm/WebMDemuxer.hpp create mode 100644 thirdparty/libsimplewebm/libwebm/AUTHORS.TXT create mode 100644 thirdparty/libsimplewebm/libwebm/LICENSE.TXT create mode 100644 thirdparty/libsimplewebm/libwebm/PATENTS.TXT create mode 100644 thirdparty/libsimplewebm/libwebm/README.libvpx create mode 100644 thirdparty/libsimplewebm/libwebm/common/webmids.h create mode 100644 thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h create mode 100644 thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc create mode 100644 thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h diff --git a/thirdparty/README.md b/thirdparty/README.md index f073bef8ec5..1de2949d442 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -95,6 +95,12 @@ Files extracted from upstream source: - `scripts/pnglibconf.h.prebuilt` as `pnglibconf.h` +## libsimplewebm + +- Upstream: https://github.com/zaps166/libsimplewebm +- License: MIT, BSD-3-Clause + + ## libvorbis - Upstream: https://www.xiph.org/vorbis diff --git a/thirdparty/libsimplewebm/LICENSE b/thirdparty/libsimplewebm/LICENSE new file mode 100644 index 00000000000..058633ac183 --- /dev/null +++ b/thirdparty/libsimplewebm/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Błażej Szczygieł + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp new file mode 100644 index 00000000000..d7869f599b7 --- /dev/null +++ b/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp @@ -0,0 +1,224 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include "OpusVorbisDecoder.hpp" + +#include +#include + +#include + +struct VorbisDecoder +{ + vorbis_info info; + vorbis_dsp_state dspState; + vorbis_block block; + ogg_packet op; + + bool hasDSPState, hasBlock; +}; + +/**/ + +OpusVorbisDecoder::OpusVorbisDecoder(const WebMDemuxer &demuxer) : + m_vorbis(NULL), m_opus(NULL), + m_numSamples(0), + m_channels(demuxer.getChannels()) +{ + switch (demuxer.getAudioCodec()) + { + case WebMDemuxer::AUDIO_VORBIS: + if (openVorbis(demuxer)) + return; + break; + case WebMDemuxer::AUDIO_OPUS: + if (openOpus(demuxer)) + return; + break; + default: + return; + } + close(); +} +OpusVorbisDecoder::~OpusVorbisDecoder() +{ + close(); +} + +bool OpusVorbisDecoder::isOpen() const +{ + return (m_vorbis || m_opus); +} + +bool OpusVorbisDecoder::getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples) +{ + if (m_vorbis) + { + m_vorbis->op.packet = frame.buffer; + m_vorbis->op.bytes = frame.bufferSize; + + if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op)) + return false; + if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block)) + return false; + + const int maxSamples = getBufferSamples(); + int samplesCount, count = 0; + float **pcm; + while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm))) + { + const int toConvert = samplesCount <= maxSamples ? samplesCount : maxSamples; + for (int c = 0; c < m_channels; ++c) + { + float *samples = pcm[c]; + for (int i = 0, j = c; i < toConvert; ++i, j += m_channels) + { + int sample = samples[i] * 32767.0f; + if (sample > 32767) + sample = 32767; + else if (sample < -32768) + sample = -32768; + buffer[count + j] = sample; + } + } + vorbis_synthesis_read(&m_vorbis->dspState, toConvert); + count += toConvert; + } + + numOutSamples = count; + return true; + } + else if (m_opus) + { + const int samples = opus_decode(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0); + if (samples >= 0) + { + numOutSamples = samples; + return true; + } + } + return false; +} + +bool OpusVorbisDecoder::openVorbis(const WebMDemuxer &demuxer) +{ + size_t extradataSize = 0; + const unsigned char *extradata = demuxer.getAudioExtradata(extradataSize); + + if (extradataSize < 3 || !extradata || extradata[0] != 2) + return false; + + size_t headerSize[3] = {0}; + size_t offset = 1; + + /* Calculate three headers sizes */ + for (int i = 0; i < 2; ++i) + { + for (;;) + { + if (offset >= extradataSize) + return false; + headerSize[i] += extradata[offset]; + if (extradata[offset++] < 0xFF) + break; + } + } + headerSize[2] = extradataSize - (headerSize[0] + headerSize[1] + offset); + + if (headerSize[0] + headerSize[1] + headerSize[2] + offset != extradataSize) + return false; + + ogg_packet op[3]; + memset(op, 0, sizeof op); + + op[0].packet = (unsigned char *)extradata + offset; + op[0].bytes = headerSize[0]; + op[0].b_o_s = 1; + + op[1].packet = (unsigned char *)extradata + offset + headerSize[0]; + op[1].bytes = headerSize[1]; + + op[2].packet = (unsigned char *)extradata + offset + headerSize[0] + headerSize[1]; + op[2].bytes = headerSize[2]; + + m_vorbis = new VorbisDecoder; + m_vorbis->hasDSPState = m_vorbis->hasBlock = false; + vorbis_info_init(&m_vorbis->info); + + /* Upload three Vorbis headers into libvorbis */ + vorbis_comment vc; + vorbis_comment_init(&vc); + for (int i = 0; i < 3; ++i) + { + if (vorbis_synthesis_headerin(&m_vorbis->info, &vc, &op[i])) + { + vorbis_comment_clear(&vc); + return false; + } + } + vorbis_comment_clear(&vc); + + if (vorbis_synthesis_init(&m_vorbis->dspState, &m_vorbis->info)) + return false; + m_vorbis->hasDSPState = true; + + if (m_vorbis->info.channels != m_channels || m_vorbis->info.rate != demuxer.getSampleRate()) + return false; + + if (vorbis_block_init(&m_vorbis->dspState, &m_vorbis->block)) + return false; + m_vorbis->hasBlock = true; + + memset(&m_vorbis->op, 0, sizeof m_vorbis->op); + + m_numSamples = 4096 / m_channels; + + return true; +} +bool OpusVorbisDecoder::openOpus(const WebMDemuxer &demuxer) +{ + int opusErr = 0; + m_opus = opus_decoder_create(demuxer.getSampleRate(), m_channels, &opusErr); + if (!opusErr) + { + m_numSamples = demuxer.getSampleRate() * 0.06 + 0.5; //Maximum frame size (for 60 ms frame) + return true; + } + return false; +} + +void OpusVorbisDecoder::close() +{ + if (m_vorbis) + { + if (m_vorbis->hasBlock) + vorbis_block_clear(&m_vorbis->block); + if (m_vorbis->hasDSPState) + vorbis_dsp_clear(&m_vorbis->dspState); + vorbis_info_clear(&m_vorbis->info); + delete m_vorbis; + } + if (m_opus) + opus_decoder_destroy(m_opus); +} diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp new file mode 100644 index 00000000000..bcdca731eeb --- /dev/null +++ b/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp @@ -0,0 +1,63 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef OPUSVORBISDECODER_HPP +#define OPUSVORBISDECODER_HPP + +#include "WebMDemuxer.hpp" + +struct VorbisDecoder; +struct OpusDecoder; + +class OpusVorbisDecoder +{ + OpusVorbisDecoder(const OpusVorbisDecoder &); + void operator =(const OpusVorbisDecoder &); +public: + OpusVorbisDecoder(const WebMDemuxer &demuxer); + ~OpusVorbisDecoder(); + + bool isOpen() const; + + inline int getBufferSamples() const + { + return m_numSamples; + } + + bool getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples); + +private: + bool openVorbis(const WebMDemuxer &demuxer); + bool openOpus(const WebMDemuxer &demuxer); + + void close(); + + VorbisDecoder *m_vorbis; + OpusDecoder *m_opus; + int m_numSamples; + int m_channels; + +}; + +#endif // OPUSVORBISDECODER_HPP diff --git a/thirdparty/libsimplewebm/VPXDecoder.cpp b/thirdparty/libsimplewebm/VPXDecoder.cpp new file mode 100644 index 00000000000..3f77b8f5cd6 --- /dev/null +++ b/thirdparty/libsimplewebm/VPXDecoder.cpp @@ -0,0 +1,142 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include "VPXDecoder.hpp" + +#include +#include + +#include +#include + +VPXDecoder::VPXDecoder(const WebMDemuxer &demuxer, unsigned threads) : + m_ctx(NULL), + m_iter(NULL), + m_delay(0) +{ + if (threads > 8) + threads = 8; + else if (threads < 1) + threads = 1; + + const vpx_codec_dec_cfg_t codecCfg = { + threads, + 0, + 0 + }; + vpx_codec_iface_t *codecIface = NULL; + + switch (demuxer.getVideoCodec()) + { + case WebMDemuxer::VIDEO_VP8: + codecIface = vpx_codec_vp8_dx(); + break; + case WebMDemuxer::VIDEO_VP9: + codecIface = vpx_codec_vp9_dx(); + m_delay = threads - 1; + break; + default: + return; + } + + m_ctx = new vpx_codec_ctx_t; + if (vpx_codec_dec_init(m_ctx, codecIface, &codecCfg, m_delay > 0 ? VPX_CODEC_USE_FRAME_THREADING : 0)) + { + delete m_ctx; + m_ctx = NULL; + } +} +VPXDecoder::~VPXDecoder() +{ + if (m_ctx) + { + vpx_codec_destroy(m_ctx); + delete m_ctx; + } +} + +bool VPXDecoder::decode(const WebMFrame &frame) +{ + m_iter = NULL; + return !vpx_codec_decode(m_ctx, frame.buffer, frame.bufferSize, NULL, 0); +} +VPXDecoder::IMAGE_ERROR VPXDecoder::getImage(Image &image) +{ + IMAGE_ERROR err = NO_FRAME; + if (vpx_image_t *img = vpx_codec_get_frame(m_ctx, &m_iter)) + { + if ((img->fmt & VPX_IMG_FMT_PLANAR) && !(img->fmt & (VPX_IMG_FMT_HAS_ALPHA | VPX_IMG_FMT_HIGHBITDEPTH))) + { + if (img->stride[0] && img->stride[1] && img->stride[2]) + { + const int uPlane = !!(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; + const int vPlane = !(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; + + image.w = img->d_w; + image.h = img->d_h; + image.chromaShiftW = img->x_chroma_shift; + image.chromaShiftH = img->y_chroma_shift; + + image.planes[0] = img->planes[0]; + image.planes[1] = img->planes[uPlane]; + image.planes[2] = img->planes[vPlane]; + + image.linesize[0] = img->stride[0]; + image.linesize[1] = img->stride[uPlane]; + image.linesize[2] = img->stride[vPlane]; + + err = NO_ERROR; + } + } + else + { + err = UNSUPPORTED_FRAME; + } + } + return err; +} + +/**/ + +#if 0 + +static inline int ceilRshift(int val, int shift) +{ + return (val + (1 << shift) - 1) >> shift; +} + +int VPXDecoder::Image::getWidth(int plane) const +{ + if (!plane) + return w; + return ceilRshift(w, chromaShiftW); +} +int VPXDecoder::Image::getHeight(int plane) const +{ + if (!plane) + return h; + return ceilRshift(h, chromaShiftH); +} + +#endif diff --git a/thirdparty/libsimplewebm/VPXDecoder.hpp b/thirdparty/libsimplewebm/VPXDecoder.hpp new file mode 100644 index 00000000000..61083958710 --- /dev/null +++ b/thirdparty/libsimplewebm/VPXDecoder.hpp @@ -0,0 +1,80 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef VPXDECODER_HPP +#define VPXDECODER_HPP + +#include "WebMDemuxer.hpp" + +struct vpx_codec_ctx; + +class VPXDecoder +{ + VPXDecoder(const VPXDecoder &); + void operator =(const VPXDecoder &); +public: + class Image + { + public: +#if 0 + int getWidth(int plane) const; + int getHeight(int plane) const; +#endif + + int w, h; + int chromaShiftW, chromaShiftH; + unsigned char *planes[3]; + int linesize[3]; + }; + + enum IMAGE_ERROR + { + UNSUPPORTED_FRAME = -1, + NO_ERROR, + NO_FRAME + }; + + VPXDecoder(const WebMDemuxer &demuxer, unsigned threads = 1); + ~VPXDecoder(); + + inline bool isOpen() const + { + return (bool)m_ctx; + } + + inline int getFramesDelay() const + { + return m_delay; + } + + bool decode(const WebMFrame &frame); + IMAGE_ERROR getImage(Image &image); //The data is NOT copied! Only 3-plane, 8-bit images are supported. + +private: + vpx_codec_ctx *m_ctx; + const void *m_iter; + int m_delay; +}; + +#endif // VPXDECODER_HPP diff --git a/thirdparty/libsimplewebm/WebMDemuxer.cpp b/thirdparty/libsimplewebm/WebMDemuxer.cpp new file mode 100644 index 00000000000..cb63deccd5d --- /dev/null +++ b/thirdparty/libsimplewebm/WebMDemuxer.cpp @@ -0,0 +1,241 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include "WebMDemuxer.hpp" + +#include "mkvparser/mkvparser.h" + +#include +#include +#include + +WebMFrame::WebMFrame() : + bufferSize(0), bufferCapacity(0), + buffer(NULL), + time(0), + key(false) +{} +WebMFrame::~WebMFrame() +{ + free(buffer); +} + +/**/ + +WebMDemuxer::WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack, int audioTrack) : + m_reader(reader), + m_segment(NULL), + m_cluster(NULL), m_block(NULL), m_blockEntry(NULL), + m_blockFrameIndex(0), + m_videoTrack(NULL), m_vCodec(NO_VIDEO), + m_audioTrack(NULL), m_aCodec(NO_AUDIO), + m_isOpen(false), + m_eos(false) +{ + long long pos = 0; + if (mkvparser::EBMLHeader().Parse(m_reader, pos)) + return; + + if (mkvparser::Segment::CreateInstance(m_reader, pos, m_segment)) + return; + + if (m_segment->Load() < 0) + return; + + const mkvparser::Tracks *tracks = m_segment->GetTracks(); + const unsigned long tracksCount = tracks->GetTracksCount(); + int currVideoTrack = -1, currAudioTrack = -1; + for (unsigned long i = 0; i < tracksCount; ++i) + { + const mkvparser::Track *track = tracks->GetTrackByIndex(i); + if (const char *codecId = track->GetCodecId()) + { + if ((!m_videoTrack || currVideoTrack != videoTrack) && track->GetType() == mkvparser::Track::kVideo) + { + if (!strcmp(codecId, "V_VP8")) + m_vCodec = VIDEO_VP8; + else if (!strcmp(codecId, "V_VP9")) + m_vCodec = VIDEO_VP9; + if (m_vCodec != NO_VIDEO) + m_videoTrack = static_cast(track); + ++currVideoTrack; + } + if ((!m_audioTrack || currAudioTrack != audioTrack) && track->GetType() == mkvparser::Track::kAudio) + { + if (!strcmp(codecId, "A_VORBIS")) + m_aCodec = AUDIO_VORBIS; + else if (!strcmp(codecId, "A_OPUS")) + m_aCodec = AUDIO_OPUS; + if (m_aCodec != NO_AUDIO) + m_audioTrack = static_cast(track); + ++currAudioTrack; + } + } + } + if (!m_videoTrack && !m_audioTrack) + return; + + m_isOpen = true; +} +WebMDemuxer::~WebMDemuxer() +{ + delete m_segment; + delete m_reader; +} + +double WebMDemuxer::getLength() const +{ + return m_segment->GetDuration() / 1e9; +} + +WebMDemuxer::VIDEO_CODEC WebMDemuxer::getVideoCodec() const +{ + return m_vCodec; +} +int WebMDemuxer::getWidth() const +{ + return m_videoTrack->GetWidth(); +} +int WebMDemuxer::getHeight() const +{ + return m_videoTrack->GetHeight(); +} + +WebMDemuxer::AUDIO_CODEC WebMDemuxer::getAudioCodec() const +{ + return m_aCodec; +} +const unsigned char *WebMDemuxer::getAudioExtradata(size_t &size) const +{ + return m_audioTrack->GetCodecPrivate(size); +} +double WebMDemuxer::getSampleRate() const +{ + return m_audioTrack->GetSamplingRate(); +} +int WebMDemuxer::getChannels() const +{ + return m_audioTrack->GetChannels(); +} +int WebMDemuxer::getAudioDepth() const +{ + return m_audioTrack->GetBitDepth(); +} + +bool WebMDemuxer::readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame) +{ + const long videoTrackNumber = (videoFrame && m_videoTrack) ? m_videoTrack->GetNumber() : 0; + const long audioTrackNumber = (audioFrame && m_audioTrack) ? m_audioTrack->GetNumber() : 0; + bool blockEntryEOS = false; + + if (videoFrame) + videoFrame->bufferSize = 0; + if (audioFrame) + audioFrame->bufferSize = 0; + + if (videoTrackNumber == 0 && audioTrackNumber == 0) + return false; + + if (m_eos) + return false; + + if (!m_cluster) + m_cluster = m_segment->GetFirst(); + + do + { + bool getNewBlock = false; + long status = 0; + if (!m_blockEntry && !blockEntryEOS) + { + status = m_cluster->GetFirst(m_blockEntry); + getNewBlock = true; + } + else if (blockEntryEOS || m_blockEntry->EOS()) + { + m_cluster = m_segment->GetNext(m_cluster); + if (!m_cluster || m_cluster->EOS()) + { + m_eos = true; + return false; + } + status = m_cluster->GetFirst(m_blockEntry); + blockEntryEOS = false; + getNewBlock = true; + } + else if (!m_block || m_blockFrameIndex == m_block->GetFrameCount() || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)) + { + status = m_cluster->GetNext(m_blockEntry, m_blockEntry); + if (!m_blockEntry || m_blockEntry->EOS()) + { + blockEntryEOS = true; + continue; + } + getNewBlock = true; + } + if (status || !m_blockEntry) + return false; + if (getNewBlock) + { + m_block = m_blockEntry->GetBlock(); + m_blockFrameIndex = 0; + } + } while (blockEntryEOS || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)); + + WebMFrame *frame = NULL; + + const long trackNumber = m_block->GetTrackNumber(); + if (trackNumber == videoTrackNumber) + frame = videoFrame; + else if (trackNumber == audioTrackNumber) + frame = audioFrame; + else + { + //Should not be possible + assert(trackNumber == videoTrackNumber || trackNumber == audioTrackNumber); + return false; + } + + const mkvparser::Block::Frame &blockFrame = m_block->GetFrame(m_blockFrameIndex++); + if (blockFrame.len > frame->bufferCapacity) + { + unsigned char *newBuff = (unsigned char *)realloc(frame->buffer, frame->bufferCapacity = blockFrame.len); + if (newBuff) + frame->buffer = newBuff; + else // Out of memory + return false; + } + frame->bufferSize = blockFrame.len; + + frame->time = m_block->GetTime(m_cluster) / 1e9; + frame->key = m_block->IsKey(); + + return !blockFrame.Read(m_reader, frame->buffer); +} + +inline bool WebMDemuxer::notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const +{ + const long trackNumber = m_block->GetTrackNumber(); + return (trackNumber != videoTrackNumber && trackNumber != audioTrackNumber); +} diff --git a/thirdparty/libsimplewebm/WebMDemuxer.hpp b/thirdparty/libsimplewebm/WebMDemuxer.hpp new file mode 100644 index 00000000000..a45ddb3f26b --- /dev/null +++ b/thirdparty/libsimplewebm/WebMDemuxer.hpp @@ -0,0 +1,125 @@ +/* + MIT License + + Copyright (c) 2016 Błażej Szczygieł + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef WEBMDEMUXER_HPP +#define WEBMDEMUXER_HPP + +#include + +namespace mkvparser { + class IMkvReader; + class Segment; + class Cluster; + class Block; + class BlockEntry; + class VideoTrack; + class AudioTrack; +} + +class WebMFrame +{ + WebMFrame(const WebMFrame &); + void operator =(const WebMFrame &); +public: + WebMFrame(); + ~WebMFrame(); + + inline bool isValid() const + { + return bufferSize > 0; + } + + long bufferSize, bufferCapacity; + unsigned char *buffer; + double time; + bool key; +}; + +class WebMDemuxer +{ + WebMDemuxer(const WebMDemuxer &); + void operator =(const WebMDemuxer &); +public: + enum VIDEO_CODEC + { + NO_VIDEO, + VIDEO_VP8, + VIDEO_VP9 + }; + enum AUDIO_CODEC + { + NO_AUDIO, + AUDIO_VORBIS, + AUDIO_OPUS + }; + + WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack = 0, int audioTrack = 0); + ~WebMDemuxer(); + + inline bool isOpen() const + { + return m_isOpen; + } + inline bool isEOS() const + { + return m_eos; + } + + double getLength() const; + + VIDEO_CODEC getVideoCodec() const; + int getWidth() const; + int getHeight() const; + + AUDIO_CODEC getAudioCodec() const; + const unsigned char *getAudioExtradata(size_t &size) const; // Needed for Vorbis + double getSampleRate() const; + int getChannels() const; + int getAudioDepth() const; + + bool readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame); + +private: + inline bool notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const; + + mkvparser::IMkvReader *m_reader; + mkvparser::Segment *m_segment; + + const mkvparser::Cluster *m_cluster; + const mkvparser::Block *m_block; + const mkvparser::BlockEntry *m_blockEntry; + + int m_blockFrameIndex; + + const mkvparser::VideoTrack *m_videoTrack; + VIDEO_CODEC m_vCodec; + + const mkvparser::AudioTrack *m_audioTrack; + AUDIO_CODEC m_aCodec; + + bool m_isOpen; + bool m_eos; +}; + +#endif // WEBMDEMUXER_HPP diff --git a/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT b/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT new file mode 100644 index 00000000000..9686ac13eb0 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT @@ -0,0 +1,4 @@ +# Names should be added to this file like so: +# Name or Organization + +Google Inc. diff --git a/thirdparty/libsimplewebm/libwebm/LICENSE.TXT b/thirdparty/libsimplewebm/libwebm/LICENSE.TXT new file mode 100644 index 00000000000..7a6f99547d4 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/LICENSE.TXT @@ -0,0 +1,30 @@ +Copyright (c) 2010, Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/thirdparty/libsimplewebm/libwebm/PATENTS.TXT b/thirdparty/libsimplewebm/libwebm/PATENTS.TXT new file mode 100644 index 00000000000..caedf607e95 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/PATENTS.TXT @@ -0,0 +1,23 @@ +Additional IP Rights Grant (Patents) +------------------------------------ + +"These implementations" means the copyrightable works that implement the WebM +codecs distributed by Google as part of the WebM Project. + +Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, +royalty-free, irrevocable (except as stated in this section) patent license to +make, have made, use, offer to sell, sell, import, transfer, and otherwise +run, modify and propagate the contents of these implementations of WebM, where +such license applies only to those patent claims, both currently owned by +Google and acquired in the future, licensable by Google that are necessarily +infringed by these implementations of WebM. This grant does not include claims +that would be infringed only as a consequence of further modification of these +implementations. If you or your agent or exclusive licensee institute or order +or agree to the institution of patent litigation or any other patent +enforcement activity against any entity (including a cross-claim or +counterclaim in a lawsuit) alleging that any of these implementations of WebM +or any code incorporated within any of these implementations of WebM +constitute direct or contributory patent infringement, or inducement of +patent infringement, then any patent rights granted to you under this License +for these implementations of WebM shall terminate as of the date such +litigation is filed. diff --git a/thirdparty/libsimplewebm/libwebm/README.libvpx b/thirdparty/libsimplewebm/libwebm/README.libvpx new file mode 100644 index 00000000000..ae62f365253 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/README.libvpx @@ -0,0 +1,11 @@ +URL: https://chromium.googlesource.com/webm/libwebm +Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d +License: BSD +License File: LICENSE.txt + +Description: +libwebm is used to handle WebM container I/O. + +Local Changes: +* Removed: "mkvmuxer", "hdr_util", "file_util", "mkv_reader". +* Make "~IMkvRerader()" public. diff --git a/thirdparty/libsimplewebm/libwebm/common/webmids.h b/thirdparty/libsimplewebm/libwebm/common/webmids.h new file mode 100644 index 00000000000..32a0c5fb911 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/common/webmids.h @@ -0,0 +1,184 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef COMMON_WEBMIDS_H_ +#define COMMON_WEBMIDS_H_ + +namespace libwebm { + +enum MkvId { + kMkvEBML = 0x1A45DFA3, + kMkvEBMLVersion = 0x4286, + kMkvEBMLReadVersion = 0x42F7, + kMkvEBMLMaxIDLength = 0x42F2, + kMkvEBMLMaxSizeLength = 0x42F3, + kMkvDocType = 0x4282, + kMkvDocTypeVersion = 0x4287, + kMkvDocTypeReadVersion = 0x4285, + kMkvVoid = 0xEC, + kMkvSignatureSlot = 0x1B538667, + kMkvSignatureAlgo = 0x7E8A, + kMkvSignatureHash = 0x7E9A, + kMkvSignaturePublicKey = 0x7EA5, + kMkvSignature = 0x7EB5, + kMkvSignatureElements = 0x7E5B, + kMkvSignatureElementList = 0x7E7B, + kMkvSignedElement = 0x6532, + // segment + kMkvSegment = 0x18538067, + // Meta Seek Information + kMkvSeekHead = 0x114D9B74, + kMkvSeek = 0x4DBB, + kMkvSeekID = 0x53AB, + kMkvSeekPosition = 0x53AC, + // Segment Information + kMkvInfo = 0x1549A966, + kMkvTimecodeScale = 0x2AD7B1, + kMkvDuration = 0x4489, + kMkvDateUTC = 0x4461, + kMkvTitle = 0x7BA9, + kMkvMuxingApp = 0x4D80, + kMkvWritingApp = 0x5741, + // Cluster + kMkvCluster = 0x1F43B675, + kMkvTimecode = 0xE7, + kMkvPrevSize = 0xAB, + kMkvBlockGroup = 0xA0, + kMkvBlock = 0xA1, + kMkvBlockDuration = 0x9B, + kMkvReferenceBlock = 0xFB, + kMkvLaceNumber = 0xCC, + kMkvSimpleBlock = 0xA3, + kMkvBlockAdditions = 0x75A1, + kMkvBlockMore = 0xA6, + kMkvBlockAddID = 0xEE, + kMkvBlockAdditional = 0xA5, + kMkvDiscardPadding = 0x75A2, + // Track + kMkvTracks = 0x1654AE6B, + kMkvTrackEntry = 0xAE, + kMkvTrackNumber = 0xD7, + kMkvTrackUID = 0x73C5, + kMkvTrackType = 0x83, + kMkvFlagEnabled = 0xB9, + kMkvFlagDefault = 0x88, + kMkvFlagForced = 0x55AA, + kMkvFlagLacing = 0x9C, + kMkvDefaultDuration = 0x23E383, + kMkvMaxBlockAdditionID = 0x55EE, + kMkvName = 0x536E, + kMkvLanguage = 0x22B59C, + kMkvCodecID = 0x86, + kMkvCodecPrivate = 0x63A2, + kMkvCodecName = 0x258688, + kMkvCodecDelay = 0x56AA, + kMkvSeekPreRoll = 0x56BB, + // video + kMkvVideo = 0xE0, + kMkvFlagInterlaced = 0x9A, + kMkvStereoMode = 0x53B8, + kMkvAlphaMode = 0x53C0, + kMkvPixelWidth = 0xB0, + kMkvPixelHeight = 0xBA, + kMkvPixelCropBottom = 0x54AA, + kMkvPixelCropTop = 0x54BB, + kMkvPixelCropLeft = 0x54CC, + kMkvPixelCropRight = 0x54DD, + kMkvDisplayWidth = 0x54B0, + kMkvDisplayHeight = 0x54BA, + kMkvDisplayUnit = 0x54B2, + kMkvAspectRatioType = 0x54B3, + kMkvFrameRate = 0x2383E3, + // end video + // colour + kMkvColour = 0x55B0, + kMkvMatrixCoefficients = 0x55B1, + kMkvBitsPerChannel = 0x55B2, + kMkvChromaSubsamplingHorz = 0x55B3, + kMkvChromaSubsamplingVert = 0x55B4, + kMkvCbSubsamplingHorz = 0x55B5, + kMkvCbSubsamplingVert = 0x55B6, + kMkvChromaSitingHorz = 0x55B7, + kMkvChromaSitingVert = 0x55B8, + kMkvRange = 0x55B9, + kMkvTransferCharacteristics = 0x55BA, + kMkvPrimaries = 0x55BB, + kMkvMaxCLL = 0x55BC, + kMkvMaxFALL = 0x55BD, + // mastering metadata + kMkvMasteringMetadata = 0x55D0, + kMkvPrimaryRChromaticityX = 0x55D1, + kMkvPrimaryRChromaticityY = 0x55D2, + kMkvPrimaryGChromaticityX = 0x55D3, + kMkvPrimaryGChromaticityY = 0x55D4, + kMkvPrimaryBChromaticityX = 0x55D5, + kMkvPrimaryBChromaticityY = 0x55D6, + kMkvWhitePointChromaticityX = 0x55D7, + kMkvWhitePointChromaticityY = 0x55D8, + kMkvLuminanceMax = 0x55D9, + kMkvLuminanceMin = 0x55DA, + // end mastering metadata + // end colour + // audio + kMkvAudio = 0xE1, + kMkvSamplingFrequency = 0xB5, + kMkvOutputSamplingFrequency = 0x78B5, + kMkvChannels = 0x9F, + kMkvBitDepth = 0x6264, + // end audio + // ContentEncodings + kMkvContentEncodings = 0x6D80, + kMkvContentEncoding = 0x6240, + kMkvContentEncodingOrder = 0x5031, + kMkvContentEncodingScope = 0x5032, + kMkvContentEncodingType = 0x5033, + kMkvContentCompression = 0x5034, + kMkvContentCompAlgo = 0x4254, + kMkvContentCompSettings = 0x4255, + kMkvContentEncryption = 0x5035, + kMkvContentEncAlgo = 0x47E1, + kMkvContentEncKeyID = 0x47E2, + kMkvContentSignature = 0x47E3, + kMkvContentSigKeyID = 0x47E4, + kMkvContentSigAlgo = 0x47E5, + kMkvContentSigHashAlgo = 0x47E6, + kMkvContentEncAESSettings = 0x47E7, + kMkvAESSettingsCipherMode = 0x47E8, + kMkvAESSettingsCipherInitData = 0x47E9, + // end ContentEncodings + // Cueing Data + kMkvCues = 0x1C53BB6B, + kMkvCuePoint = 0xBB, + kMkvCueTime = 0xB3, + kMkvCueTrackPositions = 0xB7, + kMkvCueTrack = 0xF7, + kMkvCueClusterPosition = 0xF1, + kMkvCueBlockNumber = 0x5378, + // Chapters + kMkvChapters = 0x1043A770, + kMkvEditionEntry = 0x45B9, + kMkvChapterAtom = 0xB6, + kMkvChapterUID = 0x73C4, + kMkvChapterStringUID = 0x5654, + kMkvChapterTimeStart = 0x91, + kMkvChapterTimeEnd = 0x92, + kMkvChapterDisplay = 0x80, + kMkvChapString = 0x85, + kMkvChapLanguage = 0x437C, + kMkvChapCountry = 0x437E, + // Tags + kMkvTags = 0x1254C367, + kMkvTag = 0x7373, + kMkvSimpleTag = 0x67C8, + kMkvTagName = 0x45A3, + kMkvTagString = 0x4487 +}; + +} // namespace libwebm + +#endif // COMMON_WEBMIDS_H_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h b/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h new file mode 100644 index 00000000000..e5db121605f --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h @@ -0,0 +1,28 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVMUXER_MKVMUXERTYPES_H_ +#define MKVMUXER_MKVMUXERTYPES_H_ + +namespace mkvmuxer { +typedef unsigned char uint8; +typedef short int16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +} // namespace mkvmuxer + +// Copied from Chromium basictypes.h +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +#endif // MKVMUXER_MKVMUXERTYPES_HPP_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc new file mode 100644 index 00000000000..bda67a57582 --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc @@ -0,0 +1,7831 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#include "mkvparser/mkvparser.h" + +#if defined(_MSC_VER) && _MSC_VER < 1800 +#include // _isnan() / _finite() +#define MSC_COMPAT +#endif + +#include +#include +#include +#include +#include + +#include "common/webmids.h" + +namespace mkvparser { +const float MasteringMetadata::kValueNotPresent = FLT_MAX; +const long long Colour::kValueNotPresent = LLONG_MAX; + +#ifdef MSC_COMPAT +inline bool isnan(double val) { return !!_isnan(val); } +inline bool isinf(double val) { return !_finite(val); } +#endif // MSC_COMPAT + +template +class my_auto_ptr { + my_auto_ptr(const my_auto_ptr &); + T *operator =(const my_auto_ptr &); + + T *m_ptr; +public: + my_auto_ptr(T *ptr) : + m_ptr(ptr) + {} + my_auto_ptr() : + m_ptr(NULL) + {} + ~my_auto_ptr() { + delete m_ptr; + } + + T *release() { + T *ptr = m_ptr; + m_ptr = NULL; + return ptr; + } + + T *operator ->() const { + return m_ptr; + } +}; + +IMkvReader::~IMkvReader() {} + +template +Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size) { + if (num_elements == 0 || element_size == 0) + return NULL; + + const size_t kMaxAllocSize = 0x80000000; // 2GiB + const unsigned long long num_bytes = num_elements * element_size; + if (element_size > (kMaxAllocSize / num_elements)) + return NULL; + if (num_bytes != static_cast(num_bytes)) + return NULL; + + return new Type[static_cast(num_bytes)]; +} + +void GetVersion(int& major, int& minor, int& build, int& revision) { + major = 1; + minor = 0; + build = 0; + revision = 30; +} + +long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { + if (!pReader || pos < 0) + return E_FILE_FORMAT_INVALID; + + len = 1; + unsigned char b; + int status = pReader->Read(pos, 1, &b); + + if (status < 0) // error or underflow + return status; + + if (status > 0) // interpreted as "underflow" + return E_BUFFER_NOT_FULL; + + if (b == 0) // we can't handle u-int values larger than 8 bytes + return E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) { + m >>= 1; + ++len; + } + + long long result = b & (~m); + ++pos; + + for (int i = 1; i < len; ++i) { + status = pReader->Read(pos, 1, &b); + + if (status < 0) { + len = 1; + return status; + } + + if (status > 0) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + +// Reads an EBML ID and returns it. +// An ID must at least 1 byte long, cannot exceed 4, and its value must be +// greater than 0. +// See known EBML values and EBMLMaxIDLength: +// http://www.matroska.org/technical/specs/index.html +// Returns the ID, or a value less than 0 to report an error while reading the +// ID. +long long ReadID(IMkvReader* pReader, long long pos, long& len) { + if (pReader == NULL || pos < 0) + return E_FILE_FORMAT_INVALID; + + // Read the first byte. The length in bytes of the ID is determined by + // finding the first set bit in the first byte of the ID. + unsigned char temp_byte = 0; + int read_status = pReader->Read(pos, 1, &temp_byte); + + if (read_status < 0) + return E_FILE_FORMAT_INVALID; + else if (read_status > 0) // No data to read. + return E_BUFFER_NOT_FULL; + + if (temp_byte == 0) // ID length > 8 bytes; invalid file. + return E_FILE_FORMAT_INVALID; + + int bit_pos = 0; + const int kMaxIdLengthInBytes = 4; + const int kCheckByte = 0x80; + + // Find the first bit that's set. + bool found_bit = false; + for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { + if ((kCheckByte >> bit_pos) & temp_byte) { + found_bit = true; + break; + } + } + + if (!found_bit) { + // The value is too large to be a valid ID. + return E_FILE_FORMAT_INVALID; + } + + // Read the remaining bytes of the ID (if any). + const int id_length = bit_pos + 1; + long long ebml_id = temp_byte; + for (int i = 1; i < id_length; ++i) { + ebml_id <<= 8; + read_status = pReader->Read(pos + i, 1, &temp_byte); + + if (read_status < 0) + return E_FILE_FORMAT_INVALID; + else if (read_status > 0) + return E_BUFFER_NOT_FULL; + + ebml_id |= temp_byte; + } + + len = id_length; + return ebml_id; +} + +long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { + if (!pReader || pos < 0) + return E_FILE_FORMAT_INVALID; + + long long total, available; + + int status = pReader->Length(&total, &available); + if (status < 0 || (total >= 0 && available > total)) + return E_FILE_FORMAT_INVALID; + + len = 1; + + if (pos >= available) + return pos; // too few bytes available + + unsigned char b; + + status = pReader->Read(pos, 1, &b); + + if (status != 0) + return status; + + if (b == 0) // we can't handle u-int values larger than 8 bytes + return E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) { + m >>= 1; + ++len; + } + + return 0; // success +} + +// TODO(vigneshv): This function assumes that unsigned values never have their +// high bit set. +long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { + if (!pReader || pos < 0 || (size <= 0) || (size > 8)) + return E_FILE_FORMAT_INVALID; + + long long result = 0; + + for (long long i = 0; i < size; ++i) { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + +long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, + double& result) { + if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) + return E_FILE_FORMAT_INVALID; + + const long size = static_cast(size_); + + unsigned char buf[8]; + + const int status = pReader->Read(pos, size, buf); + + if (status < 0) // error + return status; + + if (size == 4) { + union { + float f; + unsigned long ff; + }; + + ff = 0; + + for (int i = 0;;) { + ff |= buf[i]; + + if (++i >= 4) + break; + + ff <<= 8; + } + + result = f; + } else { + union { + double d; + unsigned long long dd; + }; + + dd = 0; + + for (int i = 0;;) { + dd |= buf[i]; + + if (++i >= 8) + break; + + dd <<= 8; + } + + result = d; + } + + if (isinf(result) || isnan(result)) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +long UnserializeInt(IMkvReader* pReader, long long pos, long long size, + long long& result_ref) { + if (!pReader || pos < 0 || size < 1 || size > 8) + return E_FILE_FORMAT_INVALID; + + signed char first_byte = 0; + const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); + + if (status < 0) + return status; + + unsigned long long result = first_byte; + ++pos; + + for (long i = 1; i < size; ++i) { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + result_ref = static_cast(result); + return 0; +} + +long UnserializeString(IMkvReader* pReader, long long pos, long long size, + char*& str) { + delete[] str; + str = NULL; + + if (size >= LONG_MAX || size < 0) + return E_FILE_FORMAT_INVALID; + + // +1 for '\0' terminator + const long required_size = static_cast(size) + 1; + + str = SafeArrayAlloc(1, required_size); + if (str == NULL) + return E_FILE_FORMAT_INVALID; + + unsigned char* const buf = reinterpret_cast(str); + + const long status = pReader->Read(pos, static_cast(size), buf); + + if (status) { + delete[] str; + str = NULL; + + return status; + } + + str[required_size - 1] = '\0'; + return 0; +} + +long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, + long long& id, long long& size) { + if (stop >= 0 && pos >= stop) + return E_FILE_FORMAT_INVALID; + + long len; + + id = ReadID(pReader, pos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume id + + if (stop >= 0 && pos >= stop) + return E_FILE_FORMAT_INVALID; + + size = ReadUInt(pReader, pos, len); + + if (size < 0 || len < 1 || len > 8) { + // Invalid: Negative payload size, negative or 0 length integer, or integer + // larger than 64 bits (libwebm cannot handle them). + return E_FILE_FORMAT_INVALID; + } + + // Avoid rolling over pos when very close to LLONG_MAX. + const unsigned long long rollover_check = + static_cast(pos) + len; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume length of size + + // pos now designates payload + + if (stop >= 0 && pos > stop) + return E_FILE_FORMAT_INVALID; + + return 0; // success +} + +bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, + long long& val) { + if (!pReader || pos < 0) + return false; + + long long total = 0; + long long available = 0; + + const long status = pReader->Length(&total, &available); + if (status < 0 || (total >= 0 && available > total)) + return false; + + long len = 0; + + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (available - pos) > len) + return false; + + if (static_cast(id) != expected_id) + return false; + + pos += len; // consume id + + const long long size = ReadUInt(pReader, pos, len); + if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) + return false; + + pos += len; // consume length of size of payload + + val = UnserializeUInt(pReader, pos, size); + if (val < 0) + return false; + + pos += size; // consume size of payload + + return true; +} + +bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, + unsigned char*& buf, size_t& buflen) { + if (!pReader || pos < 0) + return false; + + long long total = 0; + long long available = 0; + + long status = pReader->Length(&total, &available); + if (status < 0 || (total >= 0 && available > total)) + return false; + + long len = 0; + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (available - pos) > len) + return false; + + if (static_cast(id) != expected_id) + return false; + + pos += len; // consume id + + const long long size = ReadUInt(pReader, pos, len); + if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) + return false; + + unsigned long long rollover_check = + static_cast(pos) + len; + if (rollover_check > LLONG_MAX) + return false; + + pos += len; // consume length of size of payload + + rollover_check = static_cast(pos) + size; + if (rollover_check > LLONG_MAX) + return false; + + if ((pos + size) > available) + return false; + + if (size >= LONG_MAX) + return false; + + const long buflen_ = static_cast(size); + + buf = SafeArrayAlloc(1, buflen_); + if (!buf) + return false; + + status = pReader->Read(pos, buflen_, buf); + if (status != 0) + return false; + + buflen = buflen_; + + pos += size; // consume size of payload + return true; +} + +EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } + +EBMLHeader::~EBMLHeader() { delete[] m_docType; } + +void EBMLHeader::Init() { + m_version = 1; + m_readVersion = 1; + m_maxIdLength = 4; + m_maxSizeLength = 8; + + if (m_docType) { + delete[] m_docType; + m_docType = NULL; + } + + m_docTypeVersion = 1; + m_docTypeReadVersion = 1; +} + +long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { + if (!pReader) + return E_FILE_FORMAT_INVALID; + + long long total, available; + + long status = pReader->Length(&total, &available); + + if (status < 0) // error + return status; + + pos = 0; + + // Scan until we find what looks like the first byte of the EBML header. + const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; + const unsigned char kEbmlByte0 = 0x1A; + unsigned char scan_byte = 0; + + while (pos < kMaxScanBytes) { + status = pReader->Read(pos, 1, &scan_byte); + + if (status < 0) // error + return status; + else if (status > 0) + return E_BUFFER_NOT_FULL; + + if (scan_byte == kEbmlByte0) + break; + + ++pos; + } + + long len = 0; + const long long ebml_id = ReadID(pReader, pos, len); + + if (ebml_id == E_BUFFER_NOT_FULL) + return E_BUFFER_NOT_FULL; + + if (len != 4 || ebml_id != libwebm::kMkvEBML) + return E_FILE_FORMAT_INVALID; + + // Move read pos forward to the EBML header size field. + pos += 4; + + // Read length of size field. + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return E_FILE_FORMAT_INVALID; + else if (result > 0) // need more data + return E_BUFFER_NOT_FULL; + + if (len < 1 || len > 8) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((total - pos) < len)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < len) + return pos + len; // try again later + + // Read the EBML header size. + result = ReadUInt(pReader, pos, len); + + if (result < 0) // error + return result; + + pos += len; // consume size field + + // pos now designates start of payload + + if ((total >= 0) && ((total - pos) < result)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < result) + return pos + result; + + const long long end = pos + result; + + Init(); + + while (pos < end) { + long long id, size; + + status = ParseElementHeader(pReader, pos, end, id, size); + + if (status < 0) // error + return status; + + if (size == 0) + return E_FILE_FORMAT_INVALID; + + if (id == libwebm::kMkvEBMLVersion) { + m_version = UnserializeUInt(pReader, pos, size); + + if (m_version <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvEBMLReadVersion) { + m_readVersion = UnserializeUInt(pReader, pos, size); + + if (m_readVersion <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvEBMLMaxIDLength) { + m_maxIdLength = UnserializeUInt(pReader, pos, size); + + if (m_maxIdLength <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvEBMLMaxSizeLength) { + m_maxSizeLength = UnserializeUInt(pReader, pos, size); + + if (m_maxSizeLength <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDocType) { + if (m_docType) + return E_FILE_FORMAT_INVALID; + + status = UnserializeString(pReader, pos, size, m_docType); + + if (status) // error + return status; + } else if (id == libwebm::kMkvDocTypeVersion) { + m_docTypeVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeVersion <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDocTypeReadVersion) { + m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeReadVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; + } + + if (pos != end) + return E_FILE_FORMAT_INVALID; + + // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. + if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) + return E_FILE_FORMAT_INVALID; + + // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. + if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || + m_maxSizeLength > 8) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +Segment::Segment(IMkvReader* pReader, long long elem_start, + // long long elem_size, + long long start, long long size) + : m_pReader(pReader), + m_element_start(elem_start), + // m_element_size(elem_size), + m_start(start), + m_size(size), + m_pos(start), + m_pUnknownSize(0), + m_pSeekHead(NULL), + m_pInfo(NULL), + m_pTracks(NULL), + m_pCues(NULL), + m_pChapters(NULL), + m_pTags(NULL), + m_clusters(NULL), + m_clusterCount(0), + m_clusterPreloadCount(0), + m_clusterSize(0) {} + +Segment::~Segment() { + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** i = m_clusters; + Cluster** j = m_clusters + count; + + while (i != j) { + Cluster* const p = *i++; + delete p; + } + + delete[] m_clusters; + + delete m_pTracks; + delete m_pInfo; + delete m_pCues; + delete m_pChapters; + delete m_pTags; + delete m_pSeekHead; +} + +long long Segment::CreateInstance(IMkvReader* pReader, long long pos, + Segment*& pSegment) { + if (pReader == NULL || pos < 0) + return E_PARSE_FAILED; + + pSegment = NULL; + + long long total, available; + + const long status = pReader->Length(&total, &available); + + if (status < 0) // error + return status; + + if (available < 0) + return -1; + + if ((total >= 0) && (available > total)) + return -1; + + // I would assume that in practice this loop would execute + // exactly once, but we allow for other elements (e.g. Void) + // to immediately follow the EBML header. This is fine for + // the source filter case (since the entire file is available), + // but in the splitter case over a network we should probably + // just give up early. We could for example decide only to + // execute this loop a maximum of, say, 10 times. + // TODO: + // There is an implied "give up early" by only parsing up + // to the available limit. We do do that, but only if the + // total file size is unknown. We could decide to always + // use what's available as our limit (irrespective of whether + // we happen to know the total file length). This would have + // as its sense "parse this much of the file before giving up", + // which a slightly different sense from "try to parse up to + // 10 EMBL elements before giving up". + + for (;;) { + if ((total >= 0) && (pos >= total)) + return E_FILE_FORMAT_INVALID; + + // Read ID + long len; + long long result = GetUIntLength(pReader, pos, len); + + if (result) // error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadID(pReader, pos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume ID + + // Read Size + + result = GetUIntLength(pReader, pos, len); + + if (result) // error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return size; + + pos += len; // consume length of size of element + + // Pos now points to start of payload + + // Handle "unknown size" for live streaming of webm files. + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (id == libwebm::kMkvSegment) { + if (size == unknown_size) + size = -1; + + else if (total < 0) + size = -1; + + else if ((pos + size) > total) + size = -1; + + pSegment = new Segment(pReader, idpos, pos, size); + + return 0; // success + } + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + size) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + size) > available) + return pos + size; + + pos += size; // consume payload + } +} + +long long Segment::ParseHeaders() { + // Outermost (level 0) segment object has been constructed, + // and pos designates start of payload. We need to find the + // inner (level 1) elements. + long long total, available; + + const int status = m_pReader->Length(&total, &available); + + if (status < 0) // error + return status; + + if (total > 0 && available > total) + return E_FILE_FORMAT_INVALID; + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || + (segment_stop >= 0 && m_pos > segment_stop)) { + return E_FILE_FORMAT_INVALID; + } + + for (;;) { + if ((total >= 0) && (m_pos >= total)) + break; + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + break; + + long long pos = m_pos; + const long long element_start = pos; + + // Avoid rolling over pos when very close to LLONG_MAX. + unsigned long long rollover_check = pos + 1ULL; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > available) + return (pos + 1); + + long len; + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return result; + + if (result > 0) { + // MkvReader doesn't have enough data to satisfy this read attempt. + return (pos + 1); + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadID(m_pReader, idpos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + if (id == libwebm::kMkvCluster) + break; + + pos += len; // consume ID + + if ((pos + 1) > available) + return (pos + 1); + + // Read Size + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return result; + + if (result > 0) { + // MkvReader doesn't have enough data to satisfy this read attempt. + return (pos + 1); + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0 || len < 1 || len > 8) { + // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or + // len > 8 is true instead of checking this _everywhere_. + return size; + } + + pos += len; // consume length of size of element + + // Avoid rolling over pos when very close to LLONG_MAX. + rollover_check = static_cast(pos) + size; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + + const long long element_size = size + pos - element_start; + + // Pos now points to start of payload + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + // We read EBML elements either in total or nothing at all. + + if ((pos + size) > available) + return pos + size; + + if (id == libwebm::kMkvInfo) { + if (m_pInfo) + return E_FILE_FORMAT_INVALID; + + m_pInfo = new SegmentInfo(this, pos, size, element_start, element_size); + + const long status = m_pInfo->Parse(); + + if (status) + return status; + } else if (id == libwebm::kMkvTracks) { + if (m_pTracks) + return E_FILE_FORMAT_INVALID; + + m_pTracks = new Tracks(this, pos, size, element_start, element_size); + + const long status = m_pTracks->Parse(); + + if (status) + return status; + } else if (id == libwebm::kMkvCues) { + if (m_pCues == NULL) { + m_pCues = new Cues(this, pos, size, element_start, element_size); + } + } else if (id == libwebm::kMkvSeekHead) { + if (m_pSeekHead == NULL) { + m_pSeekHead = new SeekHead(this, pos, size, element_start, element_size); + + const long status = m_pSeekHead->Parse(); + + if (status) + return status; + } + } else if (id == libwebm::kMkvChapters) { + if (m_pChapters == NULL) { + m_pChapters = new Chapters(this, pos, size, element_start, element_size); + + const long status = m_pChapters->Parse(); + + if (status) + return status; + } + } else if (id == libwebm::kMkvTags) { + if (m_pTags == NULL) { + m_pTags = new Tags(this, pos, size, element_start, element_size); + + const long status = m_pTags->Parse(); + + if (status) + return status; + } + } + + m_pos = pos + size; // consume payload + } + + if (segment_stop >= 0 && m_pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + if (m_pInfo == NULL) // TODO: liberalize this behavior + return E_FILE_FORMAT_INVALID; + + if (m_pTracks == NULL) + return E_FILE_FORMAT_INVALID; + + return 0; // success +} + +long Segment::LoadCluster(long long& pos, long& len) { + for (;;) { + const long result = DoLoadCluster(pos, len); + + if (result <= 1) + return result; + } +} + +long Segment::DoLoadCluster(long long& pos, long& len) { + if (m_pos < 0) + return DoLoadClusterUnknownSize(pos, len); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + if (total >= 0 && avail > total) + return E_FILE_FORMAT_INVALID; + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + long long cluster_off = -1; // offset relative to start of segment + long long cluster_size = -1; // size of cluster payload + + for (;;) { + if ((total >= 0) && (m_pos >= total)) + return 1; // no more clusters + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + return 1; // no more clusters + + pos = m_pos; + + // Read ID + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadID(m_pReader, idpos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume ID + + // Read Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume length of size of element + + // pos now points to start of payload + + if (size == 0) { + // Missing element payload: move on. + m_pos = pos; + continue; + } + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if ((segment_stop >= 0) && (size != unknown_size) && + ((pos + size) > segment_stop)) { + return E_FILE_FORMAT_INVALID; + } + + if (id == libwebm::kMkvCues) { + if (size == unknown_size) { + // Cues element of unknown size: Not supported. + return E_FILE_FORMAT_INVALID; + } + + if (m_pCues == NULL) { + const long long element_size = (pos - idpos) + size; + + m_pCues = new Cues(this, pos, size, idpos, element_size); + } + + m_pos = pos + size; // consume payload + continue; + } + + if (id != libwebm::kMkvCluster) { + // Besides the Segment, Libwebm allows only cluster elements of unknown + // size. Fail the parse upon encountering a non-cluster element reporting + // unknown size. + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + m_pos = pos + size; // consume payload + continue; + } + + // We have a cluster. + + cluster_off = idpos - m_start; // relative pos + + if (size != unknown_size) + cluster_size = size; + + break; + } + + if (cluster_off < 0) { + // No cluster, die. + return E_FILE_FORMAT_INVALID; + } + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); + + if (status < 0) { // error, or underflow + pos = pos_; + len = len_; + + return status; + } + + // status == 0 means "no block entries found" + // status > 0 means "found at least one block entry" + + // TODO: + // The issue here is that the segment increments its own + // pos ptr past the most recent cluster parsed, and then + // starts from there to parse the next cluster. If we + // don't know the size of the current cluster, then we + // must either parse its payload (as we do below), looking + // for the cluster (or cues) ID to terminate the parse. + // This isn't really what we want: rather, we really need + // a way to create the curr cluster object immediately. + // The pity is that cluster::parse can determine its own + // boundary, and we largely duplicate that same logic here. + // + // Maybe we need to get rid of our look-ahead preloading + // in source::parse??? + // + // As we're parsing the blocks in the curr cluster + //(in cluster::parse), we should have some way to signal + // to the segment that we have determined the boundary, + // so it can adjust its own segment::m_pos member. + // + // The problem is that we're asserting in asyncreadinit, + // because we adjust the pos down to the curr seek pos, + // and the resulting adjusted len is > 2GB. I'm suspicious + // that this is even correct, but even if it is, we can't + // be loading that much data in the cache anyway. + + const long idx = m_clusterCount; + + if (m_clusterPreloadCount > 0) { + if (idx >= m_clusterSize) + return E_FILE_FORMAT_INVALID; + + Cluster* const pCluster = m_clusters[idx]; + if (pCluster == NULL || pCluster->m_index >= 0) + return E_FILE_FORMAT_INVALID; + + const long long off = pCluster->GetPosition(); + if (off < 0) + return E_FILE_FORMAT_INVALID; + + if (off == cluster_off) { // preloaded already + if (status == 0) // no entries found + return E_FILE_FORMAT_INVALID; + + if (cluster_size >= 0) + pos += cluster_size; + else { + const long long element_size = pCluster->GetElementSize(); + + if (element_size <= 0) + return E_FILE_FORMAT_INVALID; // TODO: handle this case + + pos = pCluster->m_element_start + element_size; + } + + pCluster->m_index = idx; // move from preloaded to loaded + ++m_clusterCount; + --m_clusterPreloadCount; + + m_pos = pos; // consume payload + if (segment_stop >= 0 && m_pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + return 0; // success + } + } + + if (status == 0) { // no entries found + if (cluster_size >= 0) + pos += cluster_size; + + if ((total >= 0) && (pos >= total)) { + m_pos = total; + return 1; // no more clusters + } + + if ((segment_stop >= 0) && (pos >= segment_stop)) { + m_pos = segment_stop; + return 1; // no more clusters + } + + m_pos = pos; + return 2; // try again + } + + // status > 0 means we have an entry + + Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); + if (pCluster == NULL) + return -1; + + if (!AppendCluster(pCluster)) { + delete pCluster; + return -1; + } + + if (cluster_size >= 0) { + pos += cluster_size; + + m_pos = pos; + + if (segment_stop > 0 && m_pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + return 0; + } + + m_pUnknownSize = pCluster; + m_pos = -pos; + + return 0; // partial success, since we have a new cluster + + // status == 0 means "no block entries found" + // pos designates start of payload + // m_pos has NOT been adjusted yet (in case we need to come back here) +} + +long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { + if (m_pos >= 0 || m_pUnknownSize == NULL) + return E_PARSE_FAILED; + + const long status = m_pUnknownSize->Parse(pos, len); + + if (status < 0) // error or underflow + return status; + + if (status == 0) // parsed a block + return 2; // continue parsing + + const long long start = m_pUnknownSize->m_element_start; + const long long size = m_pUnknownSize->GetElementSize(); + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + pos = start + size; + m_pos = pos; + + m_pUnknownSize = 0; + + return 2; // continue parsing +} + +bool Segment::AppendCluster(Cluster* pCluster) { + if (pCluster == NULL || pCluster->m_index < 0) + return false; + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + const long idx = pCluster->m_index; + + if (size < count || idx != m_clusterCount) + return false; + + if (count >= size) { + const long n = (size <= 0) ? 2048 : 2 * size; + + Cluster** const qq = new Cluster*[n]; + + Cluster** q = qq; + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + if (m_clusterPreloadCount > 0) { + Cluster** const p = m_clusters + m_clusterCount; + if (*p == NULL || (*p)->m_index >= 0) + return false; + + Cluster** q = p + m_clusterPreloadCount; + if (q >= (m_clusters + size)) + return false; + + for (;;) { + Cluster** const qq = q - 1; + if ((*qq)->m_index >= 0) + return false; + + *q = *qq; + q = qq; + + if (q == p) + break; + } + } + + m_clusters[idx] = pCluster; + ++m_clusterCount; + return true; +} + +bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { + if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) + return false; + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + if (size < count) + return false; + + if (count >= size) { + const long n = (size <= 0) ? 2048 : 2 * size; + + Cluster** const qq = new Cluster*[n]; + Cluster** q = qq; + + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + if (m_clusters == NULL) + return false; + + Cluster** const p = m_clusters + idx; + + Cluster** q = m_clusters + count; + if (q < p || q >= (m_clusters + size)) + return false; + + while (q > p) { + Cluster** const qq = q - 1; + + if ((*qq)->m_index >= 0) + return false; + + *q = *qq; + q = qq; + } + + m_clusters[idx] = pCluster; + ++m_clusterPreloadCount; + return true; +} + +long Segment::Load() { + if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) + return E_PARSE_FAILED; + + // Outermost (level 0) segment object has been constructed, + // and pos designates start of payload. We need to find the + // inner (level 1) elements. + + const long long header_status = ParseHeaders(); + + if (header_status < 0) // error + return static_cast(header_status); + + if (header_status > 0) // underflow + return E_BUFFER_NOT_FULL; + + if (m_pInfo == NULL || m_pTracks == NULL) + return E_FILE_FORMAT_INVALID; + + for (;;) { + const long status = LoadCluster(); + + if (status < 0) // error + return status; + + if (status >= 1) // no more clusters + return 0; + } +} + +SeekHead::SeekHead(Segment* pSegment, long long start, long long size_, + long long element_start, long long element_size) + : m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_entries(0), + m_entry_count(0), + m_void_elements(0), + m_void_element_count(0) {} + +SeekHead::~SeekHead() { + delete[] m_entries; + delete[] m_void_elements; +} + +long SeekHead::Parse() { + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + // first count the seek head entries + + int entry_count = 0; + int void_element_count = 0; + + while (pos < stop) { + long long id, size; + + const long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (id == libwebm::kMkvSeek) + ++entry_count; + else if (id == libwebm::kMkvVoid) + ++void_element_count; + + pos += size; // consume payload + + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + m_entries = new Entry[entry_count]; + + m_void_elements = new VoidElement[void_element_count]; + + // now parse the entries and void elements + + Entry* pEntry = m_entries; + VoidElement* pVoidElement = m_void_elements; + + pos = m_start; + + while (pos < stop) { + const long long idpos = pos; + + long long id, size; + + const long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (id == libwebm::kMkvSeek) { + if (ParseEntry(pReader, pos, size, pEntry)) { + Entry& e = *pEntry++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + } else if (id == libwebm::kMkvVoid) { + VoidElement& e = *pVoidElement++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); + assert(count_ >= 0); + assert(count_ <= entry_count); + + m_entry_count = static_cast(count_); + + count_ = ptrdiff_t(pVoidElement - m_void_elements); + assert(count_ >= 0); + assert(count_ <= void_element_count); + + m_void_element_count = static_cast(count_); + + return 0; +} + +int SeekHead::GetCount() const { return m_entry_count; } + +const SeekHead::Entry* SeekHead::GetEntry(int idx) const { + if (idx < 0) + return 0; + + if (idx >= m_entry_count) + return 0; + + return m_entries + idx; +} + +int SeekHead::GetVoidElementCount() const { return m_void_element_count; } + +const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const { + if (idx < 0) + return 0; + + if (idx >= m_void_element_count) + return 0; + + return m_void_elements + idx; +} + +long Segment::ParseCues(long long off, long long& pos, long& len) { + if (m_pCues) + return 0; // success + + if (off < 0) + return -1; + + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + pos = m_start + off; + + if ((total < 0) || (pos >= total)) + return 1; // don't bother parsing cues + + const long long element_start = pos; + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + + const long long id = ReadID(m_pReader, idpos, len); + + if (id != libwebm::kMkvCues) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume ID + assert((segment_stop < 0) || (pos <= segment_stop)); + + // Read Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + if (size == 0) // weird, although technically not illegal + return 1; // done + + pos += len; // consume length of size of element + assert((segment_stop < 0) || (pos <= segment_stop)); + + // Pos now points to start of payload + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (element_stop > total)) + return 1; // don't bother parsing anymore + + len = static_cast(size); + + if (element_stop > avail) + return E_BUFFER_NOT_FULL; + + const long long element_size = element_stop - element_start; + + m_pCues = new Cues(this, pos, size, element_start, element_size); + + return 0; // success +} + +bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, + Entry* pEntry) { + if (size_ <= 0) + return false; + + long long pos = start; + const long long stop = start + size_; + + long len; + + // parse the container for the level-1 element ID + + const long long seekIdId = ReadID(pReader, pos, len); + if (seekIdId < 0) + return false; + + if (seekIdId != libwebm::kMkvSeekID) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; // consume SeekID id + + const long long seekIdSize = ReadUInt(pReader, pos, len); + + if (seekIdSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; // consume size of field + + if ((pos + seekIdSize) > stop) + return false; + + // Note that the SeekId payload really is serialized + // as a "Matroska integer", not as a plain binary value. + // In fact, Matroska requires that ID values in the + // stream exactly match the binary representation as listed + // in the Matroska specification. + // + // This parser is more liberal, and permits IDs to have + // any width. (This could make the representation in the stream + // different from what's in the spec, but it doesn't matter here, + // since we always normalize "Matroska integer" values.) + + pEntry->id = ReadUInt(pReader, pos, len); // payload + + if (pEntry->id <= 0) + return false; + + if (len != seekIdSize) + return false; + + pos += seekIdSize; // consume SeekID payload + + const long long seekPosId = ReadID(pReader, pos, len); + + if (seekPosId != libwebm::kMkvSeekPosition) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; // consume id + + const long long seekPosSize = ReadUInt(pReader, pos, len); + + if (seekPosSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; // consume size + + if ((pos + seekPosSize) > stop) + return false; + + pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); + + if (pEntry->pos < 0) + return false; + + pos += seekPosSize; // consume payload + + if (pos != stop) + return false; + + return true; +} + +Cues::Cues(Segment* pSegment, long long start_, long long size_, + long long element_start, long long element_size) + : m_pSegment(pSegment), + m_start(start_), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_cue_points(NULL), + m_count(0), + m_preload_count(0), + m_pos(start_) {} + +Cues::~Cues() { + const long n = m_count + m_preload_count; + + CuePoint** p = m_cue_points; + CuePoint** const q = p + n; + + while (p != q) { + CuePoint* const pCP = *p++; + assert(pCP); + + delete pCP; + } + + delete[] m_cue_points; +} + +long Cues::GetCount() const { + if (m_cue_points == NULL) + return -1; + + return m_count; // TODO: really ignore preload count? +} + +bool Cues::DoneParsing() const { + const long long stop = m_start + m_size; + return (m_pos >= stop); +} + +bool Cues::Init() const { + if (m_cue_points) + return true; + + if (m_count != 0 || m_preload_count != 0) + return false; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + const long long stop = m_start + m_size; + long long pos = m_start; + + long cue_points_size = 0; + + while (pos < stop) { + const long long idpos = pos; + + long len; + + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (pos + len) > stop) { + return false; + } + + pos += len; // consume ID + + const long long size = ReadUInt(pReader, pos, len); + if (size < 0 || (pos + len > stop)) { + return false; + } + + pos += len; // consume Size field + if (pos + size > stop) { + return false; + } + + if (id == libwebm::kMkvCuePoint) { + if (!PreloadCuePoint(cue_points_size, idpos)) + return false; + } + + pos += size; // skip payload + } + return true; +} + +bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { + if (m_count != 0) + return false; + + if (m_preload_count >= cue_points_size) { + const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size; + + CuePoint** const qq = new CuePoint*[n]; + + CuePoint** q = qq; // beginning of target + + CuePoint** p = m_cue_points; // beginning of source + CuePoint** const pp = p + m_preload_count; // end of source + + while (p != pp) + *q++ = *p++; + + delete[] m_cue_points; + + m_cue_points = qq; + cue_points_size = n; + } + + CuePoint* const pCP = new CuePoint(m_preload_count, pos); + + m_cue_points[m_preload_count++] = pCP; + return true; +} + +bool Cues::LoadCuePoint() const { + const long long stop = m_start + m_size; + + if (m_pos >= stop) + return false; // nothing else to do + + if (!Init()) { + m_pos = stop; + return false; + } + + IMkvReader* const pReader = m_pSegment->m_pReader; + + while (m_pos < stop) { + const long long idpos = m_pos; + + long len; + + const long long id = ReadID(pReader, m_pos, len); + if (id < 0 || (m_pos + len) > stop) + return false; + + m_pos += len; // consume ID + + const long long size = ReadUInt(pReader, m_pos, len); + if (size < 0 || (m_pos + len) > stop) + return false; + + m_pos += len; // consume Size field + if ((m_pos + size) > stop) + return false; + + if (id != libwebm::kMkvCuePoint) { + m_pos += size; // consume payload + if (m_pos > stop) + return false; + + continue; + } + + if (m_preload_count < 1) + return false; + + CuePoint* const pCP = m_cue_points[m_count]; + if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) + return false; + + if (!pCP->Load(pReader)) { + m_pos = stop; + return false; + } + ++m_count; + --m_preload_count; + + m_pos += size; // consume payload + if (m_pos > stop) + return false; + + return true; // yes, we loaded a cue point + } + + return false; // no, we did not load a cue point +} + +bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) const { + if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0) + return false; + + CuePoint** const ii = m_cue_points; + CuePoint** i = ii; + + CuePoint** const jj = ii + m_count; + CuePoint** j = jj; + + pCP = *i; + if (pCP == NULL) + return false; + + if (time_ns <= pCP->GetTime(m_pSegment)) { + pTP = pCP->Find(pTrack); + return (pTP != NULL); + } + + while (i < j) { + // INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + CuePoint** const k = i + (j - i) / 2; + if (k >= jj) + return false; + + CuePoint* const pCP = *k; + if (pCP == NULL) + return false; + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + if (i > j) + return false; + } + + if (i != j || i > jj || i <= ii) + return false; + + pCP = *--i; + + if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) + return false; + + // TODO: here and elsewhere, it's probably not correct to search + // for the cue point with this time, and then search for a matching + // track. In principle, the matching track could be on some earlier + // cue point, and with our current algorithm, we'd miss it. To make + // this bullet-proof, we'd need to create a secondary structure, + // with a list of cue points that apply to a track, and then search + // that track-based structure for a matching cue point. + + pTP = pCP->Find(pTrack); + return (pTP != NULL); +} + +const CuePoint* Cues::GetFirst() const { + if (m_cue_points == NULL || m_count == 0) + return NULL; + + CuePoint* const* const pp = m_cue_points; + if (pp == NULL) + return NULL; + + CuePoint* const pCP = pp[0]; + if (pCP == NULL || pCP->GetTimeCode() < 0) + return NULL; + + return pCP; +} + +const CuePoint* Cues::GetLast() const { + if (m_cue_points == NULL || m_count <= 0) + return NULL; + + const long index = m_count - 1; + + CuePoint* const* const pp = m_cue_points; + if (pp == NULL) + return NULL; + + CuePoint* const pCP = pp[index]; + if (pCP == NULL || pCP->GetTimeCode() < 0) + return NULL; + + return pCP; +} + +const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { + if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || + m_count < 1) { + return NULL; + } + + long index = pCurr->m_index; + if (index >= m_count) + return NULL; + + CuePoint* const* const pp = m_cue_points; + if (pp == NULL || pp[index] != pCurr) + return NULL; + + ++index; + + if (index >= m_count) + return NULL; + + CuePoint* const pNext = pp[index]; + + if (pNext == NULL || pNext->GetTimeCode() < 0) + return NULL; + + return pNext; +} + +const BlockEntry* Cues::GetBlock(const CuePoint* pCP, + const CuePoint::TrackPosition* pTP) const { + if (pCP == NULL || pTP == NULL) + return NULL; + + return m_pSegment->GetBlock(*pCP, *pTP); +} + +const BlockEntry* Segment::GetBlock(const CuePoint& cp, + const CuePoint::TrackPosition& tp) { + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) { + // INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? + //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + // const long long pos_ = pCluster->m_pos; + // assert(pos_); + // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < tp.m_pos) + i = k + 1; + else if (pos > tp.m_pos) + j = k; + else + return pCluster->GetEntry(cp, tp); + } + + assert(i == j); + // assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); + if (pCluster == NULL) + return NULL; + + const ptrdiff_t idx = i - m_clusters; + + if (!PreloadCluster(pCluster, idx)) { + delete pCluster; + return NULL; + } + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster->GetEntry(cp, tp); +} + +const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { + if (requested_pos < 0) + return 0; + + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) { + // INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? + //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + // const long long pos_ = pCluster->m_pos; + // assert(pos_); + // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < requested_pos) + i = k + 1; + else if (pos > requested_pos) + j = k; + else + return pCluster; + } + + assert(i == j); + // assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); + if (pCluster == NULL) + return NULL; + + const ptrdiff_t idx = i - m_clusters; + + if (!PreloadCluster(pCluster, idx)) { + delete pCluster; + return NULL; + } + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster; +} + +CuePoint::CuePoint(long idx, long long pos) + : m_element_start(0), + m_element_size(0), + m_index(idx), + m_timecode(-1 * pos), + m_track_positions(NULL), + m_track_positions_count(0) { + assert(pos > 0); +} + +CuePoint::~CuePoint() { delete[] m_track_positions; } + +bool CuePoint::Load(IMkvReader* pReader) { + // odbgstream os; + // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; + + if (m_timecode >= 0) // already loaded + return true; + + assert(m_track_positions == NULL); + assert(m_track_positions_count == 0); + + long long pos_ = -m_timecode; + const long long element_start = pos_; + + long long stop; + + { + long len; + + const long long id = ReadID(pReader, pos_, len); + if (id != libwebm::kMkvCuePoint) + return false; + + pos_ += len; // consume ID + + const long long size = ReadUInt(pReader, pos_, len); + assert(size >= 0); + + pos_ += len; // consume Size field + // pos_ now points to start of payload + + stop = pos_ + size; + } + + const long long element_size = stop - element_start; + + long long pos = pos_; + + // First count number of track positions + + while (pos < stop) { + long len; + + const long long id = ReadID(pReader, pos, len); + if ((id < 0) || (pos + len > stop)) { + return false; + } + + pos += len; // consume ID + + const long long size = ReadUInt(pReader, pos, len); + if ((size < 0) || (pos + len > stop)) { + return false; + } + + pos += len; // consume Size field + if ((pos + size) > stop) { + return false; + } + + if (id == libwebm::kMkvCueTime) + m_timecode = UnserializeUInt(pReader, pos, size); + + else if (id == libwebm::kMkvCueTrackPositions) + ++m_track_positions_count; + + pos += size; // consume payload + } + + if (m_timecode < 0 || m_track_positions_count <= 0) { + return false; + } + + // os << "CuePoint::Load(cont'd): idpos=" << idpos + // << " timecode=" << m_timecode + // << endl; + + m_track_positions = new TrackPosition[m_track_positions_count]; + + // Now parse track positions + + TrackPosition* p = m_track_positions; + pos = pos_; + + while (pos < stop) { + long len; + + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (pos + len) > stop) + return false; + + pos += len; // consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; // consume Size field + assert((pos + size) <= stop); + + if (id == libwebm::kMkvCueTrackPositions) { + TrackPosition& tp = *p++; + if (!tp.Parse(pReader, pos, size)) { + return false; + } + } + + pos += size; // consume payload + if (pos > stop) + return false; + } + + assert(size_t(p - m_track_positions) == m_track_positions_count); + + m_element_start = element_start; + m_element_size = element_size; + + return true; +} + +bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, + long long size_) { + const long long stop = start_ + size_; + long long pos = start_; + + m_track = -1; + m_pos = -1; + m_block = 1; // default + + while (pos < stop) { + long len; + + const long long id = ReadID(pReader, pos, len); + if ((id < 0) || ((pos + len) > stop)) { + return false; + } + + pos += len; // consume ID + + const long long size = ReadUInt(pReader, pos, len); + if ((size < 0) || ((pos + len) > stop)) { + return false; + } + + pos += len; // consume Size field + if ((pos + size) > stop) { + return false; + } + + if (id == libwebm::kMkvCueTrack) + m_track = UnserializeUInt(pReader, pos, size); + else if (id == libwebm::kMkvCueClusterPosition) + m_pos = UnserializeUInt(pReader, pos, size); + else if (id == libwebm::kMkvCueBlockNumber) + m_block = UnserializeUInt(pReader, pos, size); + + pos += size; // consume payload + } + + if ((m_pos < 0) || (m_track <= 0)) { + return false; + } + + return true; +} + +const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const { + assert(pTrack); + + const long long n = pTrack->GetNumber(); + + const TrackPosition* i = m_track_positions; + const TrackPosition* const j = i + m_track_positions_count; + + while (i != j) { + const TrackPosition& p = *i++; + + if (p.m_track == n) + return &p; + } + + return NULL; // no matching track number found +} + +long long CuePoint::GetTimeCode() const { return m_timecode; } + +long long CuePoint::GetTime(const Segment* pSegment) const { + assert(pSegment); + assert(m_timecode >= 0); + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long time = scale * m_timecode; + + return time; +} + +bool Segment::DoneParsing() const { + if (m_size < 0) { + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) // error + return true; // must assume done + + if (total < 0) + return false; // assume live stream + + return (m_pos >= total); + } + + const long long stop = m_start + m_size; + + return (m_pos >= stop); +} + +const Cluster* Segment::GetFirst() const { + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + + return pCluster; +} + +const Cluster* Segment::GetLast() const { + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + const long idx = m_clusterCount - 1; + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + + return pCluster; +} + +unsigned long Segment::GetCount() const { return m_clusterCount; } + +const Cluster* Segment::GetNext(const Cluster* pCurr) { + assert(pCurr); + assert(pCurr != &m_eos); + assert(m_clusters); + + long idx = pCurr->m_index; + + if (idx >= 0) { + assert(m_clusterCount > 0); + assert(idx < m_clusterCount); + assert(pCurr == m_clusters[idx]); + + ++idx; + + if (idx >= m_clusterCount) + return &m_eos; // caller will LoadCluster as desired + + Cluster* const pNext = m_clusters[idx]; + assert(pNext); + assert(pNext->m_index >= 0); + assert(pNext->m_index == idx); + + return pNext; + } + + assert(m_clusterPreloadCount > 0); + + long long pos = pCurr->m_element_start; + + assert(m_size >= 0); // TODO + const long long stop = m_start + m_size; // end of segment + + { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); // TODO + if (result != 0) + return NULL; + + const long long id = ReadID(m_pReader, pos, len); + if (id != libwebm::kMkvCluster) + return NULL; + + pos += len; // consume ID + + // Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); // TODO + assert((pos + len) <= stop); // TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size > 0); // TODO + // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + pos += len; // consume length of size of element + assert((pos + size) <= stop); // TODO + + // Pos now points to start of payload + + pos += size; // consume payload + } + + long long off_next = 0; + + while (pos < stop) { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); // TODO + if (result != 0) + return NULL; + + const long long idpos = pos; // pos of next (potential) cluster + + const long long id = ReadID(m_pReader, idpos, len); + if (id < 0) + return NULL; + + pos += len; // consume ID + + // Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); // TODO + assert((pos + len) <= stop); // TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size >= 0); // TODO + + pos += len; // consume length of size of element + assert((pos + size) <= stop); // TODO + + // Pos now points to start of payload + + if (size == 0) // weird + continue; + + if (id == libwebm::kMkvCluster) { + const long long off_next_ = idpos - m_start; + + long long pos_; + long len_; + + const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_); + + assert(status >= 0); + + if (status > 0) { + off_next = off_next_; + break; + } + } + + pos += size; // consume payload + } + + if (off_next <= 0) + return 0; + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) { + // INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + // const long long pos_ = pNext->m_pos; + // assert(pos_); + // pos = pos_ * ((pos_ < 0) ? -1 : 1); + + pos = pNext->GetPosition(); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else + return pNext; + } + + assert(i == j); + + Cluster* const pNext = Cluster::Create(this, -1, off_next); + if (pNext == NULL) + return NULL; + + const ptrdiff_t idx_next = i - m_clusters; // insertion position + + if (!PreloadCluster(pNext, idx_next)) { + delete pNext; + return NULL; + } + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + return pNext; +} + +long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, + long long& pos, long& len) { + assert(pCurr); + assert(!pCurr->EOS()); + assert(m_clusters); + + pResult = 0; + + if (pCurr->m_index >= 0) { // loaded (not merely preloaded) + assert(m_clusters[pCurr->m_index] == pCurr); + + const long next_idx = pCurr->m_index + 1; + + if (next_idx < m_clusterCount) { + pResult = m_clusters[next_idx]; + return 0; // success + } + + // curr cluster is last among loaded + + const long result = LoadCluster(pos, len); + + if (result < 0) // error or underflow + return result; + + if (result > 0) // no more clusters + { + // pResult = &m_eos; + return 1; + } + + pResult = GetLast(); + return 0; // success + } + + assert(m_pos > 0); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + // interrogate curr cluster + + pos = pCurr->m_element_start; + + if (pCurr->m_element_size >= 0) + pos += pCurr->m_element_size; + else { + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(m_pReader, pos, len); + + if (id != libwebm::kMkvCluster) + return -1; + + pos += len; // consume ID + + // Read Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume size field + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) // TODO: should never happen + return E_FILE_FORMAT_INVALID; // TODO: resolve this + + // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + // Pos now points to start of payload + + pos += size; // consume payload (that is, the current cluster) + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + // By consuming the payload, we are assuming that the curr + // cluster isn't interesting. That is, we don't bother checking + // whether the payload of the curr cluster is less than what + // happens to be available (obtained via IMkvReader::Length). + // Presumably the caller has already dispensed with the current + // cluster, and really does want the next cluster. + } + + // pos now points to just beyond the last fully-loaded cluster + + for (;;) { + const long status = DoParseNext(pResult, pos, len); + + if (status <= 1) + return status; + } +} + +long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + // Parse next cluster. This is strictly a parsing activity. + // Creation of a new cluster object happens later, after the + // parsing is done. + + long long off_next = 0; + long long cluster_size = -1; + + for (;;) { + if ((total >= 0) && (pos >= total)) + return 1; // EOF + + if ((segment_stop >= 0) && (pos >= segment_stop)) + return 1; // EOF + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; // absolute + const long long idoff = pos - m_start; // relative + + const long long id = ReadID(m_pReader, idpos, len); // absolute + + if (id < 0) // error + return static_cast(id); + + if (id == 0) // weird + return -1; // generic error + + pos += len; // consume ID + + // Read Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume length of size of element + + // Pos now points to start of payload + + if (size == 0) // weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if ((segment_stop >= 0) && (size != unknown_size) && + ((pos + size) > segment_stop)) { + return E_FILE_FORMAT_INVALID; + } + + if (id == libwebm::kMkvCues) { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + const long long element_start = idpos; + const long long element_size = element_stop - element_start; + + if (m_pCues == NULL) { + m_pCues = new Cues(this, pos, size, element_start, element_size); + } + + pos += size; // consume payload + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + continue; + } + + if (id != libwebm::kMkvCluster) { // not a Cluster ID + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += size; // consume payload + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + continue; + } + + // We have a cluster. + off_next = idoff; + + if (size != unknown_size) + cluster_size = size; + + break; + } + + assert(off_next > 0); // have cluster + + // We have parsed the next cluster. + // We have not created a cluster object yet. What we need + // to do now is determine whether it has already be preloaded + //(in which case, an object for this cluster has already been + // created), and if not, create a new cluster object. + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) { + // INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + const Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + pos = pNext->GetPosition(); + assert(pos >= 0); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else { + pResult = pNext; + return 0; // success + } + } + + assert(i == j); + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, off_next, pos_, len_); + + if (status < 0) { // error or underflow + pos = pos_; + len = len_; + + return status; + } + + if (status > 0) { // means "found at least one block entry" + Cluster* const pNext = Cluster::Create(this, + -1, // preloaded + off_next); + if (pNext == NULL) + return -1; + + const ptrdiff_t idx_next = i - m_clusters; // insertion position + + if (!PreloadCluster(pNext, idx_next)) { + delete pNext; + return -1; + } + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + pResult = pNext; + return 0; // success + } + + // status == 0 means "no block entries found" + + if (cluster_size < 0) { // unknown size + const long long payload_pos = pos; // absolute pos of cluster payload + + for (;;) { // determine cluster size + if ((total >= 0) && (pos >= total)) + break; + + if ((segment_stop >= 0) && (pos >= segment_stop)) + break; // no more clusters + + // Read ID + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadID(m_pReader, idpos, len); + + if (id < 0) // error (or underflow) + return static_cast(id); + + // This is the distinguished set of ID's we use to determine + // that we have exhausted the sub-element's inside the cluster + // whose ID we parsed earlier. + + if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) + break; + + pos += len; // consume ID (of sub-element) + + // Read Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume size field of element + + // pos now points to start of sub-element's payload + + if (size == 0) // weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; // not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird + return E_FILE_FORMAT_INVALID; + + pos += size; // consume payload of sub-element + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; + } // determine cluster size + + cluster_size = pos - payload_pos; + assert(cluster_size >= 0); // TODO: handle cluster_size = 0 + + pos = payload_pos; // reset and re-parse original cluster + } + + pos += cluster_size; // consume payload + if (segment_stop >= 0 && pos > segment_stop) + return E_FILE_FORMAT_INVALID; + + return 2; // try to find a cluster that follows next +} + +const Cluster* Segment::FindCluster(long long time_ns) const { + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + { + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + assert(pCluster->m_index == 0); + + if (time_ns <= pCluster->GetTime()) + return pCluster; + } + + // Binary search of cluster array + + long i = 0; + long j = m_clusterCount; + + while (i < j) { + // INVARIANT: + //[0, i) <= time_ns + //[i, j) ? + //[j, m_clusterCount) > time_ns + + const long k = i + (j - i) / 2; + assert(k < m_clusterCount); + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i > 0); + assert(i <= m_clusterCount); + + const long k = i - 1; + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + assert(pCluster->GetTime() <= time_ns); + + return pCluster; +} + +const Tracks* Segment::GetTracks() const { return m_pTracks; } +const SegmentInfo* Segment::GetInfo() const { return m_pInfo; } +const Cues* Segment::GetCues() const { return m_pCues; } +const Chapters* Segment::GetChapters() const { return m_pChapters; } +const Tags* Segment::GetTags() const { return m_pTags; } +const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; } + +long long Segment::GetDuration() const { + assert(m_pInfo); + return m_pInfo->GetDuration(); +} + +Chapters::Chapters(Segment* pSegment, long long payload_start, + long long payload_size, long long element_start, + long long element_size) + : m_pSegment(pSegment), + m_start(payload_start), + m_size(payload_size), + m_element_start(element_start), + m_element_size(element_size), + m_editions(NULL), + m_editions_size(0), + m_editions_count(0) {} + +Chapters::~Chapters() { + while (m_editions_count > 0) { + Edition& e = m_editions[--m_editions_count]; + e.Clear(); + } + delete[] m_editions; +} + +long Chapters::Parse() { + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; // payload start + const long long stop = pos + m_size; // payload stop + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == libwebm::kMkvEditionEntry) { + status = ParseEdition(pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +int Chapters::GetEditionCount() const { return m_editions_count; } + +const Chapters::Edition* Chapters::GetEdition(int idx) const { + if (idx < 0) + return NULL; + + if (idx >= m_editions_count) + return NULL; + + return m_editions + idx; +} + +bool Chapters::ExpandEditionsArray() { + if (m_editions_size > m_editions_count) + return true; // nothing else to do + + const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size; + + Edition* const editions = new Edition[size]; + + for (int idx = 0; idx < m_editions_count; ++idx) { + m_editions[idx].ShallowCopy(editions[idx]); + } + + delete[] m_editions; + m_editions = editions; + + m_editions_size = size; + return true; +} + +long Chapters::ParseEdition(long long pos, long long size) { + if (!ExpandEditionsArray()) + return -1; + + Edition& e = m_editions[m_editions_count++]; + e.Init(); + + return e.Parse(m_pSegment->m_pReader, pos, size); +} + +Chapters::Edition::Edition() {} + +Chapters::Edition::~Edition() {} + +int Chapters::Edition::GetAtomCount() const { return m_atoms_count; } + +const Chapters::Atom* Chapters::Edition::GetAtom(int index) const { + if (index < 0) + return NULL; + + if (index >= m_atoms_count) + return NULL; + + return m_atoms + index; +} + +void Chapters::Edition::Init() { + m_atoms = NULL; + m_atoms_size = 0; + m_atoms_count = 0; +} + +void Chapters::Edition::ShallowCopy(Edition& rhs) const { + rhs.m_atoms = m_atoms; + rhs.m_atoms_size = m_atoms_size; + rhs.m_atoms_count = m_atoms_count; +} + +void Chapters::Edition::Clear() { + while (m_atoms_count > 0) { + Atom& a = m_atoms[--m_atoms_count]; + a.Clear(); + } + + delete[] m_atoms; + m_atoms = NULL; + + m_atoms_size = 0; +} + +long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, + long long size) { + const long long stop = pos + size; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) + continue; + + if (id == libwebm::kMkvChapterAtom) { + status = ParseAtom(pReader, pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos, + long long size) { + if (!ExpandAtomsArray()) + return -1; + + Atom& a = m_atoms[m_atoms_count++]; + a.Init(); + + return a.Parse(pReader, pos, size); +} + +bool Chapters::Edition::ExpandAtomsArray() { + if (m_atoms_size > m_atoms_count) + return true; // nothing else to do + + const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size; + + Atom* const atoms = new Atom[size]; + + for (int idx = 0; idx < m_atoms_count; ++idx) { + m_atoms[idx].ShallowCopy(atoms[idx]); + } + + delete[] m_atoms; + m_atoms = atoms; + + m_atoms_size = size; + return true; +} + +Chapters::Atom::Atom() {} + +Chapters::Atom::~Atom() {} + +unsigned long long Chapters::Atom::GetUID() const { return m_uid; } + +const char* Chapters::Atom::GetStringUID() const { return m_string_uid; } + +long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; } + +long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; } + +long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const { + return GetTime(pChapters, m_start_timecode); +} + +long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const { + return GetTime(pChapters, m_stop_timecode); +} + +int Chapters::Atom::GetDisplayCount() const { return m_displays_count; } + +const Chapters::Display* Chapters::Atom::GetDisplay(int index) const { + if (index < 0) + return NULL; + + if (index >= m_displays_count) + return NULL; + + return m_displays + index; +} + +void Chapters::Atom::Init() { + m_string_uid = NULL; + m_uid = 0; + m_start_timecode = -1; + m_stop_timecode = -1; + + m_displays = NULL; + m_displays_size = 0; + m_displays_count = 0; +} + +void Chapters::Atom::ShallowCopy(Atom& rhs) const { + rhs.m_string_uid = m_string_uid; + rhs.m_uid = m_uid; + rhs.m_start_timecode = m_start_timecode; + rhs.m_stop_timecode = m_stop_timecode; + + rhs.m_displays = m_displays; + rhs.m_displays_size = m_displays_size; + rhs.m_displays_count = m_displays_count; +} + +void Chapters::Atom::Clear() { + delete[] m_string_uid; + m_string_uid = NULL; + + while (m_displays_count > 0) { + Display& d = m_displays[--m_displays_count]; + d.Clear(); + } + + delete[] m_displays; + m_displays = NULL; + + m_displays_size = 0; +} + +long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { + const long long stop = pos + size; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) // 0 length payload, skip. + continue; + + if (id == libwebm::kMkvChapterDisplay) { + status = ParseDisplay(pReader, pos, size); + + if (status < 0) // error + return status; + } else if (id == libwebm::kMkvChapterStringUID) { + status = UnserializeString(pReader, pos, size, m_string_uid); + + if (status < 0) // error + return status; + } else if (id == libwebm::kMkvChapterUID) { + long long val; + status = UnserializeInt(pReader, pos, size, val); + + if (status < 0) // error + return status; + + m_uid = static_cast(val); + } else if (id == libwebm::kMkvChapterTimeStart) { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast(val); + + m_start_timecode = val; + } else if (id == libwebm::kMkvChapterTimeEnd) { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast(val); + + m_stop_timecode = val; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +long long Chapters::Atom::GetTime(const Chapters* pChapters, + long long timecode) { + if (pChapters == NULL) + return -1; + + Segment* const pSegment = pChapters->m_pSegment; + + if (pSegment == NULL) // weird + return -1; + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + + if (pInfo == NULL) + return -1; + + const long long timecode_scale = pInfo->GetTimeCodeScale(); + + if (timecode_scale < 1) // weird + return -1; + + if (timecode < 0) + return -1; + + const long long result = timecode_scale * timecode; + + return result; +} + +long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos, + long long size) { + if (!ExpandDisplaysArray()) + return -1; + + Display& d = m_displays[m_displays_count++]; + d.Init(); + + return d.Parse(pReader, pos, size); +} + +bool Chapters::Atom::ExpandDisplaysArray() { + if (m_displays_size > m_displays_count) + return true; // nothing else to do + + const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; + + Display* const displays = new Display[size]; + + for (int idx = 0; idx < m_displays_count; ++idx) { + m_displays[idx].ShallowCopy(displays[idx]); + } + + delete[] m_displays; + m_displays = displays; + + m_displays_size = size; + return true; +} + +Chapters::Display::Display() {} + +Chapters::Display::~Display() {} + +const char* Chapters::Display::GetString() const { return m_string; } + +const char* Chapters::Display::GetLanguage() const { return m_language; } + +const char* Chapters::Display::GetCountry() const { return m_country; } + +void Chapters::Display::Init() { + m_string = NULL; + m_language = NULL; + m_country = NULL; +} + +void Chapters::Display::ShallowCopy(Display& rhs) const { + rhs.m_string = m_string; + rhs.m_language = m_language; + rhs.m_country = m_country; +} + +void Chapters::Display::Clear() { + delete[] m_string; + m_string = NULL; + + delete[] m_language; + m_language = NULL; + + delete[] m_country; + m_country = NULL; +} + +long Chapters::Display::Parse(IMkvReader* pReader, long long pos, + long long size) { + const long long stop = pos + size; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) // No payload. + continue; + + if (id == libwebm::kMkvChapString) { + status = UnserializeString(pReader, pos, size, m_string); + + if (status) + return status; + } else if (id == libwebm::kMkvChapLanguage) { + status = UnserializeString(pReader, pos, size, m_language); + + if (status) + return status; + } else if (id == libwebm::kMkvChapCountry) { + status = UnserializeString(pReader, pos, size, m_country); + + if (status) + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size, + long long element_start, long long element_size) + : m_pSegment(pSegment), + m_start(payload_start), + m_size(payload_size), + m_element_start(element_start), + m_element_size(element_size), + m_tags(NULL), + m_tags_size(0), + m_tags_count(0) {} + +Tags::~Tags() { + while (m_tags_count > 0) { + Tag& t = m_tags[--m_tags_count]; + t.Clear(); + } + delete[] m_tags; +} + +long Tags::Parse() { + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; // payload start + const long long stop = pos + m_size; // payload stop + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) + return status; + + if (size == 0) // 0 length tag, read another + continue; + + if (id == libwebm::kMkvTag) { + status = ParseTag(pos, size); + + if (status < 0) + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +int Tags::GetTagCount() const { return m_tags_count; } + +const Tags::Tag* Tags::GetTag(int idx) const { + if (idx < 0) + return NULL; + + if (idx >= m_tags_count) + return NULL; + + return m_tags + idx; +} + +bool Tags::ExpandTagsArray() { + if (m_tags_size > m_tags_count) + return true; // nothing else to do + + const int size = (m_tags_size == 0) ? 1 : 2 * m_tags_size; + + Tag* const tags = new Tag[size]; + + for (int idx = 0; idx < m_tags_count; ++idx) { + m_tags[idx].ShallowCopy(tags[idx]); + } + + delete[] m_tags; + m_tags = tags; + + m_tags_size = size; + return true; +} + +long Tags::ParseTag(long long pos, long long size) { + if (!ExpandTagsArray()) + return -1; + + Tag& t = m_tags[m_tags_count++]; + t.Init(); + + return t.Parse(m_pSegment->m_pReader, pos, size); +} + +Tags::Tag::Tag() {} + +Tags::Tag::~Tag() {} + +int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; } + +const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const { + if (index < 0) + return NULL; + + if (index >= m_simple_tags_count) + return NULL; + + return m_simple_tags + index; +} + +void Tags::Tag::Init() { + m_simple_tags = NULL; + m_simple_tags_size = 0; + m_simple_tags_count = 0; +} + +void Tags::Tag::ShallowCopy(Tag& rhs) const { + rhs.m_simple_tags = m_simple_tags; + rhs.m_simple_tags_size = m_simple_tags_size; + rhs.m_simple_tags_count = m_simple_tags_count; +} + +void Tags::Tag::Clear() { + while (m_simple_tags_count > 0) { + SimpleTag& d = m_simple_tags[--m_simple_tags_count]; + d.Clear(); + } + + delete[] m_simple_tags; + m_simple_tags = NULL; + + m_simple_tags_size = 0; +} + +long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { + const long long stop = pos + size; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) + return status; + + if (size == 0) // 0 length tag, read another + continue; + + if (id == libwebm::kMkvSimpleTag) { + status = ParseSimpleTag(pReader, pos, size); + + if (status < 0) + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos, + long long size) { + if (!ExpandSimpleTagsArray()) + return -1; + + SimpleTag& st = m_simple_tags[m_simple_tags_count++]; + st.Init(); + + return st.Parse(pReader, pos, size); +} + +bool Tags::Tag::ExpandSimpleTagsArray() { + if (m_simple_tags_size > m_simple_tags_count) + return true; // nothing else to do + + const int size = (m_simple_tags_size == 0) ? 1 : 2 * m_simple_tags_size; + + SimpleTag* const displays = new SimpleTag[size]; + + for (int idx = 0; idx < m_simple_tags_count; ++idx) { + m_simple_tags[idx].ShallowCopy(displays[idx]); + } + + delete[] m_simple_tags; + m_simple_tags = displays; + + m_simple_tags_size = size; + return true; +} + +Tags::SimpleTag::SimpleTag() {} + +Tags::SimpleTag::~SimpleTag() {} + +const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; } + +const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; } + +void Tags::SimpleTag::Init() { + m_tag_name = NULL; + m_tag_string = NULL; +} + +void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const { + rhs.m_tag_name = m_tag_name; + rhs.m_tag_string = m_tag_string; +} + +void Tags::SimpleTag::Clear() { + delete[] m_tag_name; + m_tag_name = NULL; + + delete[] m_tag_string; + m_tag_string = NULL; +} + +long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, + long long size) { + const long long stop = pos + size; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == libwebm::kMkvTagName) { + status = UnserializeString(pReader, pos, size, m_tag_name); + + if (status) + return status; + } else if (id == libwebm::kMkvTagString) { + status = UnserializeString(pReader, pos, size, m_tag_string); + + if (status) + return status; + } + + pos += size; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_, + long long element_start, long long element_size) + : m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_pMuxingAppAsUTF8(NULL), + m_pWritingAppAsUTF8(NULL), + m_pTitleAsUTF8(NULL) {} + +SegmentInfo::~SegmentInfo() { + delete[] m_pMuxingAppAsUTF8; + m_pMuxingAppAsUTF8 = NULL; + + delete[] m_pWritingAppAsUTF8; + m_pWritingAppAsUTF8 = NULL; + + delete[] m_pTitleAsUTF8; + m_pTitleAsUTF8 = NULL; +} + +long SegmentInfo::Parse() { + assert(m_pMuxingAppAsUTF8 == NULL); + assert(m_pWritingAppAsUTF8 == NULL); + assert(m_pTitleAsUTF8 == NULL); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + m_timecodeScale = 1000000; + m_duration = -1; + + while (pos < stop) { + long long id, size; + + const long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (id == libwebm::kMkvTimecodeScale) { + m_timecodeScale = UnserializeUInt(pReader, pos, size); + + if (m_timecodeScale <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDuration) { + const long status = UnserializeFloat(pReader, pos, size, m_duration); + + if (status < 0) + return status; + + if (m_duration < 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvMuxingApp) { + const long status = + UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); + + if (status) + return status; + } else if (id == libwebm::kMkvWritingApp) { + const long status = + UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); + + if (status) + return status; + } else if (id == libwebm::kMkvTitle) { + const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); + + if (status) + return status; + } + + pos += size; + + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + const double rollover_check = m_duration * m_timecodeScale; + if (rollover_check > LLONG_MAX) + return E_FILE_FORMAT_INVALID; + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; } + +long long SegmentInfo::GetDuration() const { + if (m_duration < 0) + return -1; + + assert(m_timecodeScale >= 1); + + const double dd = double(m_duration) * double(m_timecodeScale); + const long long d = static_cast(dd); + + return d; +} + +const char* SegmentInfo::GetMuxingAppAsUTF8() const { + return m_pMuxingAppAsUTF8; +} + +const char* SegmentInfo::GetWritingAppAsUTF8() const { + return m_pWritingAppAsUTF8; +} + +const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; } + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +ContentEncoding::ContentCompression::ContentCompression() + : algo(0), settings(NULL), settings_len(0) {} + +ContentEncoding::ContentCompression::~ContentCompression() { + delete[] settings; +} + +ContentEncoding::ContentEncryption::ContentEncryption() + : algo(0), + key_id(NULL), + key_id_len(0), + signature(NULL), + signature_len(0), + sig_key_id(NULL), + sig_key_id_len(0), + sig_algo(0), + sig_hash_algo(0) {} + +ContentEncoding::ContentEncryption::~ContentEncryption() { + delete[] key_id; + delete[] signature; + delete[] sig_key_id; +} + +ContentEncoding::ContentEncoding() + : compression_entries_(NULL), + compression_entries_end_(NULL), + encryption_entries_(NULL), + encryption_entries_end_(NULL), + encoding_order_(0), + encoding_scope_(1), + encoding_type_(0) {} + +ContentEncoding::~ContentEncoding() { + ContentCompression** comp_i = compression_entries_; + ContentCompression** const comp_j = compression_entries_end_; + + while (comp_i != comp_j) { + ContentCompression* const comp = *comp_i++; + delete comp; + } + + delete[] compression_entries_; + + ContentEncryption** enc_i = encryption_entries_; + ContentEncryption** const enc_j = encryption_entries_end_; + + while (enc_i != enc_j) { + ContentEncryption* const enc = *enc_i++; + delete enc; + } + + delete[] encryption_entries_; +} + +const ContentEncoding::ContentCompression* + ContentEncoding::GetCompressionByIndex(unsigned long idx) const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + if (idx >= static_cast(count)) + return NULL; + + return compression_entries_[idx]; +} + +unsigned long ContentEncoding::GetCompressionCount() const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + return static_cast(count); +} + +const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex( + unsigned long idx) const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + if (idx >= static_cast(count)) + return NULL; + + return encryption_entries_[idx]; +} + +unsigned long ContentEncoding::GetEncryptionCount() const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + return static_cast(count); +} + +long ContentEncoding::ParseContentEncAESSettingsEntry( + long long start, long long size, IMkvReader* pReader, + ContentEncAESSettings* aes) { + assert(pReader); + assert(aes); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + if (id == libwebm::kMkvAESSettingsCipherMode) { + aes->cipher_mode = UnserializeUInt(pReader, pos, size); + if (aes->cipher_mode != 1) + return E_FILE_FORMAT_INVALID; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + return 0; +} + +long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, + IMkvReader* pReader) { + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentCompression and ContentEncryption elements. + int compression_count = 0; + int encryption_count = 0; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + if (id == libwebm::kMkvContentCompression) + ++compression_count; + + if (id == libwebm::kMkvContentEncryption) + ++encryption_count; + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (compression_count <= 0 && encryption_count <= 0) + return -1; + + if (compression_count > 0) { + compression_entries_ = new ContentCompression*[compression_count]; + compression_entries_end_ = compression_entries_; + } + + if (encryption_count > 0) { + encryption_entries_ = new ContentEncryption*[encryption_count]; + encryption_entries_end_ = encryption_entries_; + } + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + if (id == libwebm::kMkvContentEncodingOrder) { + encoding_order_ = UnserializeUInt(pReader, pos, size); + } else if (id == libwebm::kMkvContentEncodingScope) { + encoding_scope_ = UnserializeUInt(pReader, pos, size); + if (encoding_scope_ < 1) + return -1; + } else if (id == libwebm::kMkvContentEncodingType) { + encoding_type_ = UnserializeUInt(pReader, pos, size); + } else if (id == libwebm::kMkvContentCompression) { + ContentCompression* const compression = new ContentCompression(); + + status = ParseCompressionEntry(pos, size, pReader, compression); + if (status) { + delete compression; + return status; + } + *compression_entries_end_++ = compression; + } else if (id == libwebm::kMkvContentEncryption) { + ContentEncryption* const encryption = new ContentEncryption(); + + status = ParseEncryptionEntry(pos, size, pReader, encryption); + if (status) { + delete encryption; + return status; + } + *encryption_entries_end_++ = encryption; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + return 0; +} + +long ContentEncoding::ParseCompressionEntry(long long start, long long size, + IMkvReader* pReader, + ContentCompression* compression) { + assert(pReader); + assert(compression); + + long long pos = start; + const long long stop = start + size; + + bool valid = false; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + if (id == libwebm::kMkvContentCompAlgo) { + long long algo = UnserializeUInt(pReader, pos, size); + if (algo < 0) + return E_FILE_FORMAT_INVALID; + compression->algo = algo; + valid = true; + } else if (id == libwebm::kMkvContentCompSettings) { + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast(size); + unsigned char* buf = SafeArrayAlloc(1, buflen); + if (buf == NULL) + return -1; + + const int read_status = + pReader->Read(pos, static_cast(buflen), buf); + if (read_status) { + delete[] buf; + return status; + } + + compression->settings = buf; + compression->settings_len = buflen; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + // ContentCompAlgo is mandatory + if (!valid) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +long ContentEncoding::ParseEncryptionEntry(long long start, long long size, + IMkvReader* pReader, + ContentEncryption* encryption) { + assert(pReader); + assert(encryption); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + if (id == libwebm::kMkvContentEncAlgo) { + encryption->algo = UnserializeUInt(pReader, pos, size); + if (encryption->algo != 5) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvContentEncKeyID) { + delete[] encryption->key_id; + encryption->key_id = NULL; + encryption->key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast(size); + unsigned char* buf = SafeArrayAlloc(1, buflen); + if (buf == NULL) + return -1; + + const int read_status = + pReader->Read(pos, static_cast(buflen), buf); + if (read_status) { + delete[] buf; + return status; + } + + encryption->key_id = buf; + encryption->key_id_len = buflen; + } else if (id == libwebm::kMkvContentSignature) { + delete[] encryption->signature; + encryption->signature = NULL; + encryption->signature_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast(size); + unsigned char* buf = SafeArrayAlloc(1, buflen); + if (buf == NULL) + return -1; + + const int read_status = + pReader->Read(pos, static_cast(buflen), buf); + if (read_status) { + delete[] buf; + return status; + } + + encryption->signature = buf; + encryption->signature_len = buflen; + } else if (id == libwebm::kMkvContentSigKeyID) { + delete[] encryption->sig_key_id; + encryption->sig_key_id = NULL; + encryption->sig_key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast(size); + unsigned char* buf = SafeArrayAlloc(1, buflen); + if (buf == NULL) + return -1; + + const int read_status = + pReader->Read(pos, static_cast(buflen), buf); + if (read_status) { + delete[] buf; + return status; + } + + encryption->sig_key_id = buf; + encryption->sig_key_id_len = buflen; + } else if (id == libwebm::kMkvContentSigAlgo) { + encryption->sig_algo = UnserializeUInt(pReader, pos, size); + } else if (id == libwebm::kMkvContentSigHashAlgo) { + encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); + } else if (id == libwebm::kMkvContentEncAESSettings) { + const long status = ParseContentEncAESSettingsEntry( + pos, size, pReader, &encryption->aes_settings); + if (status) + return status; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + return 0; +} + +Track::Track(Segment* pSegment, long long element_start, long long element_size) + : m_pSegment(pSegment), + m_element_start(element_start), + m_element_size(element_size), + content_encoding_entries_(NULL), + content_encoding_entries_end_(NULL) {} + +Track::~Track() { + Info& info = const_cast(m_info); + info.Clear(); + + ContentEncoding** i = content_encoding_entries_; + ContentEncoding** const j = content_encoding_entries_end_; + + while (i != j) { + ContentEncoding* const encoding = *i++; + delete encoding; + } + + delete[] content_encoding_entries_; +} + +long Track::Create(Segment* pSegment, const Info& info, long long element_start, + long long element_size, Track*& pResult) { + if (pResult) + return -1; + + Track* const pTrack = new Track(pSegment, element_start, element_size); + + const int status = info.Copy(pTrack->m_info); + + if (status) { // error + delete pTrack; + return status; + } + + pResult = pTrack; + return 0; // success +} + +Track::Info::Info() + : uid(0), + defaultDuration(0), + codecDelay(0), + seekPreRoll(0), + nameAsUTF8(NULL), + language(NULL), + codecId(NULL), + codecNameAsUTF8(NULL), + codecPrivate(NULL), + codecPrivateSize(0), + lacing(false) {} + +Track::Info::~Info() { Clear(); } + +void Track::Info::Clear() { + delete[] nameAsUTF8; + nameAsUTF8 = NULL; + + delete[] language; + language = NULL; + + delete[] codecId; + codecId = NULL; + + delete[] codecPrivate; + codecPrivate = NULL; + codecPrivateSize = 0; + + delete[] codecNameAsUTF8; + codecNameAsUTF8 = NULL; +} + +int Track::Info::CopyStr(char* Info::*str, Info& dst_) const { + if (str == static_cast(NULL)) + return -1; + + char*& dst = dst_.*str; + + if (dst) // should be NULL already + return -1; + + const char* const src = this->*str; + + if (src == NULL) + return 0; + + const size_t len = strlen(src); + + dst = SafeArrayAlloc(1, len + 1); + + if (dst == NULL) + return -1; + + strcpy(dst, src); + + return 0; +} + +int Track::Info::Copy(Info& dst) const { + if (&dst == this) + return 0; + + dst.type = type; + dst.number = number; + dst.defaultDuration = defaultDuration; + dst.codecDelay = codecDelay; + dst.seekPreRoll = seekPreRoll; + dst.uid = uid; + dst.lacing = lacing; + dst.settings = settings; + + // We now copy the string member variables from src to dst. + // This involves memory allocation so in principle the operation + // can fail (indeed, that's why we have Info::Copy), so we must + // report this to the caller. An error return from this function + // therefore implies that the copy was only partially successful. + + if (int status = CopyStr(&Info::nameAsUTF8, dst)) + return status; + + if (int status = CopyStr(&Info::language, dst)) + return status; + + if (int status = CopyStr(&Info::codecId, dst)) + return status; + + if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) + return status; + + if (codecPrivateSize > 0) { + if (codecPrivate == NULL) + return -1; + + if (dst.codecPrivate) + return -1; + + if (dst.codecPrivateSize != 0) + return -1; + + dst.codecPrivate = SafeArrayAlloc(1, codecPrivateSize); + + if (dst.codecPrivate == NULL) + return -1; + + memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); + dst.codecPrivateSize = codecPrivateSize; + } + + return 0; +} + +const BlockEntry* Track::GetEOS() const { return &m_eos; } + +long Track::GetType() const { return m_info.type; } + +long Track::GetNumber() const { return m_info.number; } + +unsigned long long Track::GetUid() const { return m_info.uid; } + +const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; } + +const char* Track::GetLanguage() const { return m_info.language; } + +const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; } + +const char* Track::GetCodecId() const { return m_info.codecId; } + +const unsigned char* Track::GetCodecPrivate(size_t& size) const { + size = m_info.codecPrivateSize; + return m_info.codecPrivate; +} + +bool Track::GetLacing() const { return m_info.lacing; } + +unsigned long long Track::GetDefaultDuration() const { + return m_info.defaultDuration; +} + +unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; } + +unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; } + +long Track::GetFirst(const BlockEntry*& pBlockEntry) const { + const Cluster* pCluster = m_pSegment->GetFirst(); + + for (int i = 0;;) { + if (pCluster == NULL) { + pBlockEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) { + if (m_pSegment->DoneParsing()) { + pBlockEntry = GetEOS(); + return 1; + } + + pBlockEntry = 0; + return E_BUFFER_NOT_FULL; + } + + long status = pCluster->GetFirst(pBlockEntry); + + if (status < 0) // error + return status; + + if (pBlockEntry == 0) { // empty cluster + pCluster = m_pSegment->GetNext(pCluster); + continue; + } + + for (;;) { + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + + const long long tn = pBlock->GetTrackNumber(); + + if ((tn == m_info.number) && VetEntry(pBlockEntry)) + return 0; + + const BlockEntry* pNextEntry; + + status = pCluster->GetNext(pBlockEntry, pNextEntry); + + if (status < 0) // error + return status; + + if (pNextEntry == 0) + break; + + pBlockEntry = pNextEntry; + } + + ++i; + + if (i >= 100) + break; + + pCluster = m_pSegment->GetNext(pCluster); + } + + // NOTE: if we get here, it means that we didn't find a block with + // a matching track number. We interpret that as an error (which + // might be too conservative). + + pBlockEntry = GetEOS(); // so we can return a non-NULL value + return 1; +} + +long Track::GetNext(const BlockEntry* pCurrEntry, + const BlockEntry*& pNextEntry) const { + assert(pCurrEntry); + assert(!pCurrEntry->EOS()); //? + + const Block* const pCurrBlock = pCurrEntry->GetBlock(); + assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); + if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) + return -1; + + const Cluster* pCluster = pCurrEntry->GetCluster(); + assert(pCluster); + assert(!pCluster->EOS()); + + long status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) // error + return status; + + for (int i = 0;;) { + while (pNextEntry) { + const Block* const pNextBlock = pNextEntry->GetBlock(); + assert(pNextBlock); + + if (pNextBlock->GetTrackNumber() == m_info.number) + return 0; + + pCurrEntry = pNextEntry; + + status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) // error + return status; + } + + pCluster = m_pSegment->GetNext(pCluster); + + if (pCluster == NULL) { + pNextEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) { + if (m_pSegment->DoneParsing()) { + pNextEntry = GetEOS(); + return 1; + } + + // TODO: there is a potential O(n^2) problem here: we tell the + // caller to (pre)load another cluster, which he does, but then he + // calls GetNext again, which repeats the same search. This is + // a pathological case, since the only way it can happen is if + // there exists a long sequence of clusters none of which contain a + // block from this track. One way around this problem is for the + // caller to be smarter when he loads another cluster: don't call + // us back until you have a cluster that contains a block from this + // track. (Of course, that's not cheap either, since our caller + // would have to scan the each cluster as it's loaded, so that + // would just push back the problem.) + + pNextEntry = NULL; + return E_BUFFER_NOT_FULL; + } + + status = pCluster->GetFirst(pNextEntry); + + if (status < 0) // error + return status; + + if (pNextEntry == NULL) // empty cluster + continue; + + ++i; + + if (i >= 100) + break; + } + + // NOTE: if we get here, it means that we didn't find a block with + // a matching track number after lots of searching, so we give + // up trying. + + pNextEntry = GetEOS(); // so we can return a non-NULL value + return 1; +} + +bool Track::VetEntry(const BlockEntry* pBlockEntry) const { + assert(pBlockEntry); + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + assert(pBlock->GetTrackNumber() == m_info.number); + if (!pBlock || pBlock->GetTrackNumber() != m_info.number) + return false; + + // This function is used during a seek to determine whether the + // frame is a valid seek target. This default function simply + // returns true, which means all frames are valid seek targets. + // It gets overridden by the VideoTrack class, because only video + // keyframes can be used as seek target. + + return true; +} + +long Track::Seek(long long time_ns, const BlockEntry*& pResult) const { + const long status = GetFirst(pResult); + + if (status < 0) // buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const long count = m_pSegment->GetCount(); // loaded only, not preloaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) { + // INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + while (lo > i) { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this); + + if ((pResult != 0) && !pResult->EOS()) + return 0; + + // landed on empty cluster (no entries) + } + + pResult = GetEOS(); // weird + return 0; +} + +const ContentEncoding* Track::GetContentEncodingByIndex( + unsigned long idx) const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + if (idx >= static_cast(count)) + return NULL; + + return content_encoding_entries_[idx]; +} + +unsigned long Track::GetContentEncodingCount() const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + return static_cast(count); +} + +long Track::ParseContentEncodingsEntry(long long start, long long size) { + IMkvReader* const pReader = m_pSegment->m_pReader; + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentEncoding elements. + int count = 0; + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + // pos now designates start of element + if (id == libwebm::kMkvContentEncoding) + ++count; + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (count <= 0) + return -1; + + content_encoding_entries_ = new ContentEncoding*[count]; + content_encoding_entries_end_ = content_encoding_entries_; + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, pos, stop, id, size); + if (status < 0) // error + return status; + + // pos now designates start of element + if (id == libwebm::kMkvContentEncoding) { + ContentEncoding* const content_encoding = new ContentEncoding(); + + status = content_encoding->ParseContentEncodingEntry(pos, size, pReader); + if (status) { + delete content_encoding; + return status; + } + + *content_encoding_entries_end_++ = content_encoding; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {} + +BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } + +const Block* Track::EOSBlock::GetBlock() const { return NULL; } + +bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos, + long long value_size, bool is_x, + PrimaryChromaticity** chromaticity) { + if (!reader) + return false; + + my_auto_ptr chromaticity_ptr(*chromaticity ? *chromaticity : new PrimaryChromaticity()); + + float* value = is_x ? &chromaticity_ptr->x : &chromaticity_ptr->y; + + double parser_value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, value_size, parser_value); + + *value = static_cast(parser_value); + + if (value_parse_status < 0 || *value < 0.0 || *value > 1.0) + return false; + + *chromaticity = chromaticity_ptr.release(); + return true; +} + +bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, + long long mm_size, MasteringMetadata** mm) { + if (!reader || *mm) + return false; + + my_auto_ptr mm_ptr(new MasteringMetadata()); + + const long long mm_end = mm_start + mm_size; + long long read_pos = mm_start; + + while (read_pos < mm_end) { + long long child_id = 0; + long long child_size = 0; + + const long long status = + ParseElementHeader(reader, read_pos, mm_end, child_id, child_size); + if (status < 0) + return false; + + if (child_id == libwebm::kMkvLuminanceMax) { + double value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, child_size, value); + mm_ptr->luminance_max = static_cast(value); + if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || + mm_ptr->luminance_max > 9999.99) { + return false; + } + } else if (child_id == libwebm::kMkvLuminanceMin) { + double value = 0; + const long long value_parse_status = + UnserializeFloat(reader, read_pos, child_size, value); + mm_ptr->luminance_min = static_cast(value); + if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || + mm_ptr->luminance_min > 999.9999) { + return false; + } + } else { + bool is_x = false; + PrimaryChromaticity** chromaticity; + switch (child_id) { + case libwebm::kMkvPrimaryRChromaticityX: + case libwebm::kMkvPrimaryRChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryRChromaticityX; + chromaticity = &mm_ptr->r; + break; + case libwebm::kMkvPrimaryGChromaticityX: + case libwebm::kMkvPrimaryGChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryGChromaticityX; + chromaticity = &mm_ptr->g; + break; + case libwebm::kMkvPrimaryBChromaticityX: + case libwebm::kMkvPrimaryBChromaticityY: + is_x = child_id == libwebm::kMkvPrimaryBChromaticityX; + chromaticity = &mm_ptr->b; + break; + case libwebm::kMkvWhitePointChromaticityX: + case libwebm::kMkvWhitePointChromaticityY: + is_x = child_id == libwebm::kMkvWhitePointChromaticityX; + chromaticity = &mm_ptr->white_point; + break; + default: + return false; + } + const bool value_parse_status = PrimaryChromaticity::Parse( + reader, read_pos, child_size, is_x, chromaticity); + if (!value_parse_status) + return false; + } + + read_pos += child_size; + if (read_pos > mm_end) + return false; + } + + *mm = mm_ptr.release(); + return true; +} + +bool Colour::Parse(IMkvReader* reader, long long colour_start, + long long colour_size, Colour** colour) { + if (!reader || *colour) + return false; + + my_auto_ptr colour_ptr(new Colour()); + + const long long colour_end = colour_start + colour_size; + long long read_pos = colour_start; + + while (read_pos < colour_end) { + long long child_id = 0; + long long child_size = 0; + + const long status = + ParseElementHeader(reader, read_pos, colour_end, child_id, child_size); + if (status < 0) + return false; + + if (child_id == libwebm::kMkvMatrixCoefficients) { + colour_ptr->matrix_coefficients = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->matrix_coefficients < 0) + return false; + } else if (child_id == libwebm::kMkvBitsPerChannel) { + colour_ptr->bits_per_channel = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->bits_per_channel < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) { + colour_ptr->chroma_subsampling_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_subsampling_horz < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { + colour_ptr->chroma_subsampling_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_subsampling_vert < 0) + return false; + } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { + colour_ptr->cb_subsampling_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->cb_subsampling_horz < 0) + return false; + } else if (child_id == libwebm::kMkvCbSubsamplingVert) { + colour_ptr->cb_subsampling_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->cb_subsampling_vert < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSitingHorz) { + colour_ptr->chroma_siting_horz = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_siting_horz < 0) + return false; + } else if (child_id == libwebm::kMkvChromaSitingVert) { + colour_ptr->chroma_siting_vert = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->chroma_siting_vert < 0) + return false; + } else if (child_id == libwebm::kMkvRange) { + colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->range < 0) + return false; + } else if (child_id == libwebm::kMkvTransferCharacteristics) { + colour_ptr->transfer_characteristics = + UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->transfer_characteristics < 0) + return false; + } else if (child_id == libwebm::kMkvPrimaries) { + colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->primaries < 0) + return false; + } else if (child_id == libwebm::kMkvMaxCLL) { + colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->max_cll < 0) + return false; + } else if (child_id == libwebm::kMkvMaxFALL) { + colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); + if (colour_ptr->max_fall < 0) + return false; + } else if (child_id == libwebm::kMkvMasteringMetadata) { + if (!MasteringMetadata::Parse(reader, read_pos, child_size, + &colour_ptr->mastering_metadata)) + return false; + } else { + return false; + } + + read_pos += child_size; + if (read_pos > colour_end) + return false; + } + *colour = colour_ptr.release(); + return true; +} + +VideoTrack::VideoTrack(Segment* pSegment, long long element_start, + long long element_size) + : Track(pSegment, element_start, element_size), m_colour(NULL) {} + +VideoTrack::~VideoTrack() { delete m_colour; } + +long VideoTrack::Parse(Segment* pSegment, const Info& info, + long long element_start, long long element_size, + VideoTrack*& pResult) { + if (pResult) + return -1; + + if (info.type != Track::kVideo) + return -1; + + long long width = 0; + long long height = 0; + long long display_width = 0; + long long display_height = 0; + long long display_unit = 0; + long long stereo_mode = 0; + + double rate = 0.0; + + IMkvReader* const pReader = pSegment->m_pReader; + + const Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + Colour* colour = NULL; + + while (pos < stop) { + long long id, size; + + const long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (id == libwebm::kMkvPixelWidth) { + width = UnserializeUInt(pReader, pos, size); + + if (width <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvPixelHeight) { + height = UnserializeUInt(pReader, pos, size); + + if (height <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDisplayWidth) { + display_width = UnserializeUInt(pReader, pos, size); + + if (display_width <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDisplayHeight) { + display_height = UnserializeUInt(pReader, pos, size); + + if (display_height <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvDisplayUnit) { + display_unit = UnserializeUInt(pReader, pos, size); + + if (display_unit < 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvStereoMode) { + stereo_mode = UnserializeUInt(pReader, pos, size); + + if (stereo_mode < 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvFrameRate) { + const long status = UnserializeFloat(pReader, pos, size, rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvColour) { + if (!Colour::Parse(pReader, pos, size, &colour)) + return E_FILE_FORMAT_INVALID; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + VideoTrack* const pTrack = new VideoTrack(pSegment, element_start, element_size); + + const int status = info.Copy(pTrack->m_info); + + if (status) { // error + delete pTrack; + return status; + } + + pTrack->m_width = width; + pTrack->m_height = height; + pTrack->m_display_width = display_width; + pTrack->m_display_height = display_height; + pTrack->m_display_unit = display_unit; + pTrack->m_stereo_mode = stereo_mode; + pTrack->m_rate = rate; + pTrack->m_colour = colour; + + pResult = pTrack; + return 0; // success +} + +bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const { + return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); +} + +long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { + const long status = GetFirst(pResult); + + if (status < 0) // buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) { + // INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this, time_ns); + + if ((pResult != 0) && !pResult->EOS()) // found a keyframe + return 0; + + while (lo != i) { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this, time_ns); + + if ((pResult != 0) && !pResult->EOS()) + return 0; + } + + // weird: we're on the first cluster, but no keyframe found + // should never happen but we must return something anyway + + pResult = GetEOS(); + return 0; +} + +Colour* VideoTrack::GetColour() const { return m_colour; } + +long long VideoTrack::GetWidth() const { return m_width; } + +long long VideoTrack::GetHeight() const { return m_height; } + +long long VideoTrack::GetDisplayWidth() const { + return m_display_width > 0 ? m_display_width : GetWidth(); +} + +long long VideoTrack::GetDisplayHeight() const { + return m_display_height > 0 ? m_display_height : GetHeight(); +} + +long long VideoTrack::GetDisplayUnit() const { return m_display_unit; } + +long long VideoTrack::GetStereoMode() const { return m_stereo_mode; } + +double VideoTrack::GetFrameRate() const { return m_rate; } + +AudioTrack::AudioTrack(Segment* pSegment, long long element_start, + long long element_size) + : Track(pSegment, element_start, element_size) {} + +long AudioTrack::Parse(Segment* pSegment, const Info& info, + long long element_start, long long element_size, + AudioTrack*& pResult) { + if (pResult) + return -1; + + if (info.type != Track::kAudio) + return -1; + + IMkvReader* const pReader = pSegment->m_pReader; + + const Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + double rate = 8000.0; // MKV default + long long channels = 1; + long long bit_depth = 0; + + while (pos < stop) { + long long id, size; + + long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (id == libwebm::kMkvSamplingFrequency) { + status = UnserializeFloat(pReader, pos, size, rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvChannels) { + channels = UnserializeUInt(pReader, pos, size); + + if (channels <= 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvBitDepth) { + bit_depth = UnserializeUInt(pReader, pos, size); + + if (bit_depth <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + AudioTrack* const pTrack = new AudioTrack(pSegment, element_start, element_size); + + const int status = info.Copy(pTrack->m_info); + + if (status) { + delete pTrack; + return status; + } + + pTrack->m_rate = rate; + pTrack->m_channels = channels; + pTrack->m_bitDepth = bit_depth; + + pResult = pTrack; + return 0; // success +} + +double AudioTrack::GetSamplingRate() const { return m_rate; } + +long long AudioTrack::GetChannels() const { return m_channels; } + +long long AudioTrack::GetBitDepth() const { return m_bitDepth; } + +Tracks::Tracks(Segment* pSegment, long long start, long long size_, + long long element_start, long long element_size) + : m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_trackEntries(NULL), + m_trackEntriesEnd(NULL) {} + +long Tracks::Parse() { + assert(m_trackEntries == NULL); + assert(m_trackEntriesEnd == NULL); + + const long long stop = m_start + m_size; + IMkvReader* const pReader = m_pSegment->m_pReader; + + int count = 0; + long long pos = m_start; + + while (pos < stop) { + long long id, size; + + const long status = ParseElementHeader(pReader, pos, stop, id, size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == libwebm::kMkvTrackEntry) + ++count; + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + if (count <= 0) + return 0; // success + + m_trackEntries = new Track*[count]; + m_trackEntriesEnd = m_trackEntries; + + pos = m_start; + + while (pos < stop) { + const long long element_start = pos; + + long long id, payload_size; + + const long status = + ParseElementHeader(pReader, pos, stop, id, payload_size); + + if (status < 0) // error + return status; + + if (payload_size == 0) // weird + continue; + + const long long payload_stop = pos + payload_size; + assert(payload_stop <= stop); // checked in ParseElement + + const long long element_size = payload_stop - element_start; + + if (id == libwebm::kMkvTrackEntry) { + Track*& pTrack = *m_trackEntriesEnd; + pTrack = NULL; + + const long status = ParseTrackEntry(pos, payload_size, element_start, + element_size, pTrack); + if (status) + return status; + + if (pTrack) + ++m_trackEntriesEnd; + } + + pos = payload_stop; + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + return 0; // success +} + +unsigned long Tracks::GetTracksCount() const { + const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; + assert(result >= 0); + + return static_cast(result); +} + +long Tracks::ParseTrackEntry(long long track_start, long long track_size, + long long element_start, long long element_size, + Track*& pResult) const { + if (pResult) + return -1; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = track_start; + const long long track_stop = track_start + track_size; + + Track::Info info; + + info.type = 0; + info.number = 0; + info.uid = 0; + info.defaultDuration = 0; + + Track::Settings v; + v.start = -1; + v.size = -1; + + Track::Settings a; + a.start = -1; + a.size = -1; + + Track::Settings e; // content_encodings_settings; + e.start = -1; + e.size = -1; + + long long lacing = 1; // default is true + + while (pos < track_stop) { + long long id, size; + + const long status = ParseElementHeader(pReader, pos, track_stop, id, size); + + if (status < 0) // error + return status; + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + const long long start = pos; + + if (id == libwebm::kMkvVideo) { + v.start = start; + v.size = size; + } else if (id == libwebm::kMkvAudio) { + a.start = start; + a.size = size; + } else if (id == libwebm::kMkvContentEncodings) { + e.start = start; + e.size = size; + } else if (id == libwebm::kMkvTrackUID) { + if (size > 8) + return E_FILE_FORMAT_INVALID; + + info.uid = 0; + + long long pos_ = start; + const long long pos_end = start + size; + + while (pos_ != pos_end) { + unsigned char b; + + const int status = pReader->Read(pos_, 1, &b); + + if (status) + return status; + + info.uid <<= 8; + info.uid |= b; + + ++pos_; + } + } else if (id == libwebm::kMkvTrackNumber) { + const long long num = UnserializeUInt(pReader, pos, size); + + if ((num <= 0) || (num > 127)) + return E_FILE_FORMAT_INVALID; + + info.number = static_cast(num); + } else if (id == libwebm::kMkvTrackType) { + const long long type = UnserializeUInt(pReader, pos, size); + + if ((type <= 0) || (type > 254)) + return E_FILE_FORMAT_INVALID; + + info.type = static_cast(type); + } else if (id == libwebm::kMkvName) { + const long status = + UnserializeString(pReader, pos, size, info.nameAsUTF8); + + if (status) + return status; + } else if (id == libwebm::kMkvLanguage) { + const long status = UnserializeString(pReader, pos, size, info.language); + + if (status) + return status; + } else if (id == libwebm::kMkvDefaultDuration) { + const long long duration = UnserializeUInt(pReader, pos, size); + + if (duration < 0) + return E_FILE_FORMAT_INVALID; + + info.defaultDuration = static_cast(duration); + } else if (id == libwebm::kMkvCodecID) { + const long status = UnserializeString(pReader, pos, size, info.codecId); + + if (status) + return status; + } else if (id == libwebm::kMkvFlagLacing) { + lacing = UnserializeUInt(pReader, pos, size); + + if ((lacing < 0) || (lacing > 1)) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvCodecPrivate) { + delete[] info.codecPrivate; + info.codecPrivate = NULL; + info.codecPrivateSize = 0; + + const size_t buflen = static_cast(size); + + if (buflen) { + unsigned char* buf = SafeArrayAlloc(1, buflen); + + if (buf == NULL) + return -1; + + const int status = pReader->Read(pos, static_cast(buflen), buf); + + if (status) { + delete[] buf; + return status; + } + + info.codecPrivate = buf; + info.codecPrivateSize = buflen; + } + } else if (id == libwebm::kMkvCodecName) { + const long status = + UnserializeString(pReader, pos, size, info.codecNameAsUTF8); + + if (status) + return status; + } else if (id == libwebm::kMkvCodecDelay) { + info.codecDelay = UnserializeUInt(pReader, pos, size); + } else if (id == libwebm::kMkvSeekPreRoll) { + info.seekPreRoll = UnserializeUInt(pReader, pos, size); + } + + pos += size; // consume payload + if (pos > track_stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != track_stop) + return E_FILE_FORMAT_INVALID; + + if (info.number <= 0) // not specified + return E_FILE_FORMAT_INVALID; + + if (GetTrackByNumber(info.number)) + return E_FILE_FORMAT_INVALID; + + if (info.type <= 0) // not specified + return E_FILE_FORMAT_INVALID; + + info.lacing = (lacing > 0) ? true : false; + + if (info.type == Track::kVideo) { + if (v.start < 0) + return E_FILE_FORMAT_INVALID; + + if (a.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = v; + + VideoTrack* pTrack = NULL; + + const long status = VideoTrack::Parse(m_pSegment, info, element_start, + element_size, pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } else if (info.type == Track::kAudio) { + if (a.start < 0) + return E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = a; + + AudioTrack* pTrack = NULL; + + const long status = AudioTrack::Parse(m_pSegment, info, element_start, + element_size, pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } else { + // neither video nor audio - probably metadata or subtitles + + if (a.start >= 0) + return E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + if (info.type == Track::kMetadata && e.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings.start = -1; + info.settings.size = 0; + + Track* pTrack = NULL; + + const long status = + Track::Create(m_pSegment, info, element_start, element_size, pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + } + + return 0; // success +} + +Tracks::~Tracks() { + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) { + Track* const pTrack = *i++; + delete pTrack; + } + + delete[] m_trackEntries; +} + +const Track* Tracks::GetTrackByNumber(long tn) const { + if (tn < 0) + return NULL; + + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) { + Track* const pTrack = *i++; + + if (pTrack == NULL) + continue; + + if (tn == pTrack->GetNumber()) + return pTrack; + } + + return NULL; // not found +} + +const Track* Tracks::GetTrackByIndex(unsigned long idx) const { + const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; + + if (idx >= static_cast(count)) + return NULL; + + return m_trackEntries[idx]; +} + +long Cluster::Load(long long& pos, long& len) const { + if (m_pSegment == NULL) + return E_PARSE_FAILED; + + if (m_timecode >= 0) // at least partially loaded + return 0; + + if (m_pos != m_element_start || m_element_size >= 0) + return E_PARSE_FAILED; + + IMkvReader* const pReader = m_pSegment->m_pReader; + long long total, avail; + const int status = pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + if (total >= 0 && (avail > total || m_pos > total)) + return E_FILE_FORMAT_INVALID; + + pos = m_pos; + + long long cluster_size = -1; + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error or underflow + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id_ = ReadID(pReader, pos, len); + + if (id_ < 0) // error + return static_cast(id_); + + if (id_ != libwebm::kMkvCluster) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume id + + // read cluster size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(cluster_size); + + if (size == 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume length of size of element + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) + cluster_size = size; + + // pos points to start of payload + long long timecode = -1; + long long new_pos = -1; + bool bBlock = false; + + long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size; + + for (;;) { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + // Parse ID + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadID(pReader, pos, len); + + if (id < 0) // error + return static_cast(id); + + if (id == 0) + return E_FILE_FORMAT_INVALID; + + // This is the distinguished set of ID's we use to determine + // that we have exhausted the sub-element's inside the cluster + // whose ID we parsed earlier. + + if (id == libwebm::kMkvCluster) + break; + + if (id == libwebm::kMkvCues) + break; + + pos += len; // consume ID field + + // Parse Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + // pos now points to start of payload + + if (size == 0) + continue; + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == libwebm::kMkvTimecode) { + len = static_cast(size); + + if ((pos + size) > avail) + return E_BUFFER_NOT_FULL; + + timecode = UnserializeUInt(pReader, pos, size); + + if (timecode < 0) // error (or underflow) + return static_cast(timecode); + + new_pos = pos + size; + + if (bBlock) + break; + } else if (id == libwebm::kMkvBlockGroup) { + bBlock = true; + break; + } else if (id == libwebm::kMkvSimpleBlock) { + bBlock = true; + break; + } + + pos += size; // consume payload + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; + } + + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; + + if (timecode < 0) // no timecode found + return E_FILE_FORMAT_INVALID; + + if (!bBlock) + return E_FILE_FORMAT_INVALID; + + m_pos = new_pos; // designates position just beyond timecode payload + m_timecode = timecode; // m_timecode >= 0 means we're partially loaded + + if (cluster_size >= 0) + m_element_size = cluster_stop - m_element_start; + + return 0; +} + +long Cluster::Parse(long long& pos, long& len) const { + long status = Load(pos, len); + + if (status < 0) + return status; + + if (m_pos < m_element_start || m_timecode < 0) + return E_PARSE_FAILED; + + const long long cluster_stop = + (m_element_size < 0) ? -1 : m_element_start + m_element_size; + + if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) + return 1; // nothing else to do + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + status = pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + if (total >= 0 && avail > total) + return E_FILE_FORMAT_INVALID; + + pos = m_pos; + + for (;;) { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + if ((total >= 0) && (pos >= total)) { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + // Parse ID + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadID(pReader, pos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + // This is the distinguished set of ID's we use to determine + // that we have exhausted the sub-element's inside the cluster + // whose ID we parsed earlier. + + if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + pos += len; // consume ID field + + // Parse Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + // pos now points to start of payload + + if (size == 0) + continue; + + // const long long block_start = pos; + const long long block_stop = pos + size; + + if (cluster_stop >= 0) { + if (block_stop > cluster_stop) { + if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) { + return E_FILE_FORMAT_INVALID; + } + + pos = cluster_stop; + break; + } + } else if ((total >= 0) && (block_stop > total)) { + m_element_size = total - m_element_start; + pos = total; + break; + } else if (block_stop > avail) { + len = static_cast(size); + return E_BUFFER_NOT_FULL; + } + + Cluster* const this_ = const_cast(this); + + if (id == libwebm::kMkvBlockGroup) + return this_->ParseBlockGroup(size, pos, len); + + if (id == libwebm::kMkvSimpleBlock) + return this_->ParseSimpleBlock(size, pos, len); + + pos += size; // consume payload + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; + } + + if (m_element_size < 1) + return E_FILE_FORMAT_INVALID; + + m_pos = pos; + if (cluster_stop >= 0 && m_pos > cluster_stop) + return E_FILE_FORMAT_INVALID; + + if (m_entries_count > 0) { + const long idx = m_entries_count - 1; + + const BlockEntry* const pLast = m_entries[idx]; + if (pLast == NULL) + return E_PARSE_FAILED; + + const Block* const pBlock = pLast->GetBlock(); + if (pBlock == NULL) + return E_PARSE_FAILED; + + const long long start = pBlock->m_start; + + if ((total >= 0) && (start > total)) + return E_PARSE_FAILED; // defend against trucated stream + + const long long size = pBlock->m_size; + + const long long stop = start + size; + if (cluster_stop >= 0 && stop > cluster_stop) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (stop > total)) + return E_PARSE_FAILED; // defend against trucated stream + } + + return 1; // no more entries +} + +long Cluster::ParseSimpleBlock(long long block_size, long long& pos, + long& len) { + const long long block_start = pos; + const long long block_stop = pos + block_size; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + // parse track number + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) // error + return static_cast(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; // consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) { // error or underflow + len = 1; + return status; + } + + ++pos; // consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) { + len = static_cast(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size, + 0); // DiscardPadding + + if (status != 0) + return status; + + m_pos = block_stop; + + return 0; // success +} + +long Cluster::ParseBlockGroup(long long payload_size, long long& pos, + long& len) { + const long long payload_start = pos; + const long long payload_stop = pos + payload_size; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + if ((total >= 0) && (payload_stop > total)) + return E_FILE_FORMAT_INVALID; + + if (payload_stop > avail) { + len = static_cast(payload_size); + return E_BUFFER_NOT_FULL; + } + + long long discard_padding = 0; + + while (pos < payload_stop) { + // parse sub-block element ID + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadID(pReader, pos, len); + + if (id < 0) // error + return static_cast(id); + + if (id == 0) // not a valid ID + return E_FILE_FORMAT_INVALID; + + pos += len; // consume ID field + + // Parse Size + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume size field + + // pos now points to start of sub-block group payload + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + if (size == 0) // weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if (id == libwebm::kMkvDiscardPadding) { + status = UnserializeInt(pReader, pos, size, discard_padding); + + if (status < 0) // error + return status; + } + + if (id != libwebm::kMkvBlock) { + pos += size; // consume sub-part of block group + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + continue; + } + + const long long block_stop = pos + size; + + if (block_stop > payload_stop) + return E_FILE_FORMAT_INVALID; + + // parse track number + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) // error + return static_cast(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; // consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) { // error or underflow + len = 1; + return status; + } + + ++pos; // consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) { + len = static_cast(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + pos = block_stop; // consume block-part of block group + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + } + + if (pos != payload_stop) + return E_FILE_FORMAT_INVALID; + + status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, + discard_padding); + if (status != 0) + return status; + + m_pos = payload_stop; + + return 0; // success +} + +long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { + assert(m_pos >= m_element_start); + + pEntry = NULL; + + if (index < 0) + return -1; // generic error + + if (m_entries_count < 0) + return E_BUFFER_NOT_FULL; + + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (index < m_entries_count) { + pEntry = m_entries[index]; + assert(pEntry); + + return 1; // found entry + } + + if (m_element_size < 0) // we don't know cluster end yet + return E_BUFFER_NOT_FULL; // underflow + + const long long element_stop = m_element_start + m_element_size; + + if (m_pos >= element_stop) + return 0; // nothing left to parse + + return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed +} + +Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { + if (!pSegment || off < 0) + return NULL; + + const long long element_start = pSegment->m_start + off; + + return new Cluster(pSegment, idx, element_start); +} + +Cluster::Cluster() + : m_pSegment(NULL), + m_element_start(0), + m_index(0), + m_pos(0), + m_element_size(0), + m_timecode(0), + m_entries(NULL), + m_entries_size(0), + m_entries_count(0) // means "no entries" +{} + +Cluster::Cluster(Segment* pSegment, long idx, long long element_start + /* long long element_size */) + : m_pSegment(pSegment), + m_element_start(element_start), + m_index(idx), + m_pos(element_start), + m_element_size(-1 /* element_size */), + m_timecode(-1), + m_entries(NULL), + m_entries_size(0), + m_entries_count(-1) // means "has not been parsed yet" +{} + +Cluster::~Cluster() { + if (m_entries_count <= 0) + return; + + BlockEntry** i = m_entries; + BlockEntry** const j = m_entries + m_entries_count; + + while (i != j) { + BlockEntry* p = *i++; + assert(p); + + delete p; + } + + delete[] m_entries; +} + +bool Cluster::EOS() const { return (m_pSegment == NULL); } + +long Cluster::GetIndex() const { return m_index; } + +long long Cluster::GetPosition() const { + const long long pos = m_element_start - m_pSegment->m_start; + assert(pos >= 0); + + return pos; +} + +long long Cluster::GetElementSize() const { return m_element_size; } + +long Cluster::HasBlockEntries( + const Segment* pSegment, + long long off, // relative to start of segment payload + long long& pos, long& len) { + assert(pSegment); + assert(off >= 0); // relative to segment + + IMkvReader* const pReader = pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) // error + return status; + + assert((total < 0) || (avail <= total)); + + pos = pSegment->m_start + off; // absolute + + if ((total >= 0) && (pos >= total)) + return 0; // we don't even have a complete cluster + + const long long segment_stop = + (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size; + + long long cluster_stop = -1; // interpreted later to mean "unknown size" + + { + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // need more data + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadID(pReader, pos, len); + + if (id < 0) // error + return static_cast(id); + + if (id != libwebm::kMkvCluster) + return E_PARSE_FAILED; + + pos += len; // consume Cluster ID field + + // read size field + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + if (size == 0) + return 0; // cluster does not have entries + + pos += len; // consume size field + + // pos now points to start of payload + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) { + cluster_stop = pos + size; + assert(cluster_stop >= 0); + + if ((segment_stop >= 0) && (cluster_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (cluster_stop > total)) + // return E_FILE_FORMAT_INVALID; //too conservative + return 0; // cluster does not have any entries + } + } + + for (;;) { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + return 0; // no entries detected + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // need more data + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadID(pReader, pos, len); + + if (id < 0) // error + return static_cast(id); + + // This is the distinguished set of ID's we use to determine + // that we have exhausted the sub-element's inside the cluster + // whose ID we parsed earlier. + + if (id == libwebm::kMkvCluster) + return 0; // no entries found + + if (id == libwebm::kMkvCues) + return 0; // no entries found + + pos += len; // consume id field + + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + return E_FILE_FORMAT_INVALID; + + // read size field + + if ((pos + 1) > avail) { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) // error + return static_cast(result); + + if (result > 0) // underflow + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) // error + return static_cast(size); + + pos += len; // consume size field + + // pos now points to start of payload + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (size == 0) // weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; // not supported inside cluster + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == libwebm::kMkvBlockGroup) + return 1; // have at least one entry + + if (id == libwebm::kMkvSimpleBlock) + return 1; // have at least one entry + + pos += size; // consume payload + if (cluster_stop >= 0 && pos > cluster_stop) + return E_FILE_FORMAT_INVALID; + } +} + +long long Cluster::GetTimeCode() const { + long long pos; + long len; + + const long status = Load(pos, len); + + if (status < 0) // error + return status; + + return m_timecode; +} + +long long Cluster::GetTime() const { + const long long tc = GetTimeCode(); + + if (tc < 0) + return tc; + + const SegmentInfo* const pInfo = m_pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long t = m_timecode * scale; + + return t; +} + +long long Cluster::GetFirstTime() const { + const BlockEntry* pEntry; + + const long status = GetFirst(pEntry); + + if (status < 0) // error + return status; + + if (pEntry == NULL) // empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + +long long Cluster::GetLastTime() const { + const BlockEntry* pEntry; + + const long status = GetLast(pEntry); + + if (status < 0) // error + return status; + + if (pEntry == NULL) // empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + +long Cluster::CreateBlock(long long id, + long long pos, // absolute pos of payload + long long size, long long discard_padding) { + if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) + return E_PARSE_FAILED; + + if (m_entries_count < 0) { // haven't parsed anything yet + assert(m_entries == NULL); + assert(m_entries_size == 0); + + m_entries_size = 1024; + m_entries = new BlockEntry*[m_entries_size]; + + m_entries_count = 0; + } else { + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (m_entries_count >= m_entries_size) { + const long entries_size = 2 * m_entries_size; + + BlockEntry** const entries = new BlockEntry*[entries_size]; + + BlockEntry** src = m_entries; + BlockEntry** const src_end = src + m_entries_count; + + BlockEntry** dst = entries; + + while (src != src_end) + *dst++ = *src++; + + delete[] m_entries; + + m_entries = entries; + m_entries_size = entries_size; + } + } + + if (id == libwebm::kMkvBlockGroup) + return CreateBlockGroup(pos, size, discard_padding); + else + return CreateSimpleBlock(pos, size); +} + +long Cluster::CreateBlockGroup(long long start_offset, long long size, + long long discard_padding) { + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = start_offset; + const long long stop = start_offset + size; + + // For WebM files, there is a bias towards previous reference times + //(in order to support alt-ref frames, which refer back to the previous + // keyframe). Normally a 0 value is not possible, but here we tenatively + // allow 0 as the value of a reference frame, with the interpretation + // that this is a "previous" reference time. + + long long prev = 1; // nonce + long long next = 0; // nonce + long long duration = -1; // really, this is unsigned + + long long bpos = -1; + long long bsize = -1; + + while (pos < stop) { + long len; + const long long id = ReadID(pReader, pos, len); + if (id < 0 || (pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); // TODO + assert((pos + len) <= stop); + + pos += len; // consume size + + if (id == libwebm::kMkvBlock) { + if (bpos < 0) { // Block ID + bpos = pos; + bsize = size; + } + } else if (id == libwebm::kMkvBlockDuration) { + if (size > 8) + return E_FILE_FORMAT_INVALID; + + duration = UnserializeUInt(pReader, pos, size); + + if (duration < 0) + return E_FILE_FORMAT_INVALID; + } else if (id == libwebm::kMkvReferenceBlock) { + if (size > 8 || size <= 0) + return E_FILE_FORMAT_INVALID; + const long size_ = static_cast(size); + + long long time; + + long status = UnserializeInt(pReader, pos, size_, time); + assert(status == 0); + if (status != 0) + return -1; + + if (time <= 0) // see note above + prev = time; + else + next = time; + } + + pos += size; // consume payload + if (pos > stop) + return E_FILE_FORMAT_INVALID; + } + if (bpos < 0) + return E_FILE_FORMAT_INVALID; + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + assert(bsize >= 0); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding); + + BlockGroup* const p = static_cast(pEntry); + + const long status = p->Parse(); + + if (status == 0) { // success + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + +long Cluster::CreateSimpleBlock(long long st, long long sz) { + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new SimpleBlock(this, idx, st, sz); + + SimpleBlock* const p = static_cast(pEntry); + + const long status = p->Parse(); + + if (status == 0) { + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + +long Cluster::GetFirst(const BlockEntry*& pFirst) const { + if (m_entries_count <= 0) { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) { // error + pFirst = NULL; + return status; + } + + if (m_entries_count <= 0) { // empty cluster + pFirst = NULL; + return 0; + } + } + + assert(m_entries); + + pFirst = m_entries[0]; + assert(pFirst); + + return 0; // success +} + +long Cluster::GetLast(const BlockEntry*& pLast) const { + for (;;) { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) { // error + pLast = NULL; + return status; + } + + if (status > 0) // no new block + break; + } + + if (m_entries_count <= 0) { + pLast = NULL; + return 0; + } + + assert(m_entries); + + const long idx = m_entries_count - 1; + + pLast = m_entries[idx]; + assert(pLast); + + return 0; +} + +long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const { + assert(pCurr); + assert(m_entries); + assert(m_entries_count > 0); + + size_t idx = pCurr->GetIndex(); + assert(idx < size_t(m_entries_count)); + assert(m_entries[idx] == pCurr); + + ++idx; + + if (idx >= size_t(m_entries_count)) { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) { // error + pNext = NULL; + return status; + } + + if (status > 0) { + pNext = NULL; + return 0; + } + + assert(m_entries); + assert(m_entries_count > 0); + assert(idx < size_t(m_entries_count)); + } + + pNext = m_entries[idx]; + assert(pNext); + + return 0; +} + +long Cluster::GetEntryCount() const { return m_entries_count; } + +const BlockEntry* Cluster::GetEntry(const Track* pTrack, + long long time_ns) const { + assert(pTrack); + + if (m_pSegment == NULL) // this is the special EOS cluster + return pTrack->GetEOS(); + + const BlockEntry* pResult = pTrack->GetEOS(); + + long index = 0; + + for (;;) { + if (index >= m_entries_count) { + long long pos; + long len; + + const long status = Parse(pos, len); + assert(status >= 0); + + if (status > 0) // completely parsed, and no more entries + return pResult; + + if (status < 0) // should never happen + return 0; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) { + ++index; + continue; + } + + if (pTrack->VetEntry(pEntry)) { + if (time_ns < 0) // just want first candidate block + return pEntry; + + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + + pResult = pEntry; // have a candidate + } else if (time_ns >= 0) { + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + } + + ++index; + } +} + +const BlockEntry* Cluster::GetEntry(const CuePoint& cp, + const CuePoint::TrackPosition& tp) const { + assert(m_pSegment); + const long long tc = cp.GetTimeCode(); + + if (tp.m_block > 0) { + const long block = static_cast(tp.m_block); + const long index = block - 1; + + while (index >= m_entries_count) { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) // TODO: can this happen? + return NULL; + + if (status > 0) // nothing remains to be parsed + return NULL; + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if ((pBlock->GetTrackNumber() == tp.m_track) && + (pBlock->GetTimeCode(this) == tc)) { + return pEntry; + } + } + + long index = 0; + + for (;;) { + if (index >= m_entries_count) { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) // TODO: can this happen? + return NULL; + + if (status > 0) // nothing remains to be parsed + return NULL; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != tp.m_track) { + ++index; + continue; + } + + const long long tc_ = pBlock->GetTimeCode(this); + + if (tc_ < tc) { + ++index; + continue; + } + + if (tc_ > tc) + return NULL; + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast(tp.m_track); + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return NULL; + + const long long type = pTrack->GetType(); + + if (type == 2) // audio + return pEntry; + + if (type != 1) // not video + return NULL; + + if (!pBlock->IsKey()) + return NULL; + + return pEntry; + } +} + +BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} +BlockEntry::~BlockEntry() {} +const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } +long BlockEntry::GetIndex() const { return m_index; } + +SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start, + long long size) + : BlockEntry(pCluster, idx), m_block(start, size, 0) {} + +long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); } +BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; } +const Block* SimpleBlock::GetBlock() const { return &m_block; } + +BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start, + long long block_size, long long prev, long long next, + long long duration, long long discard_padding) + : BlockEntry(pCluster, idx), + m_block(block_start, block_size, discard_padding), + m_prev(prev), + m_next(next), + m_duration(duration) {} + +long BlockGroup::Parse() { + const long status = m_block.Parse(m_pCluster); + + if (status) + return status; + + m_block.SetKey((m_prev > 0) && (m_next <= 0)); + + return 0; +} + +BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; } +const Block* BlockGroup::GetBlock() const { return &m_block; } +long long BlockGroup::GetPrevTimeCode() const { return m_prev; } +long long BlockGroup::GetNextTimeCode() const { return m_next; } +long long BlockGroup::GetDurationTimeCode() const { return m_duration; } + +Block::Block(long long start, long long size_, long long discard_padding) + : m_start(start), + m_size(size_), + m_track(0), + m_timecode(-1), + m_flags(0), + m_frames(NULL), + m_frame_count(-1), + m_discard_padding(discard_padding) {} + +Block::~Block() { delete[] m_frames; } + +long Block::Parse(const Cluster* pCluster) { + if (pCluster == NULL) + return -1; + + if (pCluster->m_pSegment == NULL) + return -1; + + assert(m_start >= 0); + assert(m_size >= 0); + assert(m_track <= 0); + assert(m_frames == NULL); + assert(m_frame_count <= 0); + + long long pos = m_start; + const long long stop = m_start + m_size; + + long len; + + IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; + + m_track = ReadUInt(pReader, pos, len); + + if (m_track <= 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume track number + + if ((stop - pos) < 2) + return E_FILE_FORMAT_INVALID; + + long status; + long long value; + + status = UnserializeInt(pReader, pos, 2, value); + + if (status) + return E_FILE_FORMAT_INVALID; + + if (value < SHRT_MIN) + return E_FILE_FORMAT_INVALID; + + if (value > SHRT_MAX) + return E_FILE_FORMAT_INVALID; + + m_timecode = static_cast(value); + + pos += 2; + + if ((stop - pos) <= 0) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &m_flags); + + if (status) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(m_flags & 0x06) >> 1; + + ++pos; // consume flags byte + + if (lacing == 0) { // no lacing + if (pos > stop) + return E_FILE_FORMAT_INVALID; + + m_frame_count = 1; + m_frames = new Frame[m_frame_count]; + + Frame& f = m_frames[0]; + f.pos = pos; + + const long long frame_size = stop - pos; + + if (frame_size > LONG_MAX || frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast(frame_size); + + return 0; // success + } + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + unsigned char biased_count; + + status = pReader->Read(pos, 1, &biased_count); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; // consume frame count + if (pos > stop) + return E_FILE_FORMAT_INVALID; + + m_frame_count = int(biased_count) + 1; + + m_frames = new Frame[m_frame_count]; + + if (!m_frames) + return E_FILE_FORMAT_INVALID; + + if (lacing == 1) { // Xiph + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + long long size = 0; + int frame_count = m_frame_count; + + while (frame_count > 1) { + long frame_size = 0; + + for (;;) { + unsigned char val; + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &val); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; // consume xiph size byte + + frame_size += val; + + if (val < 255) + break; + } + + Frame& f = *pf++; + assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; + + f.pos = 0; // patch later + + if (frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + f.len = frame_size; + size += frame_size; // contribution of this frame + + --frame_count; + } + + if (pf >= pf_end || pos > stop) + return E_FILE_FORMAT_INVALID; + + { + Frame& f = *pf++; + + if (pf != pf_end) + return E_FILE_FORMAT_INVALID; + + f.pos = 0; // patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size - size; + + if (frame_size > LONG_MAX || frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast(frame_size); + } + + pf = m_frames; + while (pf != pf_end) { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + + if ((pos + f.len) > stop) + return E_FILE_FORMAT_INVALID; + + f.pos = pos; + pos += f.len; + } + + assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + } else if (lacing == 2) { // fixed-size lacing + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + const long long total_size = stop - pos; + + if ((total_size % m_frame_count) != 0) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size / m_frame_count; + + if (frame_size > LONG_MAX || frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + while (pf != pf_end) { + assert((pos + frame_size) <= stop); + if ((pos + frame_size) > stop) + return E_FILE_FORMAT_INVALID; + + Frame& f = *pf++; + + f.pos = pos; + f.len = static_cast(frame_size); + + pos += frame_size; + } + + assert(pos == stop); + if (pos != stop) + return E_FILE_FORMAT_INVALID; + + } else { + assert(lacing == 3); // EBML lacing + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + long long size = 0; + int frame_count = m_frame_count; + + long long frame_size = ReadUInt(pReader, pos, len); + + if (frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume length of size of first frame + + if ((pos + frame_size) > stop) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + { + Frame& curr = *pf; + + curr.pos = 0; // patch later + + curr.len = static_cast(frame_size); + size += curr.len; // contribution of this frame + } + + --frame_count; + + while (frame_count > 1) { + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; + + Frame& curr = *pf; + + curr.pos = 0; // patch later + + const long long delta_size_ = ReadUInt(pReader, pos, len); + + if (delta_size_ < 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; // consume length of (delta) size + if (pos > stop) + return E_FILE_FORMAT_INVALID; + + const long exp = 7 * len - 1; + const long long bias = (1LL << exp) - 1LL; + const long long delta_size = delta_size_ - bias; + + frame_size += delta_size; + + if (frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast(frame_size); + size += curr.len; // contribution of this frame + + --frame_count; + } + + // parse last frame + if (frame_count > 0) { + if (pos > stop || pf >= pf_end) + return E_FILE_FORMAT_INVALID; + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + if (pf >= pf_end) + return E_FILE_FORMAT_INVALID; + + Frame& curr = *pf++; + if (pf != pf_end) + return E_FILE_FORMAT_INVALID; + + curr.pos = 0; // patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + frame_size = total_size - size; + + if (frame_size > LONG_MAX || frame_size <= 0) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast(frame_size); + } + + pf = m_frames; + while (pf != pf_end) { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + if ((pos + f.len) > stop) + return E_FILE_FORMAT_INVALID; + + f.pos = pos; + pos += f.len; + } + + if (pos != stop) + return E_FILE_FORMAT_INVALID; + } + + return 0; // success +} + +long long Block::GetTimeCode(const Cluster* pCluster) const { + if (pCluster == 0) + return m_timecode; + + const long long tc0 = pCluster->GetTimeCode(); + assert(tc0 >= 0); + + const long long tc = tc0 + m_timecode; + + return tc; // unscaled timecode units +} + +long long Block::GetTime(const Cluster* pCluster) const { + assert(pCluster); + + const long long tc = GetTimeCode(pCluster); + + const Segment* const pSegment = pCluster->m_pSegment; + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long ns = tc * scale; + + return ns; +} + +long long Block::GetTrackNumber() const { return m_track; } + +bool Block::IsKey() const { + return ((m_flags & static_cast(1 << 7)) != 0); +} + +void Block::SetKey(bool bKey) { + if (bKey) + m_flags |= static_cast(1 << 7); + else + m_flags &= 0x7F; +} + +bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); } + +Block::Lacing Block::GetLacing() const { + const int value = int(m_flags & 0x06) >> 1; + return static_cast(value); +} + +int Block::GetFrameCount() const { return m_frame_count; } + +const Block::Frame& Block::GetFrame(int idx) const { + assert(idx >= 0); + assert(idx < m_frame_count); + + const Frame& f = m_frames[idx]; + assert(f.pos > 0); + assert(f.len > 0); + + return f; +} + +long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { + assert(pReader); + assert(buf); + + const long status = pReader->Read(pos, len, buf); + return status; +} + +long long Block::GetDiscardPadding() const { return m_discard_padding; } + +} // namespace mkvparser diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h new file mode 100644 index 00000000000..1ef274b162e --- /dev/null +++ b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h @@ -0,0 +1,1111 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +#ifndef MKVPARSER_MKVPARSER_H_ +#define MKVPARSER_MKVPARSER_H_ + +#include + +namespace mkvparser { + +const int E_PARSE_FAILED = -1; +const int E_FILE_FORMAT_INVALID = -2; +const int E_BUFFER_NOT_FULL = -3; + +class IMkvReader { + public: + virtual int Read(long long pos, long len, unsigned char* buf) = 0; + virtual int Length(long long* total, long long* available) = 0; + + virtual ~IMkvReader(); +}; + +template +Type* SafeArrayAlloc(unsigned long long num_elements, + unsigned long long element_size); +long long GetUIntLength(IMkvReader*, long long, long&); +long long ReadUInt(IMkvReader*, long long, long&); +long long ReadID(IMkvReader* pReader, long long pos, long& len); +long long UnserializeUInt(IMkvReader*, long long pos, long long size); + +long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); +long UnserializeInt(IMkvReader*, long long pos, long long size, + long long& result); + +long UnserializeString(IMkvReader*, long long pos, long long size, char*& str); + +long ParseElementHeader(IMkvReader* pReader, + long long& pos, // consume id and size fields + long long stop, // if you know size of element's parent + long long& id, long long& size); + +bool Match(IMkvReader*, long long&, unsigned long, long long&); +bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); + +void GetVersion(int& major, int& minor, int& build, int& revision); + +struct EBMLHeader { + EBMLHeader(); + ~EBMLHeader(); + long long m_version; + long long m_readVersion; + long long m_maxIdLength; + long long m_maxSizeLength; + char* m_docType; + long long m_docTypeVersion; + long long m_docTypeReadVersion; + + long long Parse(IMkvReader*, long long&); + void Init(); +}; + +class Segment; +class Track; +class Cluster; + +class Block { + Block(const Block&); + Block& operator=(const Block&); + + public: + const long long m_start; + const long long m_size; + + Block(long long start, long long size, long long discard_padding); + ~Block(); + + long Parse(const Cluster*); + + long long GetTrackNumber() const; + long long GetTimeCode(const Cluster*) const; // absolute, but not scaled + long long GetTime(const Cluster*) const; // absolute, and scaled (ns) + bool IsKey() const; + void SetKey(bool); + bool IsInvisible() const; + + enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; + Lacing GetLacing() const; + + int GetFrameCount() const; // to index frames: [0, count) + + struct Frame { + long long pos; // absolute offset + long len; + + long Read(IMkvReader*, unsigned char*) const; + }; + + const Frame& GetFrame(int frame_index) const; + + long long GetDiscardPadding() const; + + private: + long long m_track; // Track::Number() + short m_timecode; // relative to cluster + unsigned char m_flags; + + Frame* m_frames; + int m_frame_count; + + protected: + const long long m_discard_padding; +}; + +class BlockEntry { + BlockEntry(const BlockEntry&); + BlockEntry& operator=(const BlockEntry&); + + protected: + BlockEntry(Cluster*, long index); + + public: + virtual ~BlockEntry(); + + bool EOS() const { return (GetKind() == kBlockEOS); } + const Cluster* GetCluster() const; + long GetIndex() const; + virtual const Block* GetBlock() const = 0; + + enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; + virtual Kind GetKind() const = 0; + + protected: + Cluster* const m_pCluster; + const long m_index; +}; + +class SimpleBlock : public BlockEntry { + SimpleBlock(const SimpleBlock&); + SimpleBlock& operator=(const SimpleBlock&); + + public: + SimpleBlock(Cluster*, long index, long long start, long long size); + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + + protected: + Block m_block; +}; + +class BlockGroup : public BlockEntry { + BlockGroup(const BlockGroup&); + BlockGroup& operator=(const BlockGroup&); + + public: + BlockGroup(Cluster*, long index, + long long block_start, // absolute pos of block's payload + long long block_size, // size of block's payload + long long prev, long long next, long long duration, + long long discard_padding); + + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + + long long GetPrevTimeCode() const; // relative to block's time + long long GetNextTimeCode() const; // as above + long long GetDurationTimeCode() const; + + private: + Block m_block; + const long long m_prev; + const long long m_next; + const long long m_duration; +}; + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +// Elements used to describe if the track data has been encrypted or +// compressed with zlib or header stripping. +class ContentEncoding { + public: + enum { kCTR = 1 }; + + ContentEncoding(); + ~ContentEncoding(); + + // ContentCompression element names + struct ContentCompression { + ContentCompression(); + ~ContentCompression(); + + unsigned long long algo; + unsigned char* settings; + long long settings_len; + }; + + // ContentEncAESSettings element names + struct ContentEncAESSettings { + ContentEncAESSettings() : cipher_mode(kCTR) {} + ~ContentEncAESSettings() {} + + unsigned long long cipher_mode; + }; + + // ContentEncryption element names + struct ContentEncryption { + ContentEncryption(); + ~ContentEncryption(); + + unsigned long long algo; + unsigned char* key_id; + long long key_id_len; + unsigned char* signature; + long long signature_len; + unsigned char* sig_key_id; + long long sig_key_id_len; + unsigned long long sig_algo; + unsigned long long sig_hash_algo; + + ContentEncAESSettings aes_settings; + }; + + // Returns ContentCompression represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentCompression* GetCompressionByIndex(unsigned long idx) const; + + // Returns number of ContentCompression elements in this ContentEncoding + // element. + unsigned long GetCompressionCount() const; + + // Parses the ContentCompression element from |pReader|. |start| is the + // starting offset of the ContentCompression payload. |size| is the size in + // bytes of the ContentCompression payload. |compression| is where the parsed + // values will be stored. + long ParseCompressionEntry(long long start, long long size, + IMkvReader* pReader, + ContentCompression* compression); + + // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; + + // Returns number of ContentEncryption elements in this ContentEncoding + // element. + unsigned long GetEncryptionCount() const; + + // Parses the ContentEncAESSettings element from |pReader|. |start| is the + // starting offset of the ContentEncAESSettings payload. |size| is the + // size in bytes of the ContentEncAESSettings payload. |encryption| is + // where the parsed values will be stored. + long ParseContentEncAESSettingsEntry(long long start, long long size, + IMkvReader* pReader, + ContentEncAESSettings* aes); + + // Parses the ContentEncoding element from |pReader|. |start| is the + // starting offset of the ContentEncoding payload. |size| is the size in + // bytes of the ContentEncoding payload. Returns true on success. + long ParseContentEncodingEntry(long long start, long long size, + IMkvReader* pReader); + + // Parses the ContentEncryption element from |pReader|. |start| is the + // starting offset of the ContentEncryption payload. |size| is the size in + // bytes of the ContentEncryption payload. |encryption| is where the parsed + // values will be stored. + long ParseEncryptionEntry(long long start, long long size, + IMkvReader* pReader, ContentEncryption* encryption); + + unsigned long long encoding_order() const { return encoding_order_; } + unsigned long long encoding_scope() const { return encoding_scope_; } + unsigned long long encoding_type() const { return encoding_type_; } + + private: + // Member variables for list of ContentCompression elements. + ContentCompression** compression_entries_; + ContentCompression** compression_entries_end_; + + // Member variables for list of ContentEncryption elements. + ContentEncryption** encryption_entries_; + ContentEncryption** encryption_entries_end_; + + // ContentEncoding element names + unsigned long long encoding_order_; + unsigned long long encoding_scope_; + unsigned long long encoding_type_; + + // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); + ContentEncoding(const ContentEncoding&); + ContentEncoding& operator=(const ContentEncoding&); +}; + +class Track { + Track(const Track&); + Track& operator=(const Track&); + + public: + class Info; + static long Create(Segment*, const Info&, long long element_start, + long long element_size, Track*&); + + enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 }; + + Segment* const m_pSegment; + const long long m_element_start; + const long long m_element_size; + virtual ~Track(); + + long GetType() const; + long GetNumber() const; + unsigned long long GetUid() const; + const char* GetNameAsUTF8() const; + const char* GetLanguage() const; + const char* GetCodecNameAsUTF8() const; + const char* GetCodecId() const; + const unsigned char* GetCodecPrivate(size_t&) const; + bool GetLacing() const; + unsigned long long GetDefaultDuration() const; + unsigned long long GetCodecDelay() const; + unsigned long long GetSeekPreRoll() const; + + const BlockEntry* GetEOS() const; + + struct Settings { + long long start; + long long size; + }; + + class Info { + public: + Info(); + ~Info(); + int Copy(Info&) const; + void Clear(); + long type; + long number; + unsigned long long uid; + unsigned long long defaultDuration; + unsigned long long codecDelay; + unsigned long long seekPreRoll; + char* nameAsUTF8; + char* language; + char* codecId; + char* codecNameAsUTF8; + unsigned char* codecPrivate; + size_t codecPrivateSize; + bool lacing; + Settings settings; + + private: + Info(const Info&); + Info& operator=(const Info&); + int CopyStr(char* Info::*str, Info&) const; + }; + + long GetFirst(const BlockEntry*&) const; + long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const; + virtual bool VetEntry(const BlockEntry*) const; + virtual long Seek(long long time_ns, const BlockEntry*&) const; + + const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; + unsigned long GetContentEncodingCount() const; + + long ParseContentEncodingsEntry(long long start, long long size); + + protected: + Track(Segment*, long long element_start, long long element_size); + + Info m_info; + + class EOSBlock : public BlockEntry { + public: + EOSBlock(); + + Kind GetKind() const; + const Block* GetBlock() const; + }; + + EOSBlock m_eos; + + private: + ContentEncoding** content_encoding_entries_; + ContentEncoding** content_encoding_entries_end_; +}; + +struct PrimaryChromaticity { + PrimaryChromaticity() : x(0), y(0) {} + ~PrimaryChromaticity() {} + static bool Parse(IMkvReader* reader, long long read_pos, + long long value_size, bool is_x, + PrimaryChromaticity** chromaticity); + float x; + float y; +}; + +struct MasteringMetadata { + static const float kValueNotPresent; + + MasteringMetadata() + : r(NULL), + g(NULL), + b(NULL), + white_point(NULL), + luminance_max(kValueNotPresent), + luminance_min(kValueNotPresent) {} + ~MasteringMetadata() { + delete r; + delete g; + delete b; + delete white_point; + } + + static bool Parse(IMkvReader* reader, long long element_start, + long long element_size, + MasteringMetadata** mastering_metadata); + + PrimaryChromaticity* r; + PrimaryChromaticity* g; + PrimaryChromaticity* b; + PrimaryChromaticity* white_point; + float luminance_max; + float luminance_min; +}; + +struct Colour { + static const long long kValueNotPresent; + + // Unless otherwise noted all values assigned upon construction are the + // equivalent of unspecified/default. + Colour() + : matrix_coefficients(kValueNotPresent), + bits_per_channel(kValueNotPresent), + chroma_subsampling_horz(kValueNotPresent), + chroma_subsampling_vert(kValueNotPresent), + cb_subsampling_horz(kValueNotPresent), + cb_subsampling_vert(kValueNotPresent), + chroma_siting_horz(kValueNotPresent), + chroma_siting_vert(kValueNotPresent), + range(kValueNotPresent), + transfer_characteristics(kValueNotPresent), + primaries(kValueNotPresent), + max_cll(kValueNotPresent), + max_fall(kValueNotPresent), + mastering_metadata(NULL) {} + ~Colour() { + delete mastering_metadata; + mastering_metadata = NULL; + } + + static bool Parse(IMkvReader* reader, long long element_start, + long long element_size, Colour** colour); + + long long matrix_coefficients; + long long bits_per_channel; + long long chroma_subsampling_horz; + long long chroma_subsampling_vert; + long long cb_subsampling_horz; + long long cb_subsampling_vert; + long long chroma_siting_horz; + long long chroma_siting_vert; + long long range; + long long transfer_characteristics; + long long primaries; + long long max_cll; + long long max_fall; + + MasteringMetadata* mastering_metadata; +}; + +class VideoTrack : public Track { + VideoTrack(const VideoTrack&); + VideoTrack& operator=(const VideoTrack&); + + VideoTrack(Segment*, long long element_start, long long element_size); + + public: + virtual ~VideoTrack(); + static long Parse(Segment*, const Info&, long long element_start, + long long element_size, VideoTrack*&); + + long long GetWidth() const; + long long GetHeight() const; + long long GetDisplayWidth() const; + long long GetDisplayHeight() const; + long long GetDisplayUnit() const; + long long GetStereoMode() const; + double GetFrameRate() const; + + bool VetEntry(const BlockEntry*) const; + long Seek(long long time_ns, const BlockEntry*&) const; + + Colour* GetColour() const; + + private: + long long m_width; + long long m_height; + long long m_display_width; + long long m_display_height; + long long m_display_unit; + long long m_stereo_mode; + + double m_rate; + + Colour* m_colour; +}; + +class AudioTrack : public Track { + AudioTrack(const AudioTrack&); + AudioTrack& operator=(const AudioTrack&); + + AudioTrack(Segment*, long long element_start, long long element_size); + + public: + static long Parse(Segment*, const Info&, long long element_start, + long long element_size, AudioTrack*&); + + double GetSamplingRate() const; + long long GetChannels() const; + long long GetBitDepth() const; + + private: + double m_rate; + long long m_channels; + long long m_bitDepth; +}; + +class Tracks { + Tracks(const Tracks&); + Tracks& operator=(const Tracks&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + Tracks(Segment*, long long start, long long size, long long element_start, + long long element_size); + + ~Tracks(); + + long Parse(); + + unsigned long GetTracksCount() const; + + const Track* GetTrackByNumber(long tn) const; + const Track* GetTrackByIndex(unsigned long idx) const; + + private: + Track** m_trackEntries; + Track** m_trackEntriesEnd; + + long ParseTrackEntry(long long payload_start, long long payload_size, + long long element_start, long long element_size, + Track*&) const; +}; + +class Chapters { + Chapters(const Chapters&); + Chapters& operator=(const Chapters&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + Chapters(Segment*, long long payload_start, long long payload_size, + long long element_start, long long element_size); + + ~Chapters(); + + long Parse(); + + class Atom; + class Edition; + + class Display { + friend class Atom; + Display(); + Display(const Display&); + ~Display(); + Display& operator=(const Display&); + + public: + const char* GetString() const; + const char* GetLanguage() const; + const char* GetCountry() const; + + private: + void Init(); + void ShallowCopy(Display&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + char* m_string; + char* m_language; + char* m_country; + }; + + class Atom { + friend class Edition; + Atom(); + Atom(const Atom&); + ~Atom(); + Atom& operator=(const Atom&); + + public: + unsigned long long GetUID() const; + const char* GetStringUID() const; + + long long GetStartTimecode() const; + long long GetStopTimecode() const; + + long long GetStartTime(const Chapters*) const; + long long GetStopTime(const Chapters*) const; + + int GetDisplayCount() const; + const Display* GetDisplay(int index) const; + + private: + void Init(); + void ShallowCopy(Atom&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + static long long GetTime(const Chapters*, long long timecode); + + long ParseDisplay(IMkvReader*, long long pos, long long size); + bool ExpandDisplaysArray(); + + char* m_string_uid; + unsigned long long m_uid; + long long m_start_timecode; + long long m_stop_timecode; + + Display* m_displays; + int m_displays_size; + int m_displays_count; + }; + + class Edition { + friend class Chapters; + Edition(); + Edition(const Edition&); + ~Edition(); + Edition& operator=(const Edition&); + + public: + int GetAtomCount() const; + const Atom* GetAtom(int index) const; + + private: + void Init(); + void ShallowCopy(Edition&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + long ParseAtom(IMkvReader*, long long pos, long long size); + bool ExpandAtomsArray(); + + Atom* m_atoms; + int m_atoms_size; + int m_atoms_count; + }; + + int GetEditionCount() const; + const Edition* GetEdition(int index) const; + + private: + long ParseEdition(long long pos, long long size); + bool ExpandEditionsArray(); + + Edition* m_editions; + int m_editions_size; + int m_editions_count; +}; + +class Tags { + Tags(const Tags&); + Tags& operator=(const Tags&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + Tags(Segment*, long long payload_start, long long payload_size, + long long element_start, long long element_size); + + ~Tags(); + + long Parse(); + + class Tag; + class SimpleTag; + + class SimpleTag { + friend class Tag; + SimpleTag(); + SimpleTag(const SimpleTag&); + ~SimpleTag(); + SimpleTag& operator=(const SimpleTag&); + + public: + const char* GetTagName() const; + const char* GetTagString() const; + + private: + void Init(); + void ShallowCopy(SimpleTag&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + char* m_tag_name; + char* m_tag_string; + }; + + class Tag { + friend class Tags; + Tag(); + Tag(const Tag&); + ~Tag(); + Tag& operator=(const Tag&); + + public: + int GetSimpleTagCount() const; + const SimpleTag* GetSimpleTag(int index) const; + + private: + void Init(); + void ShallowCopy(Tag&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + long ParseSimpleTag(IMkvReader*, long long pos, long long size); + bool ExpandSimpleTagsArray(); + + SimpleTag* m_simple_tags; + int m_simple_tags_size; + int m_simple_tags_count; + }; + + int GetTagCount() const; + const Tag* GetTag(int index) const; + + private: + long ParseTag(long long pos, long long size); + bool ExpandTagsArray(); + + Tag* m_tags; + int m_tags_size; + int m_tags_count; +}; + +class SegmentInfo { + SegmentInfo(const SegmentInfo&); + SegmentInfo& operator=(const SegmentInfo&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SegmentInfo(Segment*, long long start, long long size, + long long element_start, long long element_size); + + ~SegmentInfo(); + + long Parse(); + + long long GetTimeCodeScale() const; + long long GetDuration() const; // scaled + const char* GetMuxingAppAsUTF8() const; + const char* GetWritingAppAsUTF8() const; + const char* GetTitleAsUTF8() const; + + private: + long long m_timecodeScale; + double m_duration; + char* m_pMuxingAppAsUTF8; + char* m_pWritingAppAsUTF8; + char* m_pTitleAsUTF8; +}; + +class SeekHead { + SeekHead(const SeekHead&); + SeekHead& operator=(const SeekHead&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SeekHead(Segment*, long long start, long long size, long long element_start, + long long element_size); + + ~SeekHead(); + + long Parse(); + + struct Entry { + // the SeekHead entry payload + long long id; + long long pos; + + // absolute pos of SeekEntry ID + long long element_start; + + // SeekEntry ID size + size size + payload + long long element_size; + }; + + int GetCount() const; + const Entry* GetEntry(int idx) const; + + struct VoidElement { + // absolute pos of Void ID + long long element_start; + + // ID size + size size + payload size + long long element_size; + }; + + int GetVoidElementCount() const; + const VoidElement* GetVoidElement(int idx) const; + + private: + Entry* m_entries; + int m_entry_count; + + VoidElement* m_void_elements; + int m_void_element_count; + + static bool ParseEntry(IMkvReader*, + long long pos, // payload + long long size, Entry*); +}; + +class Cues; +class CuePoint { + friend class Cues; + + CuePoint(long, long long); + ~CuePoint(); + + CuePoint(const CuePoint&); + CuePoint& operator=(const CuePoint&); + + public: + long long m_element_start; + long long m_element_size; + + bool Load(IMkvReader*); + + long long GetTimeCode() const; // absolute but unscaled + long long GetTime(const Segment*) const; // absolute and scaled (ns units) + + struct TrackPosition { + long long m_track; + long long m_pos; // of cluster + long long m_block; + // codec_state //defaults to 0 + // reference = clusters containing req'd referenced blocks + // reftime = timecode of the referenced block + + bool Parse(IMkvReader*, long long, long long); + }; + + const TrackPosition* Find(const Track*) const; + + private: + const long m_index; + long long m_timecode; + TrackPosition* m_track_positions; + size_t m_track_positions_count; +}; + +class Cues { + friend class Segment; + + Cues(Segment*, long long start, long long size, long long element_start, + long long element_size); + ~Cues(); + + Cues(const Cues&); + Cues& operator=(const Cues&); + + public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + bool Find( // lower bound of time_ns + long long time_ns, const Track*, const CuePoint*&, + const CuePoint::TrackPosition*&) const; + + const CuePoint* GetFirst() const; + const CuePoint* GetLast() const; + const CuePoint* GetNext(const CuePoint*) const; + + const BlockEntry* GetBlock(const CuePoint*, + const CuePoint::TrackPosition*) const; + + bool LoadCuePoint() const; + long GetCount() const; // loaded only + // long GetTotal() const; //loaded + preloaded + bool DoneParsing() const; + + private: + bool Init() const; + bool PreloadCuePoint(long&, long long) const; + + mutable CuePoint** m_cue_points; + mutable long m_count; + mutable long m_preload_count; + mutable long long m_pos; +}; + +class Cluster { + friend class Segment; + + Cluster(const Cluster&); + Cluster& operator=(const Cluster&); + + public: + Segment* const m_pSegment; + + public: + static Cluster* Create(Segment*, + long index, // index in segment + long long off); // offset relative to segment + // long long element_size); + + Cluster(); // EndOfStream + ~Cluster(); + + bool EOS() const; + + long long GetTimeCode() const; // absolute, but not scaled + long long GetTime() const; // absolute, and scaled (nanosecond units) + long long GetFirstTime() const; // time (ns) of first (earliest) block + long long GetLastTime() const; // time (ns) of last (latest) block + + long GetFirst(const BlockEntry*&) const; + long GetLast(const BlockEntry*&) const; + long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; + + const BlockEntry* GetEntry(const Track*, long long ns = -1) const; + const BlockEntry* GetEntry(const CuePoint&, + const CuePoint::TrackPosition&) const; + // const BlockEntry* GetMaxKey(const VideoTrack*) const; + + // static bool HasBlockEntries(const Segment*, long long); + + static long HasBlockEntries(const Segment*, long long idoff, long long& pos, + long& size); + + long GetEntryCount() const; + + long Load(long long& pos, long& size) const; + + long Parse(long long& pos, long& size) const; + long GetEntry(long index, const mkvparser::BlockEntry*&) const; + + protected: + Cluster(Segment*, long index, long long element_start); + // long long element_size); + + public: + const long long m_element_start; + long long GetPosition() const; // offset relative to segment + + long GetIndex() const; + long long GetElementSize() const; + // long long GetPayloadSize() const; + + // long long Unparsed() const; + + private: + long m_index; + mutable long long m_pos; + // mutable long long m_size; + mutable long long m_element_size; + mutable long long m_timecode; + mutable BlockEntry** m_entries; + mutable long m_entries_size; + mutable long m_entries_count; + + long ParseSimpleBlock(long long, long long&, long&); + long ParseBlockGroup(long long, long long&, long&); + + long CreateBlock(long long id, long long pos, long long size, + long long discard_padding); + long CreateBlockGroup(long long start_offset, long long size, + long long discard_padding); + long CreateSimpleBlock(long long, long long); +}; + +class Segment { + friend class Cues; + friend class Track; + friend class VideoTrack; + + Segment(const Segment&); + Segment& operator=(const Segment&); + + private: + Segment(IMkvReader*, long long elem_start, + // long long elem_size, + long long pos, long long size); + + public: + IMkvReader* const m_pReader; + const long long m_element_start; + // const long long m_element_size; + const long long m_start; // posn of segment payload + const long long m_size; // size of segment payload + Cluster m_eos; // TODO: make private? + + static long long CreateInstance(IMkvReader*, long long, Segment*&); + ~Segment(); + + long Load(); // loads headers and all clusters + + // for incremental loading + // long long Unparsed() const; + bool DoneParsing() const; + long long ParseHeaders(); // stops when first cluster is found + // long FindNextCluster(long long& pos, long& size) const; + long LoadCluster(long long& pos, long& size); // load one cluster + long LoadCluster(); + + long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos, + long& size); + + const SeekHead* GetSeekHead() const; + const Tracks* GetTracks() const; + const SegmentInfo* GetInfo() const; + const Cues* GetCues() const; + const Chapters* GetChapters() const; + const Tags* GetTags() const; + + long long GetDuration() const; + + unsigned long GetCount() const; + const Cluster* GetFirst() const; + const Cluster* GetLast() const; + const Cluster* GetNext(const Cluster*); + + const Cluster* FindCluster(long long time_nanoseconds) const; + // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; + + const Cluster* FindOrPreloadCluster(long long pos); + + long ParseCues(long long cues_off, // offset relative to start of segment + long long& parse_pos, long& parse_len); + + private: + long long m_pos; // absolute file posn; what has been consumed so far + Cluster* m_pUnknownSize; + + SeekHead* m_pSeekHead; + SegmentInfo* m_pInfo; + Tracks* m_pTracks; + Cues* m_pCues; + Chapters* m_pChapters; + Tags* m_pTags; + Cluster** m_clusters; + long m_clusterCount; // number of entries for which m_index >= 0 + long m_clusterPreloadCount; // number of entries for which m_index < 0 + long m_clusterSize; // array size + + long DoLoadCluster(long long&, long&); + long DoLoadClusterUnknownSize(long long&, long&); + long DoParseNext(const Cluster*&, long long&, long&); + + bool AppendCluster(Cluster*); + bool PreloadCluster(Cluster*, ptrdiff_t); + + // void ParseSeekHead(long long pos, long long size); + // void ParseSeekEntry(long long pos, long long size); + // void ParseCues(long long); + + const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); +}; + +} // namespace mkvparser + +inline long mkvparser::Segment::LoadCluster() { + long long pos; + long size; + + return LoadCluster(pos, size); +} + +#endif // MKVPARSER_MKVPARSER_H_ From 5268443fdfd6f9f8172cede1140810ae21f7990a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= Date: Wed, 14 Sep 2016 22:10:03 +0200 Subject: [PATCH 008/223] Add libvpx thirdparty library Only necessary files --- thirdparty/README.md | 7 + thirdparty/libvpx/AUTHORS | 142 + thirdparty/libvpx/CHANGELOG | 654 +++ thirdparty/libvpx/LICENSE | 31 + thirdparty/libvpx/PATENTS | 23 + thirdparty/libvpx/third_party/x86inc/LICENSE | 18 + .../libvpx/third_party/x86inc/README.libvpx | 20 + .../libvpx/third_party/x86inc/x86inc.asm | 1649 +++++++ thirdparty/libvpx/vp8/common/alloccommon.c | 190 + thirdparty/libvpx/vp8/common/alloccommon.h | 31 + .../libvpx/vp8/common/arm/loopfilter_arm.c | 181 + .../common/arm/neon/bilinearpredict_neon.c | 591 +++ .../libvpx/vp8/common/arm/neon/copymem_neon.c | 59 + .../common/arm/neon/dc_only_idct_add_neon.c | 42 + .../vp8/common/arm/neon/dequant_idct_neon.c | 142 + .../vp8/common/arm/neon/dequantizeb_neon.c | 25 + .../vp8/common/arm/neon/idct_blk_neon.c | 96 + .../common/arm/neon/idct_dequant_0_2x_neon.c | 63 + .../arm/neon/idct_dequant_full_2x_neon.c | 185 + .../libvpx/vp8/common/arm/neon/iwalsh_neon.c | 102 + .../loopfiltersimplehorizontaledge_neon.c | 111 + .../neon/loopfiltersimpleverticaledge_neon.c | 283 ++ .../vp8/common/arm/neon/mbloopfilter_neon.c | 625 +++ .../common/arm/neon/shortidct4x4llm_neon.c | 123 + .../vp8/common/arm/neon/sixtappredict_neon.c | 1377 ++++++ .../vp8/common/arm/neon/vp8_loopfilter_neon.c | 550 +++ thirdparty/libvpx/vp8/common/blockd.c | 22 + thirdparty/libvpx/vp8/common/blockd.h | 312 ++ .../libvpx/vp8/common/coefupdateprobs.h | 197 + thirdparty/libvpx/vp8/common/common.h | 48 + thirdparty/libvpx/vp8/common/copy_c.c | 32 + thirdparty/libvpx/vp8/common/debugmodes.c | 155 + .../libvpx/vp8/common/default_coef_probs.h | 200 + thirdparty/libvpx/vp8/common/dequantize.c | 43 + thirdparty/libvpx/vp8/common/entropy.c | 188 + thirdparty/libvpx/vp8/common/entropy.h | 109 + thirdparty/libvpx/vp8/common/entropymode.c | 171 + thirdparty/libvpx/vp8/common/entropymode.h | 88 + thirdparty/libvpx/vp8/common/entropymv.c | 49 + thirdparty/libvpx/vp8/common/entropymv.h | 52 + thirdparty/libvpx/vp8/common/extend.c | 188 + thirdparty/libvpx/vp8/common/extend.h | 33 + thirdparty/libvpx/vp8/common/filter.c | 493 ++ thirdparty/libvpx/vp8/common/filter.h | 32 + thirdparty/libvpx/vp8/common/findnearmv.c | 193 + thirdparty/libvpx/vp8/common/findnearmv.h | 195 + .../vp8/common/generic/systemdependent.c | 106 + thirdparty/libvpx/vp8/common/header.h | 51 + thirdparty/libvpx/vp8/common/idct_blk.c | 90 + thirdparty/libvpx/vp8/common/idctllm.c | 205 + thirdparty/libvpx/vp8/common/invtrans.h | 70 + thirdparty/libvpx/vp8/common/loopfilter.h | 113 + .../libvpx/vp8/common/loopfilter_filters.c | 430 ++ thirdparty/libvpx/vp8/common/mbpitch.c | 68 + thirdparty/libvpx/vp8/common/modecont.c | 40 + thirdparty/libvpx/vp8/common/modecont.h | 25 + thirdparty/libvpx/vp8/common/mv.h | 36 + thirdparty/libvpx/vp8/common/onyxc_int.h | 185 + thirdparty/libvpx/vp8/common/onyxd.h | 63 + thirdparty/libvpx/vp8/common/ppflags.h | 49 + thirdparty/libvpx/vp8/common/quant_common.c | 135 + thirdparty/libvpx/vp8/common/quant_common.h | 34 + thirdparty/libvpx/vp8/common/reconinter.c | 544 +++ thirdparty/libvpx/vp8/common/reconinter.h | 43 + thirdparty/libvpx/vp8/common/reconintra.c | 117 + thirdparty/libvpx/vp8/common/reconintra.h | 44 + thirdparty/libvpx/vp8/common/reconintra4x4.c | 54 + thirdparty/libvpx/vp8/common/reconintra4x4.h | 48 + thirdparty/libvpx/vp8/common/rtcd.c | 19 + .../libvpx/vp8/common/setupintrarecon.c | 39 + .../libvpx/vp8/common/setupintrarecon.h | 45 + thirdparty/libvpx/vp8/common/swapyv12buffer.c | 34 + thirdparty/libvpx/vp8/common/swapyv12buffer.h | 27 + .../libvpx/vp8/common/systemdependent.h | 27 + thirdparty/libvpx/vp8/common/threading.h | 232 + thirdparty/libvpx/vp8/common/treecoder.c | 143 + thirdparty/libvpx/vp8/common/treecoder.h | 98 + .../libvpx/vp8/common/vp8_entropymodedata.h | 254 ++ thirdparty/libvpx/vp8/common/vp8_loopfilter.c | 661 +++ .../libvpx/vp8/common/x86/copy_sse2.asm | 93 + .../libvpx/vp8/common/x86/copy_sse3.asm | 146 + .../libvpx/vp8/common/x86/dequantize_mmx.asm | 258 ++ thirdparty/libvpx/vp8/common/x86/filter_x86.c | 35 + thirdparty/libvpx/vp8/common/x86/filter_x86.h | 33 + .../libvpx/vp8/common/x86/idct_blk_mmx.c | 128 + .../libvpx/vp8/common/x86/idct_blk_sse2.c | 89 + .../libvpx/vp8/common/x86/idctllm_mmx.asm | 295 ++ .../libvpx/vp8/common/x86/idctllm_sse2.asm | 708 +++ .../libvpx/vp8/common/x86/iwalsh_mmx.asm | 140 + .../libvpx/vp8/common/x86/iwalsh_sse2.asm | 121 + .../x86/loopfilter_block_sse2_x86_64.asm | 815 ++++ .../libvpx/vp8/common/x86/loopfilter_sse2.asm | 1640 +++++++ .../libvpx/vp8/common/x86/loopfilter_x86.c | 198 + .../libvpx/vp8/common/x86/recon_mmx.asm | 274 ++ .../libvpx/vp8/common/x86/recon_sse2.asm | 116 + .../libvpx/vp8/common/x86/subpixel_mmx.asm | 702 +++ .../libvpx/vp8/common/x86/subpixel_sse2.asm | 1372 ++++++ .../libvpx/vp8/common/x86/subpixel_ssse3.asm | 1508 ++++++ .../libvpx/vp8/common/x86/vp8_asm_stubs.c | 625 +++ .../vp8/common/x86/vp8_loopfilter_mmx.asm | 1753 +++++++ thirdparty/libvpx/vp8/decoder/dboolhuff.c | 77 + thirdparty/libvpx/vp8/decoder/dboolhuff.h | 141 + thirdparty/libvpx/vp8/decoder/decodeframe.c | 1397 ++++++ thirdparty/libvpx/vp8/decoder/decodemv.c | 670 +++ thirdparty/libvpx/vp8/decoder/decodemv.h | 26 + .../libvpx/vp8/decoder/decoderthreading.h | 30 + thirdparty/libvpx/vp8/decoder/detokenize.c | 245 + thirdparty/libvpx/vp8/decoder/detokenize.h | 27 + thirdparty/libvpx/vp8/decoder/onyxd_if.c | 521 +++ thirdparty/libvpx/vp8/decoder/onyxd_int.h | 161 + thirdparty/libvpx/vp8/decoder/threading.c | 928 ++++ thirdparty/libvpx/vp8/decoder/treereader.h | 49 + thirdparty/libvpx/vp8/vp8_dx_iface.c | 828 ++++ .../vp9/common/arm/neon/vp9_iht4x4_add_neon.c | 248 + .../vp9/common/arm/neon/vp9_iht8x8_add_neon.c | 624 +++ .../libvpx/vp9/common/vp9_alloccommon.c | 201 + .../libvpx/vp9/common/vp9_alloccommon.h | 45 + thirdparty/libvpx/vp9/common/vp9_blockd.c | 135 + thirdparty/libvpx/vp9/common/vp9_blockd.h | 305 ++ thirdparty/libvpx/vp9/common/vp9_common.h | 74 + .../libvpx/vp9/common/vp9_common_data.c | 176 + .../libvpx/vp9/common/vp9_common_data.h | 44 + thirdparty/libvpx/vp9/common/vp9_debugmodes.c | 91 + thirdparty/libvpx/vp9/common/vp9_entropy.c | 802 ++++ thirdparty/libvpx/vp9/common/vp9_entropy.h | 200 + .../libvpx/vp9/common/vp9_entropymode.c | 469 ++ .../libvpx/vp9/common/vp9_entropymode.h | 107 + thirdparty/libvpx/vp9/common/vp9_entropymv.c | 210 + thirdparty/libvpx/vp9/common/vp9_entropymv.h | 140 + thirdparty/libvpx/vp9/common/vp9_enums.h | 147 + thirdparty/libvpx/vp9/common/vp9_filter.c | 104 + thirdparty/libvpx/vp9/common/vp9_filter.h | 42 + .../libvpx/vp9/common/vp9_frame_buffers.c | 86 + .../libvpx/vp9/common/vp9_frame_buffers.h | 53 + thirdparty/libvpx/vp9/common/vp9_idct.c | 405 ++ thirdparty/libvpx/vp9/common/vp9_idct.h | 81 + thirdparty/libvpx/vp9/common/vp9_loopfilter.c | 1697 +++++++ thirdparty/libvpx/vp9/common/vp9_loopfilter.h | 166 + thirdparty/libvpx/vp9/common/vp9_mv.h | 55 + .../libvpx/vp9/common/vp9_mvref_common.c | 201 + .../libvpx/vp9/common/vp9_mvref_common.h | 241 + thirdparty/libvpx/vp9/common/vp9_onyxc_int.h | 446 ++ thirdparty/libvpx/vp9/common/vp9_ppflags.h | 43 + .../libvpx/vp9/common/vp9_pred_common.c | 314 ++ .../libvpx/vp9/common/vp9_pred_common.h | 192 + .../libvpx/vp9/common/vp9_quant_common.c | 278 ++ .../libvpx/vp9/common/vp9_quant_common.h | 36 + thirdparty/libvpx/vp9/common/vp9_reconinter.c | 309 ++ thirdparty/libvpx/vp9/common/vp9_reconinter.h | 119 + thirdparty/libvpx/vp9/common/vp9_reconintra.c | 445 ++ thirdparty/libvpx/vp9/common/vp9_reconintra.h | 32 + thirdparty/libvpx/vp9/common/vp9_rtcd.c | 19 + thirdparty/libvpx/vp9/common/vp9_scale.c | 175 + thirdparty/libvpx/vp9/common/vp9_scale.h | 75 + thirdparty/libvpx/vp9/common/vp9_scan.c | 725 +++ thirdparty/libvpx/vp9/common/vp9_scan.h | 57 + thirdparty/libvpx/vp9/common/vp9_seg_common.c | 64 + thirdparty/libvpx/vp9/common/vp9_seg_common.h | 87 + .../libvpx/vp9/common/vp9_thread_common.c | 435 ++ .../libvpx/vp9/common/vp9_thread_common.h | 65 + .../libvpx/vp9/common/vp9_tile_common.c | 59 + .../libvpx/vp9/common/vp9_tile_common.h | 40 + .../vp9/common/x86/vp9_idct_intrin_sse2.c | 181 + .../libvpx/vp9/decoder/vp9_decodeframe.c | 2271 +++++++++ .../libvpx/vp9/decoder/vp9_decodeframe.h | 37 + thirdparty/libvpx/vp9/decoder/vp9_decodemv.c | 911 ++++ thirdparty/libvpx/vp9/decoder/vp9_decodemv.h | 30 + thirdparty/libvpx/vp9/decoder/vp9_decoder.c | 518 +++ thirdparty/libvpx/vp9/decoder/vp9_decoder.h | 137 + .../libvpx/vp9/decoder/vp9_detokenize.c | 224 + .../libvpx/vp9/decoder/vp9_detokenize.h | 33 + thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c | 76 + thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h | 27 + thirdparty/libvpx/vp9/decoder/vp9_dthread.c | 189 + thirdparty/libvpx/vp9/decoder/vp9_dthread.h | 74 + thirdparty/libvpx/vp9/vp9_dx_iface.c | 1093 +++++ thirdparty/libvpx/vp9/vp9_dx_iface.h | 65 + thirdparty/libvpx/vp9/vp9_iface_common.h | 136 + .../libvpx/vpx/internal/vpx_codec_internal.h | 445 ++ thirdparty/libvpx/vpx/internal/vpx_psnr.h | 34 + thirdparty/libvpx/vpx/src/vpx_codec.c | 158 + thirdparty/libvpx/vpx/src/vpx_decoder.c | 197 + thirdparty/libvpx/vpx/src/vpx_image.c | 285 ++ thirdparty/libvpx/vpx/src/vpx_psnr.c | 24 + thirdparty/libvpx/vpx/vp8.h | 148 + thirdparty/libvpx/vpx/vp8dx.h | 176 + thirdparty/libvpx/vpx/vpx_codec.h | 479 ++ thirdparty/libvpx/vpx/vpx_decoder.h | 378 ++ thirdparty/libvpx/vpx/vpx_encoder.h | 1043 +++++ thirdparty/libvpx/vpx/vpx_frame_buffer.h | 83 + thirdparty/libvpx/vpx/vpx_image.h | 235 + thirdparty/libvpx/vpx/vpx_integer.h | 74 + .../libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c | 61 + .../libvpx/vpx_dsp/arm/idct16x16_add_neon.c | 1317 ++++++ .../libvpx/vpx_dsp/arm/idct16x16_neon.c | 185 + .../libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c | 165 + .../libvpx/vpx_dsp/arm/idct32x32_add_neon.c | 719 +++ .../libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c | 50 + .../libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 151 + .../libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c | 64 + .../libvpx/vpx_dsp/arm/idct8x8_add_neon.c | 540 +++ .../libvpx/vpx_dsp/arm/intrapred_neon.c | 822 ++++ .../libvpx/vpx_dsp/arm/intrapred_neon_asm.asm | 630 +++ .../libvpx/vpx_dsp/arm/loopfilter_16_neon.c | 179 + .../libvpx/vpx_dsp/arm/loopfilter_4_neon.c | 266 ++ .../libvpx/vpx_dsp/arm/loopfilter_8_neon.c | 445 ++ .../libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm | 635 +++ .../libvpx/vpx_dsp/arm/loopfilter_neon.c | 58 + .../libvpx/vpx_dsp/arm/save_reg_neon.asm | 36 + .../vpx_dsp/arm/vpx_convolve8_avg_neon.c | 373 ++ .../libvpx/vpx_dsp/arm/vpx_convolve8_neon.c | 340 ++ .../vpx_dsp/arm/vpx_convolve_avg_neon.c | 147 + .../vpx_dsp/arm/vpx_convolve_copy_neon.c | 94 + .../libvpx/vpx_dsp/arm/vpx_convolve_neon.c | 72 + thirdparty/libvpx/vpx_dsp/bitreader.c | 103 + thirdparty/libvpx/vpx_dsp/bitreader.h | 140 + thirdparty/libvpx/vpx_dsp/bitreader_buffer.c | 53 + thirdparty/libvpx/vpx_dsp/bitreader_buffer.h | 47 + thirdparty/libvpx/vpx_dsp/intrapred.c | 870 ++++ thirdparty/libvpx/vpx_dsp/inv_txfm.c | 2518 ++++++++++ thirdparty/libvpx/vpx_dsp/inv_txfm.h | 133 + thirdparty/libvpx/vpx_dsp/loopfilter.c | 767 ++++ thirdparty/libvpx/vpx_dsp/prob.c | 53 + thirdparty/libvpx/vpx_dsp/prob.h | 103 + thirdparty/libvpx/vpx_dsp/txfm_common.h | 66 + thirdparty/libvpx/vpx_dsp/vpx_convolve.c | 612 +++ thirdparty/libvpx/vpx_dsp/vpx_convolve.h | 38 + thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h | 69 + thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c | 17 + thirdparty/libvpx/vpx_dsp/vpx_filter.h | 34 + thirdparty/libvpx/vpx_dsp/x86/convolve.h | 274 ++ .../libvpx/vpx_dsp/x86/intrapred_sse2.asm | 860 ++++ .../libvpx/vpx_dsp/x86/intrapred_ssse3.asm | 871 ++++ thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 4046 +++++++++++++++++ thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h | 196 + .../vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm | 1793 ++++++++ .../libvpx/vpx_dsp/x86/inv_wht_sse2.asm | 109 + .../libvpx/vpx_dsp/x86/loopfilter_avx2.c | 979 ++++ .../libvpx/vpx_dsp/x86/loopfilter_sse2.c | 1776 ++++++++ .../libvpx/vpx_dsp/x86/txfm_common_sse2.h | 29 + thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c | 162 + .../vpx_dsp/x86/vpx_convolve_copy_sse2.asm | 228 + .../vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c | 605 +++ .../x86/vpx_subpixel_8t_intrin_ssse3.c | 915 ++++ .../vpx_dsp/x86/vpx_subpixel_8t_sse2.asm | 987 ++++ .../vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm | 629 +++ .../x86/vpx_subpixel_bilinear_sse2.asm | 448 ++ .../x86/vpx_subpixel_bilinear_ssse3.asm | 422 ++ .../libvpx/vpx_mem/include/vpx_mem_intrnl.h | 31 + thirdparty/libvpx/vpx_mem/vpx_mem.c | 100 + thirdparty/libvpx/vpx_mem/vpx_mem.h | 47 + thirdparty/libvpx/vpx_ports/arm.h | 41 + thirdparty/libvpx/vpx_ports/arm_cpudetect.c | 175 + thirdparty/libvpx/vpx_ports/bitops.h | 76 + thirdparty/libvpx/vpx_ports/config.h | 16 + .../libvpx/vpx_ports/emmintrin_compat.h | 55 + thirdparty/libvpx/vpx_ports/emms.asm | 38 + thirdparty/libvpx/vpx_ports/mem.h | 53 + thirdparty/libvpx/vpx_ports/mem_ops.h | 226 + thirdparty/libvpx/vpx_ports/mem_ops_aligned.h | 169 + thirdparty/libvpx/vpx_ports/msvc.h | 32 + thirdparty/libvpx/vpx_ports/system_state.h | 22 + thirdparty/libvpx/vpx_ports/vpx_once.h | 150 + thirdparty/libvpx/vpx_ports/vpx_timer.h | 120 + thirdparty/libvpx/vpx_ports/x86.h | 330 ++ .../libvpx/vpx_ports/x86_abi_support.asm | 404 ++ .../libvpx/vpx_scale/generic/yv12config.c | 287 ++ .../libvpx/vpx_scale/generic/yv12extend.c | 324 ++ thirdparty/libvpx/vpx_scale/vpx_scale.h | 27 + thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c | 18 + thirdparty/libvpx/vpx_scale/yv12config.h | 105 + thirdparty/libvpx/vpx_util/endian_inl.h | 120 + thirdparty/libvpx/vpx_util/vpx_thread.c | 184 + thirdparty/libvpx/vpx_util/vpx_thread.h | 369 ++ thirdparty/libvpx/vpx_version.h | 7 + 275 files changed, 83150 insertions(+) create mode 100644 thirdparty/libvpx/AUTHORS create mode 100644 thirdparty/libvpx/CHANGELOG create mode 100644 thirdparty/libvpx/LICENSE create mode 100644 thirdparty/libvpx/PATENTS create mode 100644 thirdparty/libvpx/third_party/x86inc/LICENSE create mode 100644 thirdparty/libvpx/third_party/x86inc/README.libvpx create mode 100644 thirdparty/libvpx/third_party/x86inc/x86inc.asm create mode 100644 thirdparty/libvpx/vp8/common/alloccommon.c create mode 100644 thirdparty/libvpx/vp8/common/alloccommon.h create mode 100644 thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c create mode 100644 thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c create mode 100644 thirdparty/libvpx/vp8/common/blockd.c create mode 100644 thirdparty/libvpx/vp8/common/blockd.h create mode 100644 thirdparty/libvpx/vp8/common/coefupdateprobs.h create mode 100644 thirdparty/libvpx/vp8/common/common.h create mode 100644 thirdparty/libvpx/vp8/common/copy_c.c create mode 100644 thirdparty/libvpx/vp8/common/debugmodes.c create mode 100644 thirdparty/libvpx/vp8/common/default_coef_probs.h create mode 100644 thirdparty/libvpx/vp8/common/dequantize.c create mode 100644 thirdparty/libvpx/vp8/common/entropy.c create mode 100644 thirdparty/libvpx/vp8/common/entropy.h create mode 100644 thirdparty/libvpx/vp8/common/entropymode.c create mode 100644 thirdparty/libvpx/vp8/common/entropymode.h create mode 100644 thirdparty/libvpx/vp8/common/entropymv.c create mode 100644 thirdparty/libvpx/vp8/common/entropymv.h create mode 100644 thirdparty/libvpx/vp8/common/extend.c create mode 100644 thirdparty/libvpx/vp8/common/extend.h create mode 100644 thirdparty/libvpx/vp8/common/filter.c create mode 100644 thirdparty/libvpx/vp8/common/filter.h create mode 100644 thirdparty/libvpx/vp8/common/findnearmv.c create mode 100644 thirdparty/libvpx/vp8/common/findnearmv.h create mode 100644 thirdparty/libvpx/vp8/common/generic/systemdependent.c create mode 100644 thirdparty/libvpx/vp8/common/header.h create mode 100644 thirdparty/libvpx/vp8/common/idct_blk.c create mode 100644 thirdparty/libvpx/vp8/common/idctllm.c create mode 100644 thirdparty/libvpx/vp8/common/invtrans.h create mode 100644 thirdparty/libvpx/vp8/common/loopfilter.h create mode 100644 thirdparty/libvpx/vp8/common/loopfilter_filters.c create mode 100644 thirdparty/libvpx/vp8/common/mbpitch.c create mode 100644 thirdparty/libvpx/vp8/common/modecont.c create mode 100644 thirdparty/libvpx/vp8/common/modecont.h create mode 100644 thirdparty/libvpx/vp8/common/mv.h create mode 100644 thirdparty/libvpx/vp8/common/onyxc_int.h create mode 100644 thirdparty/libvpx/vp8/common/onyxd.h create mode 100644 thirdparty/libvpx/vp8/common/ppflags.h create mode 100644 thirdparty/libvpx/vp8/common/quant_common.c create mode 100644 thirdparty/libvpx/vp8/common/quant_common.h create mode 100644 thirdparty/libvpx/vp8/common/reconinter.c create mode 100644 thirdparty/libvpx/vp8/common/reconinter.h create mode 100644 thirdparty/libvpx/vp8/common/reconintra.c create mode 100644 thirdparty/libvpx/vp8/common/reconintra.h create mode 100644 thirdparty/libvpx/vp8/common/reconintra4x4.c create mode 100644 thirdparty/libvpx/vp8/common/reconintra4x4.h create mode 100644 thirdparty/libvpx/vp8/common/rtcd.c create mode 100644 thirdparty/libvpx/vp8/common/setupintrarecon.c create mode 100644 thirdparty/libvpx/vp8/common/setupintrarecon.h create mode 100644 thirdparty/libvpx/vp8/common/swapyv12buffer.c create mode 100644 thirdparty/libvpx/vp8/common/swapyv12buffer.h create mode 100644 thirdparty/libvpx/vp8/common/systemdependent.h create mode 100644 thirdparty/libvpx/vp8/common/threading.h create mode 100644 thirdparty/libvpx/vp8/common/treecoder.c create mode 100644 thirdparty/libvpx/vp8/common/treecoder.h create mode 100644 thirdparty/libvpx/vp8/common/vp8_entropymodedata.h create mode 100644 thirdparty/libvpx/vp8/common/vp8_loopfilter.c create mode 100644 thirdparty/libvpx/vp8/common/x86/copy_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/copy_sse3.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/filter_x86.c create mode 100644 thirdparty/libvpx/vp8/common/x86/filter_x86.h create mode 100644 thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c create mode 100644 thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c create mode 100644 thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c create mode 100644 thirdparty/libvpx/vp8/common/x86/recon_mmx.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/recon_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm create mode 100644 thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c create mode 100644 thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm create mode 100644 thirdparty/libvpx/vp8/decoder/dboolhuff.c create mode 100644 thirdparty/libvpx/vp8/decoder/dboolhuff.h create mode 100644 thirdparty/libvpx/vp8/decoder/decodeframe.c create mode 100644 thirdparty/libvpx/vp8/decoder/decodemv.c create mode 100644 thirdparty/libvpx/vp8/decoder/decodemv.h create mode 100644 thirdparty/libvpx/vp8/decoder/decoderthreading.h create mode 100644 thirdparty/libvpx/vp8/decoder/detokenize.c create mode 100644 thirdparty/libvpx/vp8/decoder/detokenize.h create mode 100644 thirdparty/libvpx/vp8/decoder/onyxd_if.c create mode 100644 thirdparty/libvpx/vp8/decoder/onyxd_int.h create mode 100644 thirdparty/libvpx/vp8/decoder/threading.c create mode 100644 thirdparty/libvpx/vp8/decoder/treereader.h create mode 100644 thirdparty/libvpx/vp8/vp8_dx_iface.c create mode 100644 thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c create mode 100644 thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_alloccommon.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_alloccommon.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_blockd.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_blockd.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_common_data.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_common_data.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_debugmodes.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropy.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropy.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymode.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymode.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymv.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_entropymv.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_enums.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_filter.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_filter.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_frame_buffers.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_frame_buffers.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_idct.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_idct.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_loopfilter.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_loopfilter.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_mv.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_mvref_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_mvref_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_onyxc_int.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_ppflags.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_pred_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_pred_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_quant_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_quant_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_reconinter.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_reconinter.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_reconintra.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_reconintra.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_rtcd.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_scale.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_scale.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_scan.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_scan.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_seg_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_seg_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_thread_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_thread_common.h create mode 100644 thirdparty/libvpx/vp9/common/vp9_tile_common.c create mode 100644 thirdparty/libvpx/vp9/common/vp9_tile_common.h create mode 100644 thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodemv.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decodemv.h create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decoder.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_decoder.h create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_detokenize.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_detokenize.h create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dthread.c create mode 100644 thirdparty/libvpx/vp9/decoder/vp9_dthread.h create mode 100644 thirdparty/libvpx/vp9/vp9_dx_iface.c create mode 100644 thirdparty/libvpx/vp9/vp9_dx_iface.h create mode 100644 thirdparty/libvpx/vp9/vp9_iface_common.h create mode 100644 thirdparty/libvpx/vpx/internal/vpx_codec_internal.h create mode 100644 thirdparty/libvpx/vpx/internal/vpx_psnr.h create mode 100644 thirdparty/libvpx/vpx/src/vpx_codec.c create mode 100644 thirdparty/libvpx/vpx/src/vpx_decoder.c create mode 100644 thirdparty/libvpx/vpx/src/vpx_image.c create mode 100644 thirdparty/libvpx/vpx/src/vpx_psnr.c create mode 100644 thirdparty/libvpx/vpx/vp8.h create mode 100644 thirdparty/libvpx/vpx/vp8dx.h create mode 100644 thirdparty/libvpx/vpx/vpx_codec.h create mode 100644 thirdparty/libvpx/vpx/vpx_decoder.h create mode 100644 thirdparty/libvpx/vpx/vpx_encoder.h create mode 100644 thirdparty/libvpx/vpx/vpx_frame_buffer.h create mode 100644 thirdparty/libvpx/vpx/vpx_image.h create mode 100644 thirdparty/libvpx/vpx/vpx_integer.h create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm create mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm create mode 100644 thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm create mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c create mode 100644 thirdparty/libvpx/vpx_dsp/bitreader.c create mode 100644 thirdparty/libvpx/vpx_dsp/bitreader.h create mode 100644 thirdparty/libvpx/vpx_dsp/bitreader_buffer.c create mode 100644 thirdparty/libvpx/vpx_dsp/bitreader_buffer.h create mode 100644 thirdparty/libvpx/vpx_dsp/intrapred.c create mode 100644 thirdparty/libvpx/vpx_dsp/inv_txfm.c create mode 100644 thirdparty/libvpx/vpx_dsp/inv_txfm.h create mode 100644 thirdparty/libvpx/vpx_dsp/loopfilter.c create mode 100644 thirdparty/libvpx/vpx_dsp/prob.c create mode 100644 thirdparty/libvpx/vpx_dsp/prob.h create mode 100644 thirdparty/libvpx/vpx_dsp/txfm_common.h create mode 100644 thirdparty/libvpx/vpx_dsp/vpx_convolve.c create mode 100644 thirdparty/libvpx/vpx_dsp/vpx_convolve.h create mode 100644 thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h create mode 100644 thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c create mode 100644 thirdparty/libvpx/vpx_dsp/vpx_filter.h create mode 100644 thirdparty/libvpx/vpx_dsp/x86/convolve.h create mode 100644 thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h create mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm create mode 100644 thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm create mode 100644 thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h create mode 100644 thirdparty/libvpx/vpx_mem/vpx_mem.c create mode 100644 thirdparty/libvpx/vpx_mem/vpx_mem.h create mode 100644 thirdparty/libvpx/vpx_ports/arm.h create mode 100644 thirdparty/libvpx/vpx_ports/arm_cpudetect.c create mode 100644 thirdparty/libvpx/vpx_ports/bitops.h create mode 100644 thirdparty/libvpx/vpx_ports/config.h create mode 100644 thirdparty/libvpx/vpx_ports/emmintrin_compat.h create mode 100644 thirdparty/libvpx/vpx_ports/emms.asm create mode 100644 thirdparty/libvpx/vpx_ports/mem.h create mode 100644 thirdparty/libvpx/vpx_ports/mem_ops.h create mode 100644 thirdparty/libvpx/vpx_ports/mem_ops_aligned.h create mode 100644 thirdparty/libvpx/vpx_ports/msvc.h create mode 100644 thirdparty/libvpx/vpx_ports/system_state.h create mode 100644 thirdparty/libvpx/vpx_ports/vpx_once.h create mode 100644 thirdparty/libvpx/vpx_ports/vpx_timer.h create mode 100644 thirdparty/libvpx/vpx_ports/x86.h create mode 100644 thirdparty/libvpx/vpx_ports/x86_abi_support.asm create mode 100644 thirdparty/libvpx/vpx_scale/generic/yv12config.c create mode 100644 thirdparty/libvpx/vpx_scale/generic/yv12extend.c create mode 100644 thirdparty/libvpx/vpx_scale/vpx_scale.h create mode 100644 thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c create mode 100644 thirdparty/libvpx/vpx_scale/yv12config.h create mode 100644 thirdparty/libvpx/vpx_util/endian_inl.h create mode 100644 thirdparty/libvpx/vpx_util/vpx_thread.c create mode 100644 thirdparty/libvpx/vpx_util/vpx_thread.h create mode 100644 thirdparty/libvpx/vpx_version.h diff --git a/thirdparty/README.md b/thirdparty/README.md index 1de2949d442..0b0917c1e90 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -114,6 +114,13 @@ Files extracted from upstream source: - COPYING +## libvpx + +- Upstream: http://www.webmproject.org/code/ +- Version: 1.6.0 +- License: BSD-3-Clause + + ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ diff --git a/thirdparty/libvpx/AUTHORS b/thirdparty/libvpx/AUTHORS new file mode 100644 index 00000000000..fcd5c534a8c --- /dev/null +++ b/thirdparty/libvpx/AUTHORS @@ -0,0 +1,142 @@ +# This file is automatically generated from the git commit history +# by tools/gen_authors.sh. + +Aaron Watry +Abo Talib Mahfoodh +Adam Xu +Adrian Grange +Aℓex Converse +Ahmad Sharif +Alexander Voronov +Alexis Ballier +Alok Ahuja +Alpha Lam +A.Mahfoodh +Ami Fischman +Andoni Morales Alastruey +Andres Mejia +Andrew Russell +Angie Chiang +Aron Rosenberg +Attila Nagy +Brion Vibber +changjun.yang +Charles 'Buck' Krasic +chm +Christian Duvivier +Daniele Castagna +Daniel Kang +Deb Mukherjee +Dim Temp +Dmitry Kovalev +Dragan Mrdjan +Ed Baker +Ehsan Akhgari +Erik Niemeyer +Fabio Pedretti +Frank Galligan +Fredrik Söderquist +Fritz Koenig +Gaute Strokkenes +Geza Lore +Ghislain MARY +Giuseppe Scrivano +Gordana Cmiljanovic +Guillaume Martres +Guillermo Ballester Valor +Hangyu Kuang +Hanno Böck +Henrik Lundin +Hui Su +Ivan Maltz +Jacek Caban +Jacky Chen +James Berry +James Yu +James Zern +Jan Gerber +Jan Kratochvil +Janne Salonen +Jean-Yves Avenard +Jeff Faust +Jeff Muizelaar +Jeff Petkau +Jia Jia +Jian Zhou +Jim Bankoski +Jingning Han +Joey Parrish +Johann Koenig +John Koleszar +Johnny Klonaris +John Stark +Joshua Bleecher Snyder +Joshua Litt +Julia Robson +Justin Clift +Justin Lebar +KO Myung-Hun +Lawrence Velázquez +Linfeng Zhang +Lou Quillio +Luca Barbato +Makoto Kato +Mans Rullgard +Marco Paniconi +Mark Mentovai +Martin Ettl +Martin Storsjo +Matthew Heaney +Michael Kohler +Mike Frysinger +Mike Hommey +Mikhal Shemer +Minghai Shang +Morton Jonuschat +Nico Weber +Parag Salasakar +Pascal Massimino +Patrik Westin +Paul Wilkins +Pavol Rusnak +Paweł Hajdan +Pengchong Jin +Peter de Rivaz +Philip Jägenstedt +Priit Laes +Rafael Ávila de Espíndola +Rafaël Carré +Ralph Giles +Rob Bradford +Ronald S. Bultje +Rui Ueyama +Sami Pietilä +Sasi Inguva +Scott Graham +Scott LaVarnway +Sean McGovern +Sergey Kolomenkin +Sergey Ulanov +Shimon Doodkin +Shunyao Li +Stefan Holmer +Suman Sunkara +Taekhyun Kim +Takanori MATSUURA +Tamar Levy +Tao Bai +Tero Rintaluoma +Thijs Vermeir +Tim Kopp +Timothy B. Terriberry +Tom Finegan +Vignesh Venkatasubramanian +Yaowu Xu +Yi Luo +Yongzhe Wang +Yunqing Wang +Yury Gitman +Zoe Liu +Google Inc. +The Mozilla Foundation +The Xiph.Org Foundation diff --git a/thirdparty/libvpx/CHANGELOG b/thirdparty/libvpx/CHANGELOG new file mode 100644 index 00000000000..795d395f96a --- /dev/null +++ b/thirdparty/libvpx/CHANGELOG @@ -0,0 +1,654 @@ +2016-07-20 v1.6.0 "Khaki Campbell Duck" + This release improves upon the VP9 encoder and speeds up the encoding and + decoding processes. + + - Upgrading: + This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum + in vpx_image and some minor changes to the VP8_COMP structure. + + The default key frame interval for VP9 has changed from 128 to 9999. + + - Enhancement: + A core focus has been performance for low end Intel processors. SSSE3 + instructions such as 'pshufb' have been avoided and instructions have been + reordered to better accommodate the more constrained pipelines. + + As a result, devices based on Celeron processors have seen substantial + decoding improvements. From Indian Runner Duck to Javan Whistling Duck, + decoding speed improved between 10 and 30%. Between Javan Whistling Duck + and Khaki Campbell Duck, it improved another 10 to 15%. + + While Celeron benefited most, Core-i5 also improved 5% and 10% between the + respective releases. + + Realtime performance for WebRTC for both speed and quality has received a + lot of attention. + + - Bug Fixes: + A number of fuzzing issues, found variously by Mozilla, Chromium and others, + have been fixed and we strongly recommend updating. + +2015-11-09 v1.5.0 "Javan Whistling Duck" + This release improves upon the VP9 encoder and speeds up the encoding and + decoding processes. + + - Upgrading: + This release is ABI incompatible with 1.4.0. It drops deprecated VP8 + controls and adds a variety of VP9 controls for testing. + + The vpxenc utility now prefers VP9 by default. + + - Enhancements: + Faster VP9 encoding and decoding + Smaller library size by combining functions used by VP8 and VP9 + + - Bug Fixes: + A variety of fuzzing issues + +2015-04-03 v1.4.0 "Indian Runner Duck" + This release includes significant improvements to the VP9 codec. + + - Upgrading: + This release is ABI incompatible with 1.3.0. It drops the compatibility + layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec + controls for VP9. + + - Enhancements: + Faster VP9 encoding and decoding + Multithreaded VP9 decoding (tile and frame-based) + Multithreaded VP9 encoding - on by default + YUV 4:2:2 and 4:4:4 support in VP9 + 10 and 12bit support in VP9 + 64bit ARM support by replacing ARM assembly with intrinsics + + - Bug Fixes: + Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0 + files. + + - Known Issues: + Frame Parallel decoding fails for segmented and non-420 files. + +2013-11-15 v1.3.0 "Forest" + This release introduces the VP9 codec in a backward-compatible way. + All existing users of VP8 can continue to use the library without + modification. However, some VP8 options do not map to VP9 in the same manner. + + The VP9 encoder in this release is not feature complete. Users interested in + the encoder are advised to use the git master branch and discuss issues on + libvpx mailing lists. + + - Upgrading: + This release is ABI and API compatible with Duclair (v1.0.0). Users + of older releases should refer to the Upgrading notes in this document + for that release. + + - Enhancements: + Get rid of bashisms in the main build scripts + Added usage info on command line options + Add lossless compression mode + Dll build of libvpx + Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13) + Add option to disable documentation + configure: add --enable-external-build support + make: support V=1 as short form of verbose=yes + configure: support mingw-w64 + configure: support hardfloat armv7 CHOSTS + configure: add support for android x86 + Add estimated completion time to vpxenc + Don't exit on decode errors in vpxenc + vpxenc: support scaling prior to encoding + vpxdec: support scaling output + vpxenc: improve progress indicators with --skip + msvs: Don't link to winmm.lib + Add a new script for producing vcxproj files + Produce Visual Studio 10 and 11 project files + Produce Windows Phone project files + msvs-build: use msbuild for vs >= 2005 + configure: default configure log to config.log + Add encoding option --static-thresh + + - Speed: + Miscellaneous speed optimizations for VP8 and VP9. + + - Quality: + In general, quality is consistent with the Eider release. + + - Bug Fixes: + This release represents approximately a year of engineering effort, + and contains multiple bug fixes. Please refer to git history for details. + + +2012-12-21 v1.2.0 + This release acts as a checkpoint for a large amount of internal refactoring + and testing. It also contains a number of small bugfixes, so all users are + encouraged to upgrade. + + - Upgrading: + This release is ABI and API compatible with Duclair (v1.0.0). Users + of older releases should refer to the Upgrading notes in this + document for that release. + + - Enhancements: + VP8 optimizations for MIPS dspr2 + vpxenc: add -quiet option + + - Speed: + Encoder and decoder speed is consistent with the Eider release. + + - Quality: + In general, quality is consistent with the Eider release. + + Minor tweaks to ARNR filtering + Minor improvements to real time encoding with multiple temporal layers + + - Bug Fixes: + Fixes multithreaded encoder race condition in loopfilter + Fixes multi-resolution threaded encoding + Fix potential encoder dead-lock after picture resize + + +2012-05-09 v1.1.0 "Eider" + This introduces a number of enhancements, mostly focused on real-time + encoding. In addition, it fixes a decoder bug (first introduced in + Duclair) so all users of that release are encouraged to upgrade. + + - Upgrading: + This release is ABI and API compatible with Duclair (v1.0.0). Users + of older releases should refer to the Upgrading notes in this + document for that release. + + This release introduces a new temporal denoiser, controlled by the + VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not + currently take a strength parameter, so the control is effectively + a boolean - zero (off) or non-zero (on). For compatibility with + existing applications, the values accepted are the same as those + for the spatial denoiser (0-6). The temporal denoiser is enabled + by default, and the older spatial denoiser may be restored by + configuring with --disable-temporal-denoising. The temporal denoiser + is more computationally intensive than the spatial one. + + This release removes support for a legacy, decode only API that was + supported, but deprecated, at the initial release of libvpx + (v0.9.0). This is not expected to have any impact. If you are + impacted, you can apply a reversion to commit 2bf8fb58 locally. + Please update to the latest libvpx API if you are affected. + + - Enhancements: + Adds a motion compensated temporal denoiser to the encoder, which + gives higher quality than the older spatial denoiser. (See above + for notes on upgrading). + + In addition, support for new compilers and platforms were added, + including: + improved support for XCode + Android x86 NDK build + OS/2 support + SunCC support + + Changing resolution with vpx_codec_enc_config_set() is now + supported. Previously, reinitializing the codec was required to + change the input resolution. + + The vpxenc application has initial support for producing multiple + encodes from the same input in one call. Resizing is not yet + supported, but varying other codec parameters is. Use -- to + delineate output streams. Options persist from one stream to the + next. + + Also, the vpxenc application will now use a keyframe interval of + 5 seconds by default. Use the --kf-max-dist option to override. + + - Speed: + Decoder performance improved 2.5% versus Duclair. Encoder speed is + consistent with Duclair for most material. Two pass encoding of + slideshow-like material will see significant improvements. + + Large realtime encoding speed gains at a small quality expense are + possible by configuring the on-the-fly bitpacking experiment with + --enable-onthefly-bitpacking. Realtime encoder can be up to 13% + faster (ARM) depending on the number of threads and bitrate + settings. This technique sees constant gain over the 5-16 speed + range. For VC style input the loss seen is up to 0.2dB. See commit + 52cf4dca for further details. + + - Quality: + On the whole, quality is consistent with the Duclair release. Some + tweaks: + + Reduced blockiness in easy sections by applying a penalty to + intra modes. + + Improved quality of static sections (like slideshows) with + two pass encoding. + + Improved keyframe sizing with multiple temporal layers + + - Bug Fixes: + Corrected alt-ref contribution to frame rate for visible updates + to the alt-ref buffer. This affected applications making manual + usage of the frame reference flags, or temporal layers. + + Additional constraints were added to disable multi-frame quality + enhancement (MFQE) in sections of the frame where there is motion. + (#392) + + Fixed corruption issues when vpx_codec_enc_config_set() was called + with spatial resampling enabled. + + Fixed a decoder error introduced in Duclair where the segmentation + map was not being reinitialized on keyframes (#378) + + +2012-01-27 v1.0.0 "Duclair" + Our fourth named release, focused on performance and features related to + real-time encoding. It also fixes a decoder crash bug introduced in + v0.9.7, so all users of that release are encouraged to upgrade. + + - Upgrading: + This release is ABI incompatible with prior releases of libvpx, so the + "major" version number has been bumped to 1. You must recompile your + applications against the latest version of the libvpx headers. The + API remains compatible, and this should not require code changes in most + applications. + + - Enhancements: + This release introduces several substantial new features to the encoder, + of particular interest to real time streaming applications. + + Temporal scalability allows the encoder to produce a stream that can + be decimated to different frame rates, with independent rate targetting + for each substream. + + Multiframe quality enhancement postprocessing can make visual quality + more consistent in the presence of frames that are substantially + different quality than the surrounding frames, as in the temporal + scalability case and in some forced keyframe scenarios. + + Multiple-resolution encoding support allows the encoding of the + same content at different resolutions faster than encoding them + separately. + + - Speed: + Optimization targets for this release included the decoder and the real- + time modes of the encoder. Decoder speed on x86 has improved 10.5% with + this release. Encoder improvements followed a curve where speeds 1-3 + improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved + 1.5% to 10.5%, respectively. "Best" mode speed is consistent with the + Cayuga release. + + - Quality: + Encoder quality in the single stream case is consistent with the Cayuga + release. + + - Bug Fixes: + This release fixes an OOB read decoder crash bug present in v0.9.7 + related to the clamping of motion vectors in SPLITMV blocks. This + behavior could be triggered by corrupt input or by starting + decoding from a P-frame. + + +2011-08-15 v0.9.7-p1 "Cayuga" patch 1 + This is an incremental bugfix release against Cayuga. All users of that + release are strongly encouraged to upgrade. + + - Fix potential OOB reads (cdae03a) + + An unbounded out of bounds read was discovered when the + decoder was requested to perform error concealment (new in + Cayuga) given a frame with corrupt partition sizes. + + A bounded out of bounds read was discovered affecting all + versions of libvpx. Given an multipartition input frame that + is truncated between the mode/mv partition and the first + residiual paritition (in the block of partition offsets), up + to 3 extra bytes could have been read from the source buffer. + The code will not take any action regardless of the contents + of these undefined bytes, as the truncated buffer is detected + immediately following the read based on the calculated + starting position of the coefficient partition. + + - Fix potential error concealment crash when the very first frame + is missing or corrupt (a609be5) + + - Fix significant artifacts in error concealment (a4c2211, 99d870a) + + - Revert 1-pass CBR rate control changes (e961317) + Further testing showed this change produced undesirable visual + artifacts, rolling back for now. + + +2011-08-02 v0.9.7 "Cayuga" + Our third named release, focused on a faster, higher quality, encoder. + + - Upgrading: + This release is backwards compatible with Aylesbury (v0.9.5) and + Bali (v0.9.6). Users of older releases should refer to the Upgrading + notes in this document for that release. + + - Enhancements: + Stereo 3D format support for vpxenc + Runtime detection of available processor cores. + Allow specifying --end-usage by enum name + vpxdec: test for frame corruption + vpxenc: add quantizer histogram display + vpxenc: add rate histogram display + Set VPX_FRAME_IS_DROPPABLE + update configure for ios sdk 4.3 + Avoid text relocations in ARM vp8 decoder + Generate a vpx.pc file for pkg-config. + New ways of passing encoded data between encoder and decoder. + + - Speed: + This release includes across-the-board speed improvements to the + encoder. On x86, these measure at approximately 11.5% in Best mode, + 21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6). + On ARM Cortex A9 with Neon extensions, real-time encoding of video + telephony content is 35% faster than Bali on single core and 48% + faster on multi-core. On the NVidia Tegra2 platform, real time + encoding is 40% faster than Bali. + + Decoder speed was not a priority for this release, but improved + approximately 8.4% on x86. + + Reduce motion vector search on alt-ref frame. + Encoder loopfilter running in its own thread + Reworked loopfilter to precalculate more parameters + SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}(). + Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3. + Removed redundant checks + Reduced structure sizes + utilize preload in ARMv6 MC/LPF/Copy routines + ARM optimized quantization, dfct, variance, subtract + Increase chrow row alignment to 16 bytes. + disable trellis optimization for first pass + Write SSSE3 sub-pixel filter function + Improve SSE2 half-pixel filter funtions + Add vp8_sub_pixel_variance16x8_ssse3 function + Reduce unnecessary distortion computation + Use diamond search to replace full search + Preload reference area in sub-pixel motion search (real-time mode) + + - Quality: + This release focused primarily on one-pass use cases, including + video conferencing. Low latency data rate control was significantly + improved, improving streamability over bandwidth constrained links. + Added support for error concealment, allowing frames to maintain + visual quality in the presence of substantial packet loss. + + Add rc_max_intra_bitrate_pct control + Limit size of initial keyframe in one-pass. + Improve framerate adaptation + Improved 1-pass CBR rate control + Improved KF insertion after fades to still. + Improved key frame detection. + Improved activity masking (lower PSNR impact for same SSIM boost) + Improved interaction between GF and ARFs + Adding error-concealment to the decoder. + Adding support for independent partitions + Adjusted rate-distortion constants + + + - Bug Fixes: + Removed firstpass motion map + Fix parallel make install + Fix multithreaded encoding for 1 MB wide frame + Fixed iwalsh_neon build problems with RVDS4.1 + Fix semaphore emulation, spin-wait intrinsics on Windows + Fix build with xcode4 and simplify GLOBAL. + Mark ARM asm objects as allowing a non-executable stack. + Fix vpxenc encoding incorrect webm file header on big endian + + +2011-03-07 v0.9.6 "Bali" + Our second named release, focused on a faster, higher quality, encoder. + + - Upgrading: + This release is backwards compatible with Aylesbury (v0.9.5). Users + of older releases should refer to the Upgrading notes in this + document for that release. + + - Enhancements: + vpxenc --psnr shows a summary when encode completes + --tune=ssim option to enable activity masking + improved postproc visualizations for development + updated support for Apple iOS to SDK 4.2 + query decoder to determine which reference frames were updated + implemented error tracking in the decoder + fix pipe support on windows + + - Speed: + Primary focus was on good quality mode, speed 0. Average improvement + on x86 about 40%, up to 100% on user-generated content at that speed. + Best quality mode speed improved 35%, and realtime speed 10-20%. This + release also saw significant improvement in realtime encoding speed + on ARM platforms. + + Improved encoder threading + Dont pick encoder filter level when loopfilter is disabled. + Avoid double copying of key frames into alt and golden buffer + FDCT optimizations. + x86 sse2 temporal filter + SSSE3 version of fast quantizer + vp8_rd_pick_best_mbsegmentation code restructure + Adjusted breakout RD for SPLITMV + Changed segmentation check order + Improved rd_pick_intra4x4block + Adds armv6 optimized variance calculation + ARMv6 optimized sad16x16 + ARMv6 optimized half pixel variance calculations + Full search SAD function optimization in SSE4.1 + Improve MV prediction accuracy to achieve performance gain + Improve MV prediction in vp8_pick_inter_mode() for speed>3 + + - Quality: + Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release + also includes support for "activity masking," which greatly improves + SSIM at the expense of PSNR. For now, this feature is available with + the --tune=ssim option. Further experimentation in this area + is ongoing. This release also introduces a new rate control mode + called "CQ," which changes the allocation of bits within a clip to + the sections where they will have the most visual impact. + + Tuning for the more exact quantizer. + Relax rate control for last few frames + CQ Mode + Limit key frame quantizer for forced key frames. + KF/GF Pulsing + Add simple version of activity masking. + make rdmult adaptive for intra in quantizer RDO + cap the best quantizer for 2nd order DC + change the threshold of DC check for encode breakout + + - Bug Fixes: + Fix crash on Sparc Solaris. + Fix counter of fixed keyframe distance + ARNR filter pointer update bug fix + Fixed use of motion percentage in KF/GF group calc + Changed condition for using RD in Intra Mode + Fix encoder real-time only configuration. + Fix ARM encoder crash with multiple token partitions + Fixed bug first cluster timecode of webm file is wrong. + Fixed various encoder bugs with odd-sized images + vp8e_get_preview fixed when spatial resampling enabled + quantizer: fix assertion in fast quantizer path + Allocate source buffers to be multiples of 16 + Fix for manual Golden frame frequency + Fix drastic undershoot in long form content + + +2010-10-28 v0.9.5 "Aylesbury" + Our first named release, focused on a faster decoder, and a better encoder. + + - Upgrading: + This release incorporates backwards-incompatible changes to the + ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec. + + vpxdec + * the -q (quiet) option has been removed, and replaced with + -v (verbose). the output is quiet by default. Use -v to see + the version number of the binary. + + * The default behavior is now to write output to a single file + instead of individual frames. The -y option has been removed. + Y4M output is the default. + + * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12 + options must be specified. + + $ ivfdec -o OUTPUT INPUT + $ vpxdec --i420 -o OUTPUT INPUT + + * If an output file is not specified, the default is to write + Y4M to stdout. This makes piping more natural. + + $ ivfdec -y -o - INPUT | ... + $ vpxdec INPUT | ... + + * The output file has additional flexibility for formatting the + filename. It supports escape characters for constructing a + filename from the width, height, and sequence number. This + replaces the -p option. To get the equivalent: + + $ ivfdec -p frame INPUT + $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT + + vpxenc + * The output file must be specified with -o, rather than as the + last argument. + + $ ivfenc INPUT OUTPUT + $ vpxenc -o OUTPUT INPUT + + * The output defaults to webm. To get IVF output, use the --ivf + option. + + $ ivfenc INPUT OUTPUT.ivf + $ vpxenc -o OUTPUT.ivf --ivf INPUT + + + - Enhancements: + ivfenc and ivfdec have been renamed to vpxenc, vpxdec. + vpxdec supports .webm input + vpxdec writes .y4m by default + vpxenc writes .webm output by default + vpxenc --psnr now shows the average/overall PSNR at the end + ARM platforms now support runtime cpu detection + vpxdec visualizations added for motion vectors, block modes, references + vpxdec now silent by default + vpxdec --progress shows frame-by-frame timing information + vpxenc supports the distinction between --fps and --timebase + NASM is now a supported assembler + configure: enable PIC for shared libs by default + configure: add --enable-small + configure: support for ppc32-linux-gcc + configure: support for sparc-solaris-gcc + + - Bugs: + Improve handling of invalid frames + Fix valgrind errors in the NEON loop filters. + Fix loopfilter delta zero transitions + Fix valgrind errors in vp8_sixtap_predict8x4_armv6(). + Build fixes for darwin-icc + + - Speed: + 20-40% (average 28%) improvement in libvpx decoder speed, + including: + Rewrite vp8_short_walsh4x4_sse2() + Optimizations on the loopfilters. + Miscellaneous improvements for Atom + Add 4-tap version of 2nd-pass ARMv6 MC filter. + Improved multithread utilization + Better instruction choices on x86 + reorder data to use wider instructions + Update NEON wide idcts + Make block access to frame buffer sequential + Improved subset block search + Bilinear subpixel optimizations for ssse3. + Decrease memory footprint + + Encoder speed improvements (percentage gain not measured): + Skip unnecessary search of identical frames + Add SSE2 subtract functions + Improve bounds checking in vp8_diamond_search_sadx4() + Added vp8_fast_quantize_b_sse2 + + - Quality: + Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality + encoding mode, and up to 60% improvement on very noisy, still + or slow moving source video + + Motion compensated temporal filter for Alt-Ref Noise Reduction + Improved use of trellis quantization on 2nd order Y blocks + Tune effect of motion on KF/GF boost in two pass + Allow coefficient optimization for good quality speed 0. + Improved control of active min quantizer for two pass. + Enable ARFs for non-lagged compress + +2010-09-02 v0.9.2 + - Enhancements: + Disable frame dropping by default + Improved multithreaded performance + Improved Force Key Frame Behaviour + Increased rate control buffer level precision + Fix bug in 1st pass motion compensation + ivfenc: correct fixed kf interval, --disable-kf + - Speed: + Changed above and left context data layout + Rework idct calling structure. + Removed unnecessary MB_MODE_INFO copies + x86: SSSE3 sixtap prediction + Reworked IDCT to include reconstruction (add) step + Swap alt/gold/new/last frame buffer ptrs instead of copying. + Improve SSE2 loopfilter functions + Change bitreader to use a larger window. + Avoid loopfilter reinitialization when possible + - Quality: + Normalize quantizer's zero bin and rounding factors + Add trellis quantization. + Make the quantizer exact. + Updates to ARNR filtering algorithm + Fix breakout thresh computation for golden & AltRef frames + Redo the forward 4x4 dct + Improve the accuracy of forward walsh-hadamard transform + Further adjustment of RD behaviour with Q and Zbin. + - Build System: + Allow linking of libs built with MinGW to MSVC + Fix target auto-detection on mingw32 + Allow --cpu= to work for x86. + configure: pass original arguments through to make dist + Fix builds without runtime CPU detection + msvs: fix install of codec sources + msvs: Change devenv.com command line for better msys support + msvs: Add vs9 targets. + Add x86_64-linux-icc target + - Bugs: + Potential crashes on older MinGW builds + Fix two-pass framrate for Y4M input. + Fixed simple loop filter, other crashes on ARM v6 + arm: fix missing dependency with --enable-shared + configure: support directories containing .o + Replace pinsrw (SSE) with MMX instructions + apple: include proper mach primatives + Fixed rate control bug with long key frame interval. + Fix DSO link errors on x86-64 when not using a version script + Fixed buffer selection for UV in AltRef filtering + + +2010-06-17 v0.9.1 + - Enhancements: + * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O + * Speed optimizations + - Bugfixes: + * Rate control + * Prevent out-of-bounds accesses on invalid data + - Build system updates: + * Detect toolchain to be used automatically for native builds + * Support building shared libraries + * Better autotools emulation (--prefix, --libdir, DESTDIR) + - Updated LICENSE + * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html + + +2010-05-18 v0.9.0 + - Initial open source release. Welcome to WebM and VP8! + diff --git a/thirdparty/libvpx/LICENSE b/thirdparty/libvpx/LICENSE new file mode 100644 index 00000000000..1ce44343c4a --- /dev/null +++ b/thirdparty/libvpx/LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2010, The WebM Project authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google, nor the WebM Project, nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/thirdparty/libvpx/PATENTS b/thirdparty/libvpx/PATENTS new file mode 100644 index 00000000000..caedf607e95 --- /dev/null +++ b/thirdparty/libvpx/PATENTS @@ -0,0 +1,23 @@ +Additional IP Rights Grant (Patents) +------------------------------------ + +"These implementations" means the copyrightable works that implement the WebM +codecs distributed by Google as part of the WebM Project. + +Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, +royalty-free, irrevocable (except as stated in this section) patent license to +make, have made, use, offer to sell, sell, import, transfer, and otherwise +run, modify and propagate the contents of these implementations of WebM, where +such license applies only to those patent claims, both currently owned by +Google and acquired in the future, licensable by Google that are necessarily +infringed by these implementations of WebM. This grant does not include claims +that would be infringed only as a consequence of further modification of these +implementations. If you or your agent or exclusive licensee institute or order +or agree to the institution of patent litigation or any other patent +enforcement activity against any entity (including a cross-claim or +counterclaim in a lawsuit) alleging that any of these implementations of WebM +or any code incorporated within any of these implementations of WebM +constitute direct or contributory patent infringement, or inducement of +patent infringement, then any patent rights granted to you under this License +for these implementations of WebM shall terminate as of the date such +litigation is filed. diff --git a/thirdparty/libvpx/third_party/x86inc/LICENSE b/thirdparty/libvpx/third_party/x86inc/LICENSE new file mode 100644 index 00000000000..7d07645a171 --- /dev/null +++ b/thirdparty/libvpx/third_party/x86inc/LICENSE @@ -0,0 +1,18 @@ +Copyright (C) 2005-2012 x264 project + +Authors: Loren Merritt + Anton Mitrofanov + Jason Garrett-Glaser + Henrik Gramner + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/thirdparty/libvpx/third_party/x86inc/README.libvpx b/thirdparty/libvpx/third_party/x86inc/README.libvpx new file mode 100644 index 00000000000..8d3cd966da1 --- /dev/null +++ b/thirdparty/libvpx/third_party/x86inc/README.libvpx @@ -0,0 +1,20 @@ +URL: https://git.videolan.org/git/x264.git +Version: d23d18655249944c1ca894b451e2c82c7a584c62 +License: ISC +License File: LICENSE + +Description: +x264/libav's framework for x86 assembly. Contains a variety of macros and +defines that help automatically allow assembly to work cross-platform. + +Local Modifications: +Get configuration from vpx_config.asm. +Prefix functions with vpx by default. +Manage name mangling (prefixing with '_') manually because 'PREFIX' does not + exist in libvpx. +Expand PIC default to macho64 and respect CONFIG_PIC from libvpx +Set 'private_extern' visibility for macho targets. +Copy PIC 'GLOBAL' macros from x86_abi_support.asm +Use .text instead of .rodata on macho to avoid broken tables in PIC mode. +Use .text with no alignment for aout +Only use 'hidden' visibility with Chromium diff --git a/thirdparty/libvpx/third_party/x86inc/x86inc.asm b/thirdparty/libvpx/third_party/x86inc/x86inc.asm new file mode 100644 index 00000000000..b647dff2f88 --- /dev/null +++ b/thirdparty/libvpx/third_party/x86inc/x86inc.asm @@ -0,0 +1,1649 @@ +;***************************************************************************** +;* x86inc.asm: x264asm abstraction layer +;***************************************************************************** +;* Copyright (C) 2005-2016 x264 project +;* +;* Authors: Loren Merritt +;* Anton Mitrofanov +;* Fiona Glaser +;* Henrik Gramner +;* +;* Permission to use, copy, modify, and/or distribute this software for any +;* purpose with or without fee is hereby granted, provided that the above +;* copyright notice and this permission notice appear in all copies. +;* +;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +;***************************************************************************** + +; This is a header file for the x264ASM assembly language, which uses +; NASM/YASM syntax combined with a large number of macros to provide easy +; abstraction between different calling conventions (x86_32, win64, linux64). +; It also has various other useful features to simplify writing the kind of +; DSP functions that are most often used in x264. + +; Unlike the rest of x264, this file is available under an ISC license, as it +; has significant usefulness outside of x264 and we want it to be available +; to the largest audience possible. Of course, if you modify it for your own +; purposes to add a new feature, we strongly encourage contributing a patch +; as this feature might be useful for others as well. Send patches or ideas +; to x264-devel@videolan.org . + +%include "vpx_config.asm" + +%ifndef private_prefix + %define private_prefix vpx +%endif + +%ifndef public_prefix + %define public_prefix private_prefix +%endif + +%ifndef STACK_ALIGNMENT + %if ARCH_X86_64 + %define STACK_ALIGNMENT 16 + %else + %define STACK_ALIGNMENT 4 + %endif +%endif + +%define WIN64 0 +%define UNIX64 0 +%if ARCH_X86_64 + %ifidn __OUTPUT_FORMAT__,win32 + %define WIN64 1 + %elifidn __OUTPUT_FORMAT__,win64 + %define WIN64 1 + %elifidn __OUTPUT_FORMAT__,x64 + %define WIN64 1 + %else + %define UNIX64 1 + %endif +%endif + +%define FORMAT_ELF 0 +%ifidn __OUTPUT_FORMAT__,elf + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf32 + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf64 + %define FORMAT_ELF 1 +%endif + +%define FORMAT_MACHO 0 +%ifidn __OUTPUT_FORMAT__,macho32 + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho64 + %define FORMAT_MACHO 1 +%endif + +; Set PREFIX for libvpx builds. +%if FORMAT_ELF + %undef PREFIX +%elif WIN64 + %undef PREFIX +%else + %define PREFIX +%endif + +%ifdef PREFIX + %define mangle(x) _ %+ x +%else + %define mangle(x) x +%endif + +; In some instances macho32 tables get misaligned when using .rodata. +; When looking at the disassembly it appears that the offset is either +; correct or consistently off by 90. Placing them in the .text section +; works around the issue. It appears to be specific to the way libvpx +; handles the tables. +%macro SECTION_RODATA 0-1 16 + %ifidn __OUTPUT_FORMAT__,macho32 + SECTION .text align=%1 + fakegot: + %elifidn __OUTPUT_FORMAT__,aout + SECTION .text + %else + SECTION .rodata align=%1 + %endif +%endmacro + +; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" +; from original code is added in for 64bit. +%ifidn __OUTPUT_FORMAT__,elf32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,macho32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,win32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,aout +%define ABI_IS_32BIT 1 +%else +%define ABI_IS_32BIT 0 +%endif + +%if ABI_IS_32BIT + %if CONFIG_PIC=1 + %ifidn __OUTPUT_FORMAT__,elf32 + %define GET_GOT_DEFINED 1 + %define WRT_PLT wrt ..plt + %macro GET_GOT 1 + extern _GLOBAL_OFFSET_TABLE_ + push %1 + call %%get_got + %%sub_offset: + jmp %%exitGG + %%get_got: + mov %1, [esp] + add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc + ret + %%exitGG: + %undef GLOBAL + %define GLOBAL(x) x + %1 wrt ..gotoff + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %elifidn __OUTPUT_FORMAT__,macho32 + %define GET_GOT_DEFINED 1 + %macro GET_GOT 1 + push %1 + call %%get_got + %%get_got: + pop %1 + %undef GLOBAL + %define GLOBAL(x) x + %1 - %%get_got + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %else + %define GET_GOT_DEFINED 0 + %endif + %endif + + %if ARCH_X86_64 == 0 + %undef PIC + %endif + +%else + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) rel x + %define WRT_PLT wrt ..plt + + %if WIN64 + %define PIC + %elifidn __OUTPUT_FORMAT__,macho64 + %define PIC + %elif CONFIG_PIC + %define PIC + %endif +%endif + +%ifnmacro GET_GOT + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) x +%endif +%ifndef RESTORE_GOT + %define RESTORE_GOT +%endif +%ifndef WRT_PLT + %define WRT_PLT +%endif + +%ifdef PIC + default rel +%endif + +%ifndef GET_GOT_DEFINED + %define GET_GOT_DEFINED 0 +%endif +; Done with PIC macros + +%ifdef __NASM_VER__ + %use smartalign +%endif + +; Macros to eliminate most code duplication between x86_32 and x86_64: +; Currently this works only for leaf functions which load all their arguments +; into registers at the start, and make no other use of the stack. Luckily that +; covers most of x264's asm. + +; PROLOGUE: +; %1 = number of arguments. loads them from stack if needed. +; %2 = number of registers used. pushes callee-saved regs if needed. +; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. +; %4 = (optional) stack size to be allocated. The stack will be aligned before +; allocating the specified stack size. If the required stack alignment is +; larger than the known stack alignment the stack will be manually aligned +; and an extra register will be allocated to hold the original stack +; pointer (to not invalidate r0m etc.). To prevent the use of an extra +; register as stack pointer, request a negative stack size. +; %4+/%5+ = list of names to define to registers +; PROLOGUE can also be invoked by adding the same options to cglobal + +; e.g. +; cglobal foo, 2,3,7,0x40, dst, src, tmp +; declares a function (foo) that automatically loads two arguments (dst and +; src) into registers, uses one additional register (tmp) plus 7 vector +; registers (m0-m6) and allocates 0x40 bytes of stack space. + +; TODO Some functions can use some args directly from the stack. If they're the +; last args then you can just not declare them, but if they're in the middle +; we need more flexible macro. + +; RET: +; Pops anything that was pushed by PROLOGUE, and returns. + +; REP_RET: +; Use this instead of RET if it's a branch target. + +; registers: +; rN and rNq are the native-size register holding function argument N +; rNd, rNw, rNb are dword, word, and byte size +; rNh is the high 8 bits of the word size +; rNm is the original location of arg N (a register or on the stack), dword +; rNmp is native size + +%macro DECLARE_REG 2-3 + %define r%1q %2 + %define r%1d %2d + %define r%1w %2w + %define r%1b %2b + %define r%1h %2h + %define %2q %2 + %if %0 == 2 + %define r%1m %2d + %define r%1mp %2 + %elif ARCH_X86_64 ; memory + %define r%1m [rstk + stack_offset + %3] + %define r%1mp qword r %+ %1 %+ m + %else + %define r%1m [rstk + stack_offset + %3] + %define r%1mp dword r %+ %1 %+ m + %endif + %define r%1 %2 +%endmacro + +%macro DECLARE_REG_SIZE 3 + %define r%1q r%1 + %define e%1q r%1 + %define r%1d e%1 + %define e%1d e%1 + %define r%1w %1 + %define e%1w %1 + %define r%1h %3 + %define e%1h %3 + %define r%1b %2 + %define e%1b %2 + %if ARCH_X86_64 == 0 + %define r%1 e%1 + %endif +%endmacro + +DECLARE_REG_SIZE ax, al, ah +DECLARE_REG_SIZE bx, bl, bh +DECLARE_REG_SIZE cx, cl, ch +DECLARE_REG_SIZE dx, dl, dh +DECLARE_REG_SIZE si, sil, null +DECLARE_REG_SIZE di, dil, null +DECLARE_REG_SIZE bp, bpl, null + +; t# defines for when per-arch register allocation is more complex than just function arguments + +%macro DECLARE_REG_TMP 1-* + %assign %%i 0 + %rep %0 + CAT_XDEFINE t, %%i, r%1 + %assign %%i %%i+1 + %rotate 1 + %endrep +%endmacro + +%macro DECLARE_REG_TMP_SIZE 0-* + %rep %0 + %define t%1q t%1 %+ q + %define t%1d t%1 %+ d + %define t%1w t%1 %+ w + %define t%1h t%1 %+ h + %define t%1b t%1 %+ b + %rotate 1 + %endrep +%endmacro + +DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 + +%if ARCH_X86_64 + %define gprsize 8 +%else + %define gprsize 4 +%endif + +%macro PUSH 1 + push %1 + %ifidn rstk, rsp + %assign stack_offset stack_offset+gprsize + %endif +%endmacro + +%macro POP 1 + pop %1 + %ifidn rstk, rsp + %assign stack_offset stack_offset-gprsize + %endif +%endmacro + +%macro PUSH_IF_USED 1-* + %rep %0 + %if %1 < regs_used + PUSH r%1 + %endif + %rotate 1 + %endrep +%endmacro + +%macro POP_IF_USED 1-* + %rep %0 + %if %1 < regs_used + pop r%1 + %endif + %rotate 1 + %endrep +%endmacro + +%macro LOAD_IF_USED 1-* + %rep %0 + %if %1 < num_args + mov r%1, r %+ %1 %+ mp + %endif + %rotate 1 + %endrep +%endmacro + +%macro SUB 2 + sub %1, %2 + %ifidn %1, rstk + %assign stack_offset stack_offset+(%2) + %endif +%endmacro + +%macro ADD 2 + add %1, %2 + %ifidn %1, rstk + %assign stack_offset stack_offset-(%2) + %endif +%endmacro + +%macro movifnidn 2 + %ifnidn %1, %2 + mov %1, %2 + %endif +%endmacro + +%macro movsxdifnidn 2 + %ifnidn %1, %2 + movsxd %1, %2 + %endif +%endmacro + +%macro ASSERT 1 + %if (%1) == 0 + %error assertion ``%1'' failed + %endif +%endmacro + +%macro DEFINE_ARGS 0-* + %ifdef n_arg_names + %assign %%i 0 + %rep n_arg_names + CAT_UNDEF arg_name %+ %%i, q + CAT_UNDEF arg_name %+ %%i, d + CAT_UNDEF arg_name %+ %%i, w + CAT_UNDEF arg_name %+ %%i, h + CAT_UNDEF arg_name %+ %%i, b + CAT_UNDEF arg_name %+ %%i, m + CAT_UNDEF arg_name %+ %%i, mp + CAT_UNDEF arg_name, %%i + %assign %%i %%i+1 + %endrep + %endif + + %xdefine %%stack_offset stack_offset + %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine + %assign %%i 0 + %rep %0 + %xdefine %1q r %+ %%i %+ q + %xdefine %1d r %+ %%i %+ d + %xdefine %1w r %+ %%i %+ w + %xdefine %1h r %+ %%i %+ h + %xdefine %1b r %+ %%i %+ b + %xdefine %1m r %+ %%i %+ m + %xdefine %1mp r %+ %%i %+ mp + CAT_XDEFINE arg_name, %%i, %1 + %assign %%i %%i+1 + %rotate 1 + %endrep + %xdefine stack_offset %%stack_offset + %assign n_arg_names %0 +%endmacro + +%define required_stack_alignment ((mmsize + 15) & ~15) + +%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) + %ifnum %1 + %if %1 != 0 + %assign %%pad 0 + %assign stack_size %1 + %if stack_size < 0 + %assign stack_size -stack_size + %endif + %if WIN64 + %assign %%pad %%pad + 32 ; shadow space + %if mmsize != 8 + %assign xmm_regs_used %2 + %if xmm_regs_used > 8 + %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers + %endif + %endif + %endif + %if required_stack_alignment <= STACK_ALIGNMENT + ; maintain the current stack alignment + %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) + SUB rsp, stack_size_padded + %else + %assign %%reg_num (regs_used - 1) + %xdefine rstk r %+ %%reg_num + ; align stack, and save original stack location directly above + ; it, i.e. in [rsp+stack_size_padded], so we can restore the + ; stack in a single instruction (i.e. mov rsp, rstk or mov + ; rsp, [rsp+stack_size_padded]) + %if %1 < 0 ; need to store rsp on stack + %xdefine rstkm [rsp + stack_size + %%pad] + %assign %%pad %%pad + gprsize + %else ; can keep rsp in rstk during whole function + %xdefine rstkm rstk + %endif + %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) + mov rstk, rsp + and rsp, ~(required_stack_alignment-1) + sub rsp, stack_size_padded + movifnidn rstkm, rstk + %endif + WIN64_PUSH_XMM + %endif + %endif +%endmacro + +%macro SETUP_STACK_POINTER 1 + %ifnum %1 + %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT + %if %1 > 0 + %assign regs_used (regs_used + 1) + %endif + %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 + ; Ensure that we don't clobber any registers containing arguments + %assign regs_used 5 + UNIX64 * 3 + %endif + %endif + %endif +%endmacro + +%macro DEFINE_ARGS_INTERNAL 3+ + %ifnum %2 + DEFINE_ARGS %3 + %elif %1 == 4 + DEFINE_ARGS %2 + %elif %1 > 4 + DEFINE_ARGS %2, %3 + %endif +%endmacro + +%if WIN64 ; Windows x64 ;================================================= + +DECLARE_REG 0, rcx +DECLARE_REG 1, rdx +DECLARE_REG 2, R8 +DECLARE_REG 3, R9 +DECLARE_REG 4, R10, 40 +DECLARE_REG 5, R11, 48 +DECLARE_REG 6, rax, 56 +DECLARE_REG 7, rdi, 64 +DECLARE_REG 8, rsi, 72 +DECLARE_REG 9, rbx, 80 +DECLARE_REG 10, rbp, 88 +DECLARE_REG 11, R12, 96 +DECLARE_REG 12, R13, 104 +DECLARE_REG 13, R14, 112 +DECLARE_REG 14, R15, 120 + +%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... + %assign num_args %1 + %assign regs_used %2 + ASSERT regs_used >= num_args + SETUP_STACK_POINTER %4 + ASSERT regs_used <= 15 + PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 + ALLOC_STACK %4, %3 + %if mmsize != 8 && stack_size == 0 + WIN64_SPILL_XMM %3 + %endif + LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + DEFINE_ARGS_INTERNAL %0, %4, %5 +%endmacro + +%macro WIN64_PUSH_XMM 0 + ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated. + %if xmm_regs_used > 6 + movaps [rstk + stack_offset + 8], xmm6 + %endif + %if xmm_regs_used > 7 + movaps [rstk + stack_offset + 24], xmm7 + %endif + %if xmm_regs_used > 8 + %assign %%i 8 + %rep xmm_regs_used-8 + movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i + %assign %%i %%i+1 + %endrep + %endif +%endmacro + +%macro WIN64_SPILL_XMM 1 + %assign xmm_regs_used %1 + ASSERT xmm_regs_used <= 16 + %if xmm_regs_used > 8 + ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack. + %assign %%pad (xmm_regs_used-8)*16 + 32 + %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) + SUB rsp, stack_size_padded + %endif + WIN64_PUSH_XMM +%endmacro + +%macro WIN64_RESTORE_XMM_INTERNAL 1 + %assign %%pad_size 0 + %if xmm_regs_used > 8 + %assign %%i xmm_regs_used + %rep xmm_regs_used-8 + %assign %%i %%i-1 + movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32] + %endrep + %endif + %if stack_size_padded > 0 + %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT + mov rsp, rstkm + %else + add %1, stack_size_padded + %assign %%pad_size stack_size_padded + %endif + %endif + %if xmm_regs_used > 7 + movaps xmm7, [%1 + stack_offset - %%pad_size + 24] + %endif + %if xmm_regs_used > 6 + movaps xmm6, [%1 + stack_offset - %%pad_size + 8] + %endif +%endmacro + +%macro WIN64_RESTORE_XMM 1 + WIN64_RESTORE_XMM_INTERNAL %1 + %assign stack_offset (stack_offset-stack_size_padded) + %assign xmm_regs_used 0 +%endmacro + +%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0 + +%macro RET 0 + WIN64_RESTORE_XMM_INTERNAL rsp + POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 + %if mmsize == 32 + vzeroupper + %endif + AUTO_REP_RET +%endmacro + +%elif ARCH_X86_64 ; *nix x64 ;============================================= + +DECLARE_REG 0, rdi +DECLARE_REG 1, rsi +DECLARE_REG 2, rdx +DECLARE_REG 3, rcx +DECLARE_REG 4, R8 +DECLARE_REG 5, R9 +DECLARE_REG 6, rax, 8 +DECLARE_REG 7, R10, 16 +DECLARE_REG 8, R11, 24 +DECLARE_REG 9, rbx, 32 +DECLARE_REG 10, rbp, 40 +DECLARE_REG 11, R12, 48 +DECLARE_REG 12, R13, 56 +DECLARE_REG 13, R14, 64 +DECLARE_REG 14, R15, 72 + +%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... + %assign num_args %1 + %assign regs_used %2 + ASSERT regs_used >= num_args + SETUP_STACK_POINTER %4 + ASSERT regs_used <= 15 + PUSH_IF_USED 9, 10, 11, 12, 13, 14 + ALLOC_STACK %4 + LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 + DEFINE_ARGS_INTERNAL %0, %4, %5 +%endmacro + +%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0 + +%macro RET 0 + %if stack_size_padded > 0 + %if required_stack_alignment > STACK_ALIGNMENT + mov rsp, rstkm + %else + add rsp, stack_size_padded + %endif + %endif + POP_IF_USED 14, 13, 12, 11, 10, 9 + %if mmsize == 32 + vzeroupper + %endif + AUTO_REP_RET +%endmacro + +%else ; X86_32 ;============================================================== + +DECLARE_REG 0, eax, 4 +DECLARE_REG 1, ecx, 8 +DECLARE_REG 2, edx, 12 +DECLARE_REG 3, ebx, 16 +DECLARE_REG 4, esi, 20 +DECLARE_REG 5, edi, 24 +DECLARE_REG 6, ebp, 28 +%define rsp esp + +%macro DECLARE_ARG 1-* + %rep %0 + %define r%1m [rstk + stack_offset + 4*%1 + 4] + %define r%1mp dword r%1m + %rotate 1 + %endrep +%endmacro + +DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 + +%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... + %assign num_args %1 + %assign regs_used %2 + ASSERT regs_used >= num_args + %if num_args > 7 + %assign num_args 7 + %endif + %if regs_used > 7 + %assign regs_used 7 + %endif + SETUP_STACK_POINTER %4 + ASSERT regs_used <= 7 + PUSH_IF_USED 3, 4, 5, 6 + ALLOC_STACK %4 + LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 + DEFINE_ARGS_INTERNAL %0, %4, %5 +%endmacro + +%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0 + +%macro RET 0 + %if stack_size_padded > 0 + %if required_stack_alignment > STACK_ALIGNMENT + mov rsp, rstkm + %else + add rsp, stack_size_padded + %endif + %endif + POP_IF_USED 6, 5, 4, 3 + %if mmsize == 32 + vzeroupper + %endif + AUTO_REP_RET +%endmacro + +%endif ;====================================================================== + +%if WIN64 == 0 + %macro WIN64_SPILL_XMM 1 + %endmacro + %macro WIN64_RESTORE_XMM 1 + %endmacro + %macro WIN64_PUSH_XMM 0 + %endmacro +%endif + +; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either +; a branch or a branch target. So switch to a 2-byte form of ret in that case. +; We can automatically detect "follows a branch", but not a branch target. +; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.) +%macro REP_RET 0 + %if has_epilogue + RET + %else + rep ret + %endif + annotate_function_size +%endmacro + +%define last_branch_adr $$ +%macro AUTO_REP_RET 0 + %if notcpuflag(ssse3) + times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr. + %endif + ret + annotate_function_size +%endmacro + +%macro BRANCH_INSTR 0-* + %rep %0 + %macro %1 1-2 %1 + %2 %1 + %if notcpuflag(ssse3) + %%branch_instr equ $ + %xdefine last_branch_adr %%branch_instr + %endif + %endmacro + %rotate 1 + %endrep +%endmacro + +BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp + +%macro TAIL_CALL 2 ; callee, is_nonadjacent + %if has_epilogue + call %1 + RET + %elif %2 + jmp %1 + %endif + annotate_function_size +%endmacro + +;============================================================================= +; arch-independent part +;============================================================================= + +%assign function_align 16 + +; Begin a function. +; Applies any symbol mangling needed for C linkage, and sets up a define such that +; subsequent uses of the function name automatically refer to the mangled version. +; Appends cpuflags to the function name if cpuflags has been specified. +; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX +; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2). +%macro cglobal 1-2+ "" ; name, [PROLOGUE args] + cglobal_internal 1, %1 %+ SUFFIX, %2 +%endmacro +%macro cvisible 1-2+ "" ; name, [PROLOGUE args] + cglobal_internal 0, %1 %+ SUFFIX, %2 +%endmacro +%macro cglobal_internal 2-3+ + annotate_function_size + %if %1 + %xdefine %%FUNCTION_PREFIX private_prefix + ; libvpx explicitly sets visibility in shared object builds. Avoid + ; setting visibility to hidden as it may break builds that split + ; sources on e.g., directory boundaries. + %ifdef CHROMIUM + %xdefine %%VISIBILITY hidden + %else + %xdefine %%VISIBILITY + %endif + %else + %xdefine %%FUNCTION_PREFIX public_prefix + %xdefine %%VISIBILITY + %endif + %ifndef cglobaled_%2 + %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) + %xdefine %2.skip_prologue %2 %+ .skip_prologue + CAT_XDEFINE cglobaled_, %2, 1 + %endif + %xdefine current_function %2 + %xdefine current_function_section __SECT__ + %if FORMAT_ELF + global %2:function %%VISIBILITY + %elif FORMAT_MACHO + %ifdef __NASM_VER__ + global %2 + %else + global %2:private_extern + %endif + %else + global %2 + %endif + align function_align + %2: + RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer + %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required + %assign stack_offset 0 ; stack pointer offset relative to the return address + %assign stack_size 0 ; amount of stack space that can be freely used inside a function + %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding + %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 + %ifnidn %3, "" + PROLOGUE %3 + %endif +%endmacro + +%macro cextern 1 + %xdefine %1 mangle(private_prefix %+ _ %+ %1) + CAT_XDEFINE cglobaled_, %1, 1 + extern %1 +%endmacro + +; like cextern, but without the prefix +%macro cextern_naked 1 + %ifdef PREFIX + %xdefine %1 mangle(%1) + %endif + CAT_XDEFINE cglobaled_, %1, 1 + extern %1 +%endmacro + +%macro const 1-2+ + %xdefine %1 mangle(private_prefix %+ _ %+ %1) + %if FORMAT_ELF + global %1:data hidden + %else + global %1 + %endif + %1: %2 +%endmacro + +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. +%if FORMAT_ELF + [SECTION .note.GNU-stack noalloc noexec nowrite progbits] +%endif + +; Tell debuggers how large the function was. +; This may be invoked multiple times per function; we rely on later instances overriding earlier ones. +; This is invoked by RET and similar macros, and also cglobal does it for the previous function, +; but if the last function in a source file doesn't use any of the standard macros for its epilogue, +; then its size might be unspecified. +%macro annotate_function_size 0 + %ifdef __YASM_VER__ + %ifdef current_function + %if FORMAT_ELF + current_function_section + %%ecf equ $ + size current_function %%ecf - current_function + __SECT__ + %endif + %endif + %endif +%endmacro + +; cpuflags + +%assign cpuflags_mmx (1<<0) +%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx +%assign cpuflags_3dnow (1<<2) | cpuflags_mmx +%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow +%assign cpuflags_sse (1<<4) | cpuflags_mmx2 +%assign cpuflags_sse2 (1<<5) | cpuflags_sse +%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 +%assign cpuflags_sse3 (1<<7) | cpuflags_sse2 +%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3 +%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3 +%assign cpuflags_sse42 (1<<10)| cpuflags_sse4 +%assign cpuflags_avx (1<<11)| cpuflags_sse42 +%assign cpuflags_xop (1<<12)| cpuflags_avx +%assign cpuflags_fma4 (1<<13)| cpuflags_avx +%assign cpuflags_fma3 (1<<14)| cpuflags_avx +%assign cpuflags_avx2 (1<<15)| cpuflags_fma3 + +%assign cpuflags_cache32 (1<<16) +%assign cpuflags_cache64 (1<<17) +%assign cpuflags_slowctz (1<<18) +%assign cpuflags_lzcnt (1<<19) +%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant +%assign cpuflags_atom (1<<21) +%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt +%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 + +; Returns a boolean value expressing whether or not the specified cpuflag is enabled. +%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) +%define notcpuflag(x) (cpuflag(x) ^ 1) + +; Takes an arbitrary number of cpuflags from the above list. +; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. +; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. +%macro INIT_CPUFLAGS 0-* + %xdefine SUFFIX + %undef cpuname + %assign cpuflags 0 + + %if %0 >= 1 + %rep %0 + %ifdef cpuname + %xdefine cpuname cpuname %+ _%1 + %else + %xdefine cpuname %1 + %endif + %assign cpuflags cpuflags | cpuflags_%1 + %rotate 1 + %endrep + %xdefine SUFFIX _ %+ cpuname + + %if cpuflag(avx) + %assign avx_enabled 1 + %endif + %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2)) + %define mova movaps + %define movu movups + %define movnta movntps + %endif + %if cpuflag(aligned) + %define movu mova + %elif cpuflag(sse3) && notcpuflag(ssse3) + %define movu lddqu + %endif + %endif + + %if ARCH_X86_64 || cpuflag(sse2) + %ifdef __NASM_VER__ + ALIGNMODE k8 + %else + CPU amdnop + %endif + %else + %ifdef __NASM_VER__ + ALIGNMODE nop + %else + CPU basicnop + %endif + %endif +%endmacro + +; Merge mmx and sse* +; m# is a simd register of the currently selected size +; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m# +; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m# +; (All 3 remain in sync through SWAP.) + +%macro CAT_XDEFINE 3 + %xdefine %1%2 %3 +%endmacro + +%macro CAT_UNDEF 2 + %undef %1%2 +%endmacro + +%macro INIT_MMX 0-1+ + %assign avx_enabled 0 + %define RESET_MM_PERMUTATION INIT_MMX %1 + %define mmsize 8 + %define num_mmregs 8 + %define mova movq + %define movu movq + %define movh movd + %define movnta movntq + %assign %%i 0 + %rep 8 + CAT_XDEFINE m, %%i, mm %+ %%i + CAT_XDEFINE nnmm, %%i, %%i + %assign %%i %%i+1 + %endrep + %rep 8 + CAT_UNDEF m, %%i + CAT_UNDEF nnmm, %%i + %assign %%i %%i+1 + %endrep + INIT_CPUFLAGS %1 +%endmacro + +%macro INIT_XMM 0-1+ + %assign avx_enabled 0 + %define RESET_MM_PERMUTATION INIT_XMM %1 + %define mmsize 16 + %define num_mmregs 8 + %if ARCH_X86_64 + %define num_mmregs 16 + %endif + %define mova movdqa + %define movu movdqu + %define movh movq + %define movnta movntdq + %assign %%i 0 + %rep num_mmregs + CAT_XDEFINE m, %%i, xmm %+ %%i + CAT_XDEFINE nnxmm, %%i, %%i + %assign %%i %%i+1 + %endrep + INIT_CPUFLAGS %1 +%endmacro + +%macro INIT_YMM 0-1+ + %assign avx_enabled 1 + %define RESET_MM_PERMUTATION INIT_YMM %1 + %define mmsize 32 + %define num_mmregs 8 + %if ARCH_X86_64 + %define num_mmregs 16 + %endif + %define mova movdqa + %define movu movdqu + %undef movh + %define movnta movntdq + %assign %%i 0 + %rep num_mmregs + CAT_XDEFINE m, %%i, ymm %+ %%i + CAT_XDEFINE nnymm, %%i, %%i + %assign %%i %%i+1 + %endrep + INIT_CPUFLAGS %1 +%endmacro + +INIT_XMM + +%macro DECLARE_MMCAST 1 + %define mmmm%1 mm%1 + %define mmxmm%1 mm%1 + %define mmymm%1 mm%1 + %define xmmmm%1 mm%1 + %define xmmxmm%1 xmm%1 + %define xmmymm%1 xmm%1 + %define ymmmm%1 mm%1 + %define ymmxmm%1 xmm%1 + %define ymmymm%1 ymm%1 + %define xm%1 xmm %+ m%1 + %define ym%1 ymm %+ m%1 +%endmacro + +%assign i 0 +%rep 16 + DECLARE_MMCAST i + %assign i i+1 +%endrep + +; I often want to use macros that permute their arguments. e.g. there's no +; efficient way to implement butterfly or transpose or dct without swapping some +; arguments. +; +; I would like to not have to manually keep track of the permutations: +; If I insert a permutation in the middle of a function, it should automatically +; change everything that follows. For more complex macros I may also have multiple +; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations. +; +; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that +; permutes its arguments. It's equivalent to exchanging the contents of the +; registers, except that this way you exchange the register names instead, so it +; doesn't cost any cycles. + +%macro PERMUTE 2-* ; takes a list of pairs to swap + %rep %0/2 + %xdefine %%tmp%2 m%2 + %rotate 2 + %endrep + %rep %0/2 + %xdefine m%1 %%tmp%2 + CAT_XDEFINE nn, m%1, %1 + %rotate 2 + %endrep +%endmacro + +%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs) + %ifnum %1 ; SWAP 0, 1, ... + SWAP_INTERNAL_NUM %1, %2 + %else ; SWAP m0, m1, ... + SWAP_INTERNAL_NAME %1, %2 + %endif +%endmacro + +%macro SWAP_INTERNAL_NUM 2-* + %rep %0-1 + %xdefine %%tmp m%1 + %xdefine m%1 m%2 + %xdefine m%2 %%tmp + CAT_XDEFINE nn, m%1, %1 + CAT_XDEFINE nn, m%2, %2 + %rotate 1 + %endrep +%endmacro + +%macro SWAP_INTERNAL_NAME 2-* + %xdefine %%args nn %+ %1 + %rep %0-1 + %xdefine %%args %%args, nn %+ %2 + %rotate 1 + %endrep + SWAP_INTERNAL_NUM %%args +%endmacro + +; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later +; calls to that function will automatically load the permutation, so values can +; be returned in mmregs. +%macro SAVE_MM_PERMUTATION 0-1 + %if %0 + %xdefine %%f %1_m + %else + %xdefine %%f current_function %+ _m + %endif + %assign %%i 0 + %rep num_mmregs + CAT_XDEFINE %%f, %%i, m %+ %%i + %assign %%i %%i+1 + %endrep +%endmacro + +%macro LOAD_MM_PERMUTATION 1 ; name to load from + %ifdef %1_m0 + %assign %%i 0 + %rep num_mmregs + CAT_XDEFINE m, %%i, %1_m %+ %%i + CAT_XDEFINE nn, m %+ %%i, %%i + %assign %%i %%i+1 + %endrep + %endif +%endmacro + +; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't +%macro call 1 + call_internal %1 %+ SUFFIX, %1 +%endmacro +%macro call_internal 2 + %xdefine %%i %2 + %ifndef cglobaled_%2 + %ifdef cglobaled_%1 + %xdefine %%i %1 + %endif + %endif + call %%i + LOAD_MM_PERMUTATION %%i +%endmacro + +; Substitutions that reduce instruction size but are functionally equivalent +%macro add 2 + %ifnum %2 + %if %2==128 + sub %1, -128 + %else + add %1, %2 + %endif + %else + add %1, %2 + %endif +%endmacro + +%macro sub 2 + %ifnum %2 + %if %2==128 + add %1, -128 + %else + sub %1, %2 + %endif + %else + sub %1, %2 + %endif +%endmacro + +;============================================================================= +; AVX abstraction layer +;============================================================================= + +%assign i 0 +%rep 16 + %if i < 8 + CAT_XDEFINE sizeofmm, i, 8 + %endif + CAT_XDEFINE sizeofxmm, i, 16 + CAT_XDEFINE sizeofymm, i, 32 + %assign i i+1 +%endrep +%undef i + +%macro CHECK_AVX_INSTR_EMU 3-* + %xdefine %%opcode %1 + %xdefine %%dst %2 + %rep %0-2 + %ifidn %%dst, %3 + %error non-avx emulation of ``%%opcode'' is not supported + %endif + %rotate 1 + %endrep +%endmacro + +;%1 == instruction +;%2 == minimal instruction set +;%3 == 1 if float, 0 if int +;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise +;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not +;%6+: operands +%macro RUN_AVX_INSTR 6-9+ + %ifnum sizeof%7 + %assign __sizeofreg sizeof%7 + %elifnum sizeof%6 + %assign __sizeofreg sizeof%6 + %else + %assign __sizeofreg mmsize + %endif + %assign __emulate_avx 0 + %if avx_enabled && __sizeofreg >= 16 + %xdefine __instr v%1 + %else + %xdefine __instr %1 + %if %0 >= 8+%4 + %assign __emulate_avx 1 + %endif + %endif + %ifnidn %2, fnord + %ifdef cpuname + %if notcpuflag(%2) + %error use of ``%1'' %2 instruction in cpuname function: current_function + %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 + %error use of ``%1'' sse2 instruction in cpuname function: current_function + %endif + %endif + %endif + + %if __emulate_avx + %xdefine __src1 %7 + %xdefine __src2 %8 + %ifnidn %6, %7 + %if %0 >= 9 + CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9 + %else + CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8 + %endif + %if %5 && %4 == 0 + %ifnid %8 + ; 3-operand AVX instructions with a memory arg can only have it in src2, + ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). + ; So, if the instruction is commutative with a memory arg, swap them. + %xdefine __src1 %8 + %xdefine __src2 %7 + %endif + %endif + %if __sizeofreg == 8 + MOVQ %6, __src1 + %elif %3 + MOVAPS %6, __src1 + %else + MOVDQA %6, __src1 + %endif + %endif + %if %0 >= 9 + %1 %6, __src2, %9 + %else + %1 %6, __src2 + %endif + %elif %0 >= 9 + __instr %6, %7, %8, %9 + %elif %0 == 8 + __instr %6, %7, %8 + %elif %0 == 7 + __instr %6, %7 + %else + __instr %6 + %endif +%endmacro + +;%1 == instruction +;%2 == minimal instruction set +;%3 == 1 if float, 0 if int +;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise +;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not +%macro AVX_INSTR 1-5 fnord, 0, 1, 0 + %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5 + %ifidn %2, fnord + RUN_AVX_INSTR %6, %7, %8, %9, %10, %1 + %elifidn %3, fnord + RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2 + %elifidn %4, fnord + RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3 + %elifidn %5, fnord + RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4 + %else + RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5 + %endif + %endmacro +%endmacro + +; Instructions with both VEX and non-VEX encodings +; Non-destructive instructions are written without parameters +AVX_INSTR addpd, sse2, 1, 0, 1 +AVX_INSTR addps, sse, 1, 0, 1 +AVX_INSTR addsd, sse2, 1, 0, 1 +AVX_INSTR addss, sse, 1, 0, 1 +AVX_INSTR addsubpd, sse3, 1, 0, 0 +AVX_INSTR addsubps, sse3, 1, 0, 0 +AVX_INSTR aesdec, fnord, 0, 0, 0 +AVX_INSTR aesdeclast, fnord, 0, 0, 0 +AVX_INSTR aesenc, fnord, 0, 0, 0 +AVX_INSTR aesenclast, fnord, 0, 0, 0 +AVX_INSTR aesimc +AVX_INSTR aeskeygenassist +AVX_INSTR andnpd, sse2, 1, 0, 0 +AVX_INSTR andnps, sse, 1, 0, 0 +AVX_INSTR andpd, sse2, 1, 0, 1 +AVX_INSTR andps, sse, 1, 0, 1 +AVX_INSTR blendpd, sse4, 1, 0, 0 +AVX_INSTR blendps, sse4, 1, 0, 0 +AVX_INSTR blendvpd, sse4, 1, 0, 0 +AVX_INSTR blendvps, sse4, 1, 0, 0 +AVX_INSTR cmppd, sse2, 1, 1, 0 +AVX_INSTR cmpps, sse, 1, 1, 0 +AVX_INSTR cmpsd, sse2, 1, 1, 0 +AVX_INSTR cmpss, sse, 1, 1, 0 +AVX_INSTR comisd, sse2 +AVX_INSTR comiss, sse +AVX_INSTR cvtdq2pd, sse2 +AVX_INSTR cvtdq2ps, sse2 +AVX_INSTR cvtpd2dq, sse2 +AVX_INSTR cvtpd2ps, sse2 +AVX_INSTR cvtps2dq, sse2 +AVX_INSTR cvtps2pd, sse2 +AVX_INSTR cvtsd2si, sse2 +AVX_INSTR cvtsd2ss, sse2 +AVX_INSTR cvtsi2sd, sse2 +AVX_INSTR cvtsi2ss, sse +AVX_INSTR cvtss2sd, sse2 +AVX_INSTR cvtss2si, sse +AVX_INSTR cvttpd2dq, sse2 +AVX_INSTR cvttps2dq, sse2 +AVX_INSTR cvttsd2si, sse2 +AVX_INSTR cvttss2si, sse +AVX_INSTR divpd, sse2, 1, 0, 0 +AVX_INSTR divps, sse, 1, 0, 0 +AVX_INSTR divsd, sse2, 1, 0, 0 +AVX_INSTR divss, sse, 1, 0, 0 +AVX_INSTR dppd, sse4, 1, 1, 0 +AVX_INSTR dpps, sse4, 1, 1, 0 +AVX_INSTR extractps, sse4 +AVX_INSTR haddpd, sse3, 1, 0, 0 +AVX_INSTR haddps, sse3, 1, 0, 0 +AVX_INSTR hsubpd, sse3, 1, 0, 0 +AVX_INSTR hsubps, sse3, 1, 0, 0 +AVX_INSTR insertps, sse4, 1, 1, 0 +AVX_INSTR lddqu, sse3 +AVX_INSTR ldmxcsr, sse +AVX_INSTR maskmovdqu, sse2 +AVX_INSTR maxpd, sse2, 1, 0, 1 +AVX_INSTR maxps, sse, 1, 0, 1 +AVX_INSTR maxsd, sse2, 1, 0, 1 +AVX_INSTR maxss, sse, 1, 0, 1 +AVX_INSTR minpd, sse2, 1, 0, 1 +AVX_INSTR minps, sse, 1, 0, 1 +AVX_INSTR minsd, sse2, 1, 0, 1 +AVX_INSTR minss, sse, 1, 0, 1 +AVX_INSTR movapd, sse2 +AVX_INSTR movaps, sse +AVX_INSTR movd, mmx +AVX_INSTR movddup, sse3 +AVX_INSTR movdqa, sse2 +AVX_INSTR movdqu, sse2 +AVX_INSTR movhlps, sse, 1, 0, 0 +AVX_INSTR movhpd, sse2, 1, 0, 0 +AVX_INSTR movhps, sse, 1, 0, 0 +AVX_INSTR movlhps, sse, 1, 0, 0 +AVX_INSTR movlpd, sse2, 1, 0, 0 +AVX_INSTR movlps, sse, 1, 0, 0 +AVX_INSTR movmskpd, sse2 +AVX_INSTR movmskps, sse +AVX_INSTR movntdq, sse2 +AVX_INSTR movntdqa, sse4 +AVX_INSTR movntpd, sse2 +AVX_INSTR movntps, sse +AVX_INSTR movq, mmx +AVX_INSTR movsd, sse2, 1, 0, 0 +AVX_INSTR movshdup, sse3 +AVX_INSTR movsldup, sse3 +AVX_INSTR movss, sse, 1, 0, 0 +AVX_INSTR movupd, sse2 +AVX_INSTR movups, sse +AVX_INSTR mpsadbw, sse4 +AVX_INSTR mulpd, sse2, 1, 0, 1 +AVX_INSTR mulps, sse, 1, 0, 1 +AVX_INSTR mulsd, sse2, 1, 0, 1 +AVX_INSTR mulss, sse, 1, 0, 1 +AVX_INSTR orpd, sse2, 1, 0, 1 +AVX_INSTR orps, sse, 1, 0, 1 +AVX_INSTR pabsb, ssse3 +AVX_INSTR pabsd, ssse3 +AVX_INSTR pabsw, ssse3 +AVX_INSTR packsswb, mmx, 0, 0, 0 +AVX_INSTR packssdw, mmx, 0, 0, 0 +AVX_INSTR packuswb, mmx, 0, 0, 0 +AVX_INSTR packusdw, sse4, 0, 0, 0 +AVX_INSTR paddb, mmx, 0, 0, 1 +AVX_INSTR paddw, mmx, 0, 0, 1 +AVX_INSTR paddd, mmx, 0, 0, 1 +AVX_INSTR paddq, sse2, 0, 0, 1 +AVX_INSTR paddsb, mmx, 0, 0, 1 +AVX_INSTR paddsw, mmx, 0, 0, 1 +AVX_INSTR paddusb, mmx, 0, 0, 1 +AVX_INSTR paddusw, mmx, 0, 0, 1 +AVX_INSTR palignr, ssse3 +AVX_INSTR pand, mmx, 0, 0, 1 +AVX_INSTR pandn, mmx, 0, 0, 0 +AVX_INSTR pavgb, mmx2, 0, 0, 1 +AVX_INSTR pavgw, mmx2, 0, 0, 1 +AVX_INSTR pblendvb, sse4, 0, 0, 0 +AVX_INSTR pblendw, sse4 +AVX_INSTR pclmulqdq +AVX_INSTR pcmpestri, sse42 +AVX_INSTR pcmpestrm, sse42 +AVX_INSTR pcmpistri, sse42 +AVX_INSTR pcmpistrm, sse42 +AVX_INSTR pcmpeqb, mmx, 0, 0, 1 +AVX_INSTR pcmpeqw, mmx, 0, 0, 1 +AVX_INSTR pcmpeqd, mmx, 0, 0, 1 +AVX_INSTR pcmpeqq, sse4, 0, 0, 1 +AVX_INSTR pcmpgtb, mmx, 0, 0, 0 +AVX_INSTR pcmpgtw, mmx, 0, 0, 0 +AVX_INSTR pcmpgtd, mmx, 0, 0, 0 +AVX_INSTR pcmpgtq, sse42, 0, 0, 0 +AVX_INSTR pextrb, sse4 +AVX_INSTR pextrd, sse4 +AVX_INSTR pextrq, sse4 +AVX_INSTR pextrw, mmx2 +AVX_INSTR phaddw, ssse3, 0, 0, 0 +AVX_INSTR phaddd, ssse3, 0, 0, 0 +AVX_INSTR phaddsw, ssse3, 0, 0, 0 +AVX_INSTR phminposuw, sse4 +AVX_INSTR phsubw, ssse3, 0, 0, 0 +AVX_INSTR phsubd, ssse3, 0, 0, 0 +AVX_INSTR phsubsw, ssse3, 0, 0, 0 +AVX_INSTR pinsrb, sse4 +AVX_INSTR pinsrd, sse4 +AVX_INSTR pinsrq, sse4 +AVX_INSTR pinsrw, mmx2 +AVX_INSTR pmaddwd, mmx, 0, 0, 1 +AVX_INSTR pmaddubsw, ssse3, 0, 0, 0 +AVX_INSTR pmaxsb, sse4, 0, 0, 1 +AVX_INSTR pmaxsw, mmx2, 0, 0, 1 +AVX_INSTR pmaxsd, sse4, 0, 0, 1 +AVX_INSTR pmaxub, mmx2, 0, 0, 1 +AVX_INSTR pmaxuw, sse4, 0, 0, 1 +AVX_INSTR pmaxud, sse4, 0, 0, 1 +AVX_INSTR pminsb, sse4, 0, 0, 1 +AVX_INSTR pminsw, mmx2, 0, 0, 1 +AVX_INSTR pminsd, sse4, 0, 0, 1 +AVX_INSTR pminub, mmx2, 0, 0, 1 +AVX_INSTR pminuw, sse4, 0, 0, 1 +AVX_INSTR pminud, sse4, 0, 0, 1 +AVX_INSTR pmovmskb, mmx2 +AVX_INSTR pmovsxbw, sse4 +AVX_INSTR pmovsxbd, sse4 +AVX_INSTR pmovsxbq, sse4 +AVX_INSTR pmovsxwd, sse4 +AVX_INSTR pmovsxwq, sse4 +AVX_INSTR pmovsxdq, sse4 +AVX_INSTR pmovzxbw, sse4 +AVX_INSTR pmovzxbd, sse4 +AVX_INSTR pmovzxbq, sse4 +AVX_INSTR pmovzxwd, sse4 +AVX_INSTR pmovzxwq, sse4 +AVX_INSTR pmovzxdq, sse4 +AVX_INSTR pmuldq, sse4, 0, 0, 1 +AVX_INSTR pmulhrsw, ssse3, 0, 0, 1 +AVX_INSTR pmulhuw, mmx2, 0, 0, 1 +AVX_INSTR pmulhw, mmx, 0, 0, 1 +AVX_INSTR pmullw, mmx, 0, 0, 1 +AVX_INSTR pmulld, sse4, 0, 0, 1 +AVX_INSTR pmuludq, sse2, 0, 0, 1 +AVX_INSTR por, mmx, 0, 0, 1 +AVX_INSTR psadbw, mmx2, 0, 0, 1 +AVX_INSTR pshufb, ssse3, 0, 0, 0 +AVX_INSTR pshufd, sse2 +AVX_INSTR pshufhw, sse2 +AVX_INSTR pshuflw, sse2 +AVX_INSTR psignb, ssse3, 0, 0, 0 +AVX_INSTR psignw, ssse3, 0, 0, 0 +AVX_INSTR psignd, ssse3, 0, 0, 0 +AVX_INSTR psllw, mmx, 0, 0, 0 +AVX_INSTR pslld, mmx, 0, 0, 0 +AVX_INSTR psllq, mmx, 0, 0, 0 +AVX_INSTR pslldq, sse2, 0, 0, 0 +AVX_INSTR psraw, mmx, 0, 0, 0 +AVX_INSTR psrad, mmx, 0, 0, 0 +AVX_INSTR psrlw, mmx, 0, 0, 0 +AVX_INSTR psrld, mmx, 0, 0, 0 +AVX_INSTR psrlq, mmx, 0, 0, 0 +AVX_INSTR psrldq, sse2, 0, 0, 0 +AVX_INSTR psubb, mmx, 0, 0, 0 +AVX_INSTR psubw, mmx, 0, 0, 0 +AVX_INSTR psubd, mmx, 0, 0, 0 +AVX_INSTR psubq, sse2, 0, 0, 0 +AVX_INSTR psubsb, mmx, 0, 0, 0 +AVX_INSTR psubsw, mmx, 0, 0, 0 +AVX_INSTR psubusb, mmx, 0, 0, 0 +AVX_INSTR psubusw, mmx, 0, 0, 0 +AVX_INSTR ptest, sse4 +AVX_INSTR punpckhbw, mmx, 0, 0, 0 +AVX_INSTR punpckhwd, mmx, 0, 0, 0 +AVX_INSTR punpckhdq, mmx, 0, 0, 0 +AVX_INSTR punpckhqdq, sse2, 0, 0, 0 +AVX_INSTR punpcklbw, mmx, 0, 0, 0 +AVX_INSTR punpcklwd, mmx, 0, 0, 0 +AVX_INSTR punpckldq, mmx, 0, 0, 0 +AVX_INSTR punpcklqdq, sse2, 0, 0, 0 +AVX_INSTR pxor, mmx, 0, 0, 1 +AVX_INSTR rcpps, sse, 1, 0, 0 +AVX_INSTR rcpss, sse, 1, 0, 0 +AVX_INSTR roundpd, sse4 +AVX_INSTR roundps, sse4 +AVX_INSTR roundsd, sse4 +AVX_INSTR roundss, sse4 +AVX_INSTR rsqrtps, sse, 1, 0, 0 +AVX_INSTR rsqrtss, sse, 1, 0, 0 +AVX_INSTR shufpd, sse2, 1, 1, 0 +AVX_INSTR shufps, sse, 1, 1, 0 +AVX_INSTR sqrtpd, sse2, 1, 0, 0 +AVX_INSTR sqrtps, sse, 1, 0, 0 +AVX_INSTR sqrtsd, sse2, 1, 0, 0 +AVX_INSTR sqrtss, sse, 1, 0, 0 +AVX_INSTR stmxcsr, sse +AVX_INSTR subpd, sse2, 1, 0, 0 +AVX_INSTR subps, sse, 1, 0, 0 +AVX_INSTR subsd, sse2, 1, 0, 0 +AVX_INSTR subss, sse, 1, 0, 0 +AVX_INSTR ucomisd, sse2 +AVX_INSTR ucomiss, sse +AVX_INSTR unpckhpd, sse2, 1, 0, 0 +AVX_INSTR unpckhps, sse, 1, 0, 0 +AVX_INSTR unpcklpd, sse2, 1, 0, 0 +AVX_INSTR unpcklps, sse, 1, 0, 0 +AVX_INSTR xorpd, sse2, 1, 0, 1 +AVX_INSTR xorps, sse, 1, 0, 1 + +; 3DNow instructions, for sharing code between AVX, SSE and 3DN +AVX_INSTR pfadd, 3dnow, 1, 0, 1 +AVX_INSTR pfsub, 3dnow, 1, 0, 0 +AVX_INSTR pfmul, 3dnow, 1, 0, 1 + +; base-4 constants for shuffles +%assign i 0 +%rep 256 + %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3) + %if j < 10 + CAT_XDEFINE q000, j, i + %elif j < 100 + CAT_XDEFINE q00, j, i + %elif j < 1000 + CAT_XDEFINE q0, j, i + %else + CAT_XDEFINE q, j, i + %endif + %assign i i+1 +%endrep +%undef i +%undef j + +%macro FMA_INSTR 3 + %macro %1 4-7 %1, %2, %3 + %if cpuflag(xop) + v%5 %1, %2, %3, %4 + %elifnidn %1, %4 + %6 %1, %2, %3 + %7 %1, %4 + %else + %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported + %endif + %endmacro +%endmacro + +FMA_INSTR pmacsww, pmullw, paddw +FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation +FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation +FMA_INSTR pmadcswd, pmaddwd, paddd + +; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax. +; FMA3 is only possible if dst is the same as one of the src registers. +; Either src2 or src3 can be a memory operand. +%macro FMA4_INSTR 2-* + %push fma4_instr + %xdefine %$prefix %1 + %rep %0 - 1 + %macro %$prefix%2 4-6 %$prefix, %2 + %if notcpuflag(fma3) && notcpuflag(fma4) + %error use of ``%5%6'' fma instruction in cpuname function: current_function + %elif cpuflag(fma4) + v%5%6 %1, %2, %3, %4 + %elifidn %1, %2 + ; If %3 or %4 is a memory operand it needs to be encoded as the last operand. + %ifid %3 + v%{5}213%6 %2, %3, %4 + %else + v%{5}132%6 %2, %4, %3 + %endif + %elifidn %1, %3 + v%{5}213%6 %3, %2, %4 + %elifidn %1, %4 + v%{5}231%6 %4, %2, %3 + %else + %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported + %endif + %endmacro + %rotate 1 + %endrep + %pop +%endmacro + +FMA4_INSTR fmadd, pd, ps, sd, ss +FMA4_INSTR fmaddsub, pd, ps +FMA4_INSTR fmsub, pd, ps, sd, ss +FMA4_INSTR fmsubadd, pd, ps +FMA4_INSTR fnmadd, pd, ps, sd, ss +FMA4_INSTR fnmsub, pd, ps, sd, ss + +; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) +%ifdef __YASM_VER__ + %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %macro vpbroadcastq 2 + %if sizeof%1 == 16 + movddup %1, %2 + %else + vbroadcastsd %1, %2 + %endif + %endmacro + %endif +%endif diff --git a/thirdparty/libvpx/vp8/common/alloccommon.c b/thirdparty/libvpx/vp8/common/alloccommon.c new file mode 100644 index 00000000000..8dfd4ce203e --- /dev/null +++ b/thirdparty/libvpx/vp8/common/alloccommon.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "alloccommon.h" +#include "blockd.h" +#include "vpx_mem/vpx_mem.h" +#include "onyxc_int.h" +#include "findnearmv.h" +#include "entropymode.h" +#include "systemdependent.h" + +void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) +{ + int i; + for (i = 0; i < NUM_YV12_BUFFERS; i++) + vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); + + vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); +#if CONFIG_POSTPROC + vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); + if (oci->post_proc_buffer_int_used) + vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); + + vpx_free(oci->pp_limits_buffer); + oci->pp_limits_buffer = NULL; +#endif + + vpx_free(oci->above_context); + vpx_free(oci->mip); +#if CONFIG_ERROR_CONCEALMENT + vpx_free(oci->prev_mip); + oci->prev_mip = NULL; +#endif + + oci->above_context = NULL; + oci->mip = NULL; +} + +int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) +{ + int i; + + vp8_de_alloc_frame_buffers(oci); + + /* our internal buffers are always multiples of 16 */ + if ((width & 0xf) != 0) + width += 16 - (width & 0xf); + + if ((height & 0xf) != 0) + height += 16 - (height & 0xf); + + + for (i = 0; i < NUM_YV12_BUFFERS; i++) + { + oci->fb_idx_ref_cnt[i] = 0; + oci->yv12_fb[i].flags = 0; + if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) + goto allocation_fail; + } + + oci->new_fb_idx = 0; + oci->lst_fb_idx = 1; + oci->gld_fb_idx = 2; + oci->alt_fb_idx = 3; + + oci->fb_idx_ref_cnt[0] = 1; + oci->fb_idx_ref_cnt[1] = 1; + oci->fb_idx_ref_cnt[2] = 1; + oci->fb_idx_ref_cnt[3] = 1; + + if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) + goto allocation_fail; + + oci->mb_rows = height >> 4; + oci->mb_cols = width >> 4; + oci->MBs = oci->mb_rows * oci->mb_cols; + oci->mode_info_stride = oci->mb_cols + 1; + oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); + + if (!oci->mip) + goto allocation_fail; + + oci->mi = oci->mip + oci->mode_info_stride + 1; + + /* Allocation of previous mode info will be done in vp8_decode_frame() + * as it is a decoder only data */ + + oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); + + if (!oci->above_context) + goto allocation_fail; + +#if CONFIG_POSTPROC + if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) + goto allocation_fail; + + oci->post_proc_buffer_int_used = 0; + memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); + memset(oci->post_proc_buffer.buffer_alloc, 128, + oci->post_proc_buffer.frame_size); + + /* Allocate buffer to store post-processing filter coefficients. + * + * Note: Round up mb_cols to support SIMD reads + */ + oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); + if (!oci->pp_limits_buffer) + goto allocation_fail; +#endif + + return 0; + +allocation_fail: + vp8_de_alloc_frame_buffers(oci); + return 1; +} + +void vp8_setup_version(VP8_COMMON *cm) +{ + switch (cm->version) + { + case 0: + cm->no_lpf = 0; + cm->filter_type = NORMAL_LOOPFILTER; + cm->use_bilinear_mc_filter = 0; + cm->full_pixel = 0; + break; + case 1: + cm->no_lpf = 0; + cm->filter_type = SIMPLE_LOOPFILTER; + cm->use_bilinear_mc_filter = 1; + cm->full_pixel = 0; + break; + case 2: + cm->no_lpf = 1; + cm->filter_type = NORMAL_LOOPFILTER; + cm->use_bilinear_mc_filter = 1; + cm->full_pixel = 0; + break; + case 3: + cm->no_lpf = 1; + cm->filter_type = SIMPLE_LOOPFILTER; + cm->use_bilinear_mc_filter = 1; + cm->full_pixel = 1; + break; + default: + /*4,5,6,7 are reserved for future use*/ + cm->no_lpf = 0; + cm->filter_type = NORMAL_LOOPFILTER; + cm->use_bilinear_mc_filter = 0; + cm->full_pixel = 0; + break; + } +} +void vp8_create_common(VP8_COMMON *oci) +{ + vp8_machine_specific_config(oci); + + vp8_init_mbmode_probs(oci); + vp8_default_bmode_probs(oci->fc.bmode_prob); + + oci->mb_no_coeff_skip = 1; + oci->no_lpf = 0; + oci->filter_type = NORMAL_LOOPFILTER; + oci->use_bilinear_mc_filter = 0; + oci->full_pixel = 0; + oci->multi_token_partition = ONE_PARTITION; + oci->clamp_type = RECON_CLAMP_REQUIRED; + + /* Initialize reference frame sign bias structure to defaults */ + memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); + + /* Default disable buffer to buffer copying */ + oci->copy_buffer_to_gf = 0; + oci->copy_buffer_to_arf = 0; +} + +void vp8_remove_common(VP8_COMMON *oci) +{ + vp8_de_alloc_frame_buffers(oci); +} diff --git a/thirdparty/libvpx/vp8/common/alloccommon.h b/thirdparty/libvpx/vp8/common/alloccommon.h new file mode 100644 index 00000000000..93e99d76b1d --- /dev/null +++ b/thirdparty/libvpx/vp8/common/alloccommon.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ALLOCCOMMON_H_ +#define VP8_COMMON_ALLOCCOMMON_H_ + +#include "onyxc_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_create_common(VP8_COMMON *oci); +void vp8_remove_common(VP8_COMMON *oci); +void vp8_de_alloc_frame_buffers(VP8_COMMON *oci); +int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height); +void vp8_setup_version(VP8_COMMON *oci); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c b/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c new file mode 100644 index 00000000000..5840c2bbaaa --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/onyxc_int.h" + +#define prototype_loopfilter(sym) \ + void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ + const unsigned char *limit, const unsigned char *thresh, int count) + +#if HAVE_MEDIA +extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6); +extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6); +extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6); +extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6); +#endif + +#if HAVE_NEON +typedef void loopfilter_y_neon(unsigned char *src, int pitch, + unsigned char blimit, unsigned char limit, unsigned char thresh); +typedef void loopfilter_uv_neon(unsigned char *u, int pitch, + unsigned char blimit, unsigned char limit, unsigned char thresh, + unsigned char *v); + +extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; +extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; +extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; +extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; + +extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; +extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; +extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; +extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; +#endif + +#if HAVE_MEDIA +/* ARMV6/MEDIA loopfilter functions*/ +/* Horizontal MB filtering */ +void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + +void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride, + const unsigned char *blimit) +{ + vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit); +} + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + +void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride, + const unsigned char *blimit) +{ + vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit); +} +#endif + +#if HAVE_NEON +/* NEON loopfilter functions */ +/* Horizontal MB filtering */ +void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned char mblim = *lfi->mblim; + unsigned char lim = *lfi->lim; + unsigned char hev_thr = *lfi->hev_thr; + vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); + + if (u_ptr) + vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); +} + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned char mblim = *lfi->mblim; + unsigned char lim = *lfi->lim; + unsigned char hev_thr = *lfi->hev_thr; + + vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); + + if (u_ptr) + vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); +} + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned char blim = *lfi->blim; + unsigned char lim = *lfi->lim; + unsigned char hev_thr = *lfi->hev_thr; + + vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr); + vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr); + vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr); + + if (u_ptr) + vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride); +} + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + unsigned char blim = *lfi->blim; + unsigned char lim = *lfi->lim; + unsigned char hev_thr = *lfi->hev_thr; + + vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr); + vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr); + vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr); + + if (u_ptr) + vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4); +} +#endif diff --git a/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c new file mode 100644 index 00000000000..bb6ea76ba47 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +static const uint8_t bifilter4_coeff[8][2] = { + {128, 0}, + {112, 16}, + { 96, 32}, + { 80, 48}, + { 64, 64}, + { 48, 80}, + { 32, 96}, + { 16, 112} +}; + +void vp8_bilinear_predict8x4_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8; + uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8; + uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; + uint16x8_t q1u16, q2u16, q3u16, q4u16; + uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; + + if (xoffset == 0) { // skip_1stpass_filter + d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d26u8 = vld1_u8(src_ptr); + } else { + q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q5u8 = vld1q_u8(src_ptr); + + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); + + q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); + q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); + q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + + d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); + d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); + d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + + q6u16 = vmlal_u8(q6u16, d3u8, d1u8); + q7u16 = vmlal_u8(q7u16, d5u8, d1u8); + q8u16 = vmlal_u8(q8u16, d7u8, d1u8); + q9u16 = vmlal_u8(q9u16, d9u8, d1u8); + q10u16 = vmlal_u8(q10u16, d11u8, d1u8); + + d22u8 = vqrshrn_n_u16(q6u16, 7); + d23u8 = vqrshrn_n_u16(q7u16, 7); + d24u8 = vqrshrn_n_u16(q8u16, 7); + d25u8 = vqrshrn_n_u16(q9u16, 7); + d26u8 = vqrshrn_n_u16(q10u16, 7); + } + + // secondpass_filter + if (yoffset == 0) { // skip_2ndpass_filter + vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d25u8); + } else { + d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); + + q1u16 = vmull_u8(d22u8, d0u8); + q2u16 = vmull_u8(d23u8, d0u8); + q3u16 = vmull_u8(d24u8, d0u8); + q4u16 = vmull_u8(d25u8, d0u8); + + q1u16 = vmlal_u8(q1u16, d23u8, d1u8); + q2u16 = vmlal_u8(q2u16, d24u8, d1u8); + q3u16 = vmlal_u8(q3u16, d25u8, d1u8); + q4u16 = vmlal_u8(q4u16, d26u8, d1u8); + + d2u8 = vqrshrn_n_u16(q1u16, 7); + d3u8 = vqrshrn_n_u16(q2u16, 7); + d4u8 = vqrshrn_n_u16(q3u16, 7); + d5u8 = vqrshrn_n_u16(q4u16, 7); + + vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d5u8); + } + return; +} + +void vp8_bilinear_predict8x8_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8; + uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8; + uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; + uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16; + uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; + + if (xoffset == 0) { // skip_1stpass_filter + d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; + d30u8 = vld1_u8(src_ptr); + } else { + q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); + + q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); + q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); + q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + + d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); + d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); + d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + + q6u16 = vmlal_u8(q6u16, d3u8, d1u8); + q7u16 = vmlal_u8(q7u16, d5u8, d1u8); + q8u16 = vmlal_u8(q8u16, d7u8, d1u8); + q9u16 = vmlal_u8(q9u16, d9u8, d1u8); + + d22u8 = vqrshrn_n_u16(q6u16, 7); + d23u8 = vqrshrn_n_u16(q7u16, 7); + d24u8 = vqrshrn_n_u16(q8u16, 7); + d25u8 = vqrshrn_n_u16(q9u16, 7); + + // first_pass filtering on the rest 5-line data + q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q5u8 = vld1q_u8(src_ptr); + + q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); + q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); + q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + + d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); + d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); + d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + + q6u16 = vmlal_u8(q6u16, d3u8, d1u8); + q7u16 = vmlal_u8(q7u16, d5u8, d1u8); + q8u16 = vmlal_u8(q8u16, d7u8, d1u8); + q9u16 = vmlal_u8(q9u16, d9u8, d1u8); + q10u16 = vmlal_u8(q10u16, d11u8, d1u8); + + d26u8 = vqrshrn_n_u16(q6u16, 7); + d27u8 = vqrshrn_n_u16(q7u16, 7); + d28u8 = vqrshrn_n_u16(q8u16, 7); + d29u8 = vqrshrn_n_u16(q9u16, 7); + d30u8 = vqrshrn_n_u16(q10u16, 7); + } + + // secondpass_filter + if (yoffset == 0) { // skip_2ndpass_filter + vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d29u8); + } else { + d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); + + q1u16 = vmull_u8(d22u8, d0u8); + q2u16 = vmull_u8(d23u8, d0u8); + q3u16 = vmull_u8(d24u8, d0u8); + q4u16 = vmull_u8(d25u8, d0u8); + q5u16 = vmull_u8(d26u8, d0u8); + q6u16 = vmull_u8(d27u8, d0u8); + q7u16 = vmull_u8(d28u8, d0u8); + q8u16 = vmull_u8(d29u8, d0u8); + + q1u16 = vmlal_u8(q1u16, d23u8, d1u8); + q2u16 = vmlal_u8(q2u16, d24u8, d1u8); + q3u16 = vmlal_u8(q3u16, d25u8, d1u8); + q4u16 = vmlal_u8(q4u16, d26u8, d1u8); + q5u16 = vmlal_u8(q5u16, d27u8, d1u8); + q6u16 = vmlal_u8(q6u16, d28u8, d1u8); + q7u16 = vmlal_u8(q7u16, d29u8, d1u8); + q8u16 = vmlal_u8(q8u16, d30u8, d1u8); + + d2u8 = vqrshrn_n_u16(q1u16, 7); + d3u8 = vqrshrn_n_u16(q2u16, 7); + d4u8 = vqrshrn_n_u16(q3u16, 7); + d5u8 = vqrshrn_n_u16(q4u16, 7); + d6u8 = vqrshrn_n_u16(q5u16, 7); + d7u8 = vqrshrn_n_u16(q6u16, 7); + d8u8 = vqrshrn_n_u16(q7u16, 7); + d9u8 = vqrshrn_n_u16(q8u16, 7); + + vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch; + vst1_u8((uint8_t *)dst_ptr, d9u8); + } + return; +} + +void vp8_bilinear_predict16x16_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + int i; + unsigned char tmp[272]; + unsigned char *tmpp; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; + uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; + uint8x8_t d19u8, d20u8, d21u8; + uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; + uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8; + uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; + uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; + + if (xoffset == 0) { // secondpass_bfilter16x16_only + d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); + + q11u8 = vld1q_u8(src_ptr); + src_ptr += src_pixels_per_line; + for (i = 4; i > 0; i--) { + q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; + + q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); + q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); + q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); + q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); + q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); + q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); + q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); + q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); + + q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); + q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); + q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); + q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); + q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); + q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); + q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); + q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); + + d2u8 = vqrshrn_n_u16(q1u16, 7); + d3u8 = vqrshrn_n_u16(q2u16, 7); + d4u8 = vqrshrn_n_u16(q3u16, 7); + d5u8 = vqrshrn_n_u16(q4u16, 7); + d6u8 = vqrshrn_n_u16(q5u16, 7); + d7u8 = vqrshrn_n_u16(q6u16, 7); + d8u8 = vqrshrn_n_u16(q7u16, 7); + d9u8 = vqrshrn_n_u16(q8u16, 7); + + q1u8 = vcombine_u8(d2u8, d3u8); + q2u8 = vcombine_u8(d4u8, d5u8); + q3u8 = vcombine_u8(d6u8, d7u8); + q4u8 = vcombine_u8(d8u8, d9u8); + + q11u8 = q15u8; + + vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; + } + return; + } + + if (yoffset == 0) { // firstpass_bfilter16x16_only + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); + + for (i = 4; i > 0 ; i--) { + d2u8 = vld1_u8(src_ptr); + d3u8 = vld1_u8(src_ptr + 8); + d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d5u8 = vld1_u8(src_ptr); + d6u8 = vld1_u8(src_ptr + 8); + d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d8u8 = vld1_u8(src_ptr); + d9u8 = vld1_u8(src_ptr + 8); + d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d11u8 = vld1_u8(src_ptr); + d12u8 = vld1_u8(src_ptr + 8); + d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + + q7u16 = vmull_u8(d2u8, d0u8); + q8u16 = vmull_u8(d3u8, d0u8); + q9u16 = vmull_u8(d5u8, d0u8); + q10u16 = vmull_u8(d6u8, d0u8); + q11u16 = vmull_u8(d8u8, d0u8); + q12u16 = vmull_u8(d9u8, d0u8); + q13u16 = vmull_u8(d11u8, d0u8); + q14u16 = vmull_u8(d12u8, d0u8); + + d2u8 = vext_u8(d2u8, d3u8, 1); + d5u8 = vext_u8(d5u8, d6u8, 1); + d8u8 = vext_u8(d8u8, d9u8, 1); + d11u8 = vext_u8(d11u8, d12u8, 1); + + q7u16 = vmlal_u8(q7u16, d2u8, d1u8); + q9u16 = vmlal_u8(q9u16, d5u8, d1u8); + q11u16 = vmlal_u8(q11u16, d8u8, d1u8); + q13u16 = vmlal_u8(q13u16, d11u8, d1u8); + + d3u8 = vext_u8(d3u8, d4u8, 1); + d6u8 = vext_u8(d6u8, d7u8, 1); + d9u8 = vext_u8(d9u8, d10u8, 1); + d12u8 = vext_u8(d12u8, d13u8, 1); + + q8u16 = vmlal_u8(q8u16, d3u8, d1u8); + q10u16 = vmlal_u8(q10u16, d6u8, d1u8); + q12u16 = vmlal_u8(q12u16, d9u8, d1u8); + q14u16 = vmlal_u8(q14u16, d12u8, d1u8); + + d14u8 = vqrshrn_n_u16(q7u16, 7); + d15u8 = vqrshrn_n_u16(q8u16, 7); + d16u8 = vqrshrn_n_u16(q9u16, 7); + d17u8 = vqrshrn_n_u16(q10u16, 7); + d18u8 = vqrshrn_n_u16(q11u16, 7); + d19u8 = vqrshrn_n_u16(q12u16, 7); + d20u8 = vqrshrn_n_u16(q13u16, 7); + d21u8 = vqrshrn_n_u16(q14u16, 7); + + q7u8 = vcombine_u8(d14u8, d15u8); + q8u8 = vcombine_u8(d16u8, d17u8); + q9u8 = vcombine_u8(d18u8, d19u8); + q10u8 =vcombine_u8(d20u8, d21u8); + + vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch; + } + return; + } + + d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); + + d2u8 = vld1_u8(src_ptr); + d3u8 = vld1_u8(src_ptr + 8); + d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d5u8 = vld1_u8(src_ptr); + d6u8 = vld1_u8(src_ptr + 8); + d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d8u8 = vld1_u8(src_ptr); + d9u8 = vld1_u8(src_ptr + 8); + d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d11u8 = vld1_u8(src_ptr); + d12u8 = vld1_u8(src_ptr + 8); + d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + + // First Pass: output_height lines x output_width columns (17x16) + tmpp = tmp; + for (i = 3; i > 0; i--) { + q7u16 = vmull_u8(d2u8, d0u8); + q8u16 = vmull_u8(d3u8, d0u8); + q9u16 = vmull_u8(d5u8, d0u8); + q10u16 = vmull_u8(d6u8, d0u8); + q11u16 = vmull_u8(d8u8, d0u8); + q12u16 = vmull_u8(d9u8, d0u8); + q13u16 = vmull_u8(d11u8, d0u8); + q14u16 = vmull_u8(d12u8, d0u8); + + d2u8 = vext_u8(d2u8, d3u8, 1); + d5u8 = vext_u8(d5u8, d6u8, 1); + d8u8 = vext_u8(d8u8, d9u8, 1); + d11u8 = vext_u8(d11u8, d12u8, 1); + + q7u16 = vmlal_u8(q7u16, d2u8, d1u8); + q9u16 = vmlal_u8(q9u16, d5u8, d1u8); + q11u16 = vmlal_u8(q11u16, d8u8, d1u8); + q13u16 = vmlal_u8(q13u16, d11u8, d1u8); + + d3u8 = vext_u8(d3u8, d4u8, 1); + d6u8 = vext_u8(d6u8, d7u8, 1); + d9u8 = vext_u8(d9u8, d10u8, 1); + d12u8 = vext_u8(d12u8, d13u8, 1); + + q8u16 = vmlal_u8(q8u16, d3u8, d1u8); + q10u16 = vmlal_u8(q10u16, d6u8, d1u8); + q12u16 = vmlal_u8(q12u16, d9u8, d1u8); + q14u16 = vmlal_u8(q14u16, d12u8, d1u8); + + d14u8 = vqrshrn_n_u16(q7u16, 7); + d15u8 = vqrshrn_n_u16(q8u16, 7); + d16u8 = vqrshrn_n_u16(q9u16, 7); + d17u8 = vqrshrn_n_u16(q10u16, 7); + d18u8 = vqrshrn_n_u16(q11u16, 7); + d19u8 = vqrshrn_n_u16(q12u16, 7); + d20u8 = vqrshrn_n_u16(q13u16, 7); + d21u8 = vqrshrn_n_u16(q14u16, 7); + + d2u8 = vld1_u8(src_ptr); + d3u8 = vld1_u8(src_ptr + 8); + d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d5u8 = vld1_u8(src_ptr); + d6u8 = vld1_u8(src_ptr + 8); + d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d8u8 = vld1_u8(src_ptr); + d9u8 = vld1_u8(src_ptr + 8); + d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + d11u8 = vld1_u8(src_ptr); + d12u8 = vld1_u8(src_ptr + 8); + d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + + q7u8 = vcombine_u8(d14u8, d15u8); + q8u8 = vcombine_u8(d16u8, d17u8); + q9u8 = vcombine_u8(d18u8, d19u8); + q10u8 = vcombine_u8(d20u8, d21u8); + + vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16; + } + + // First-pass filtering for rest 5 lines + d14u8 = vld1_u8(src_ptr); + d15u8 = vld1_u8(src_ptr + 8); + d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; + + q9u16 = vmull_u8(d2u8, d0u8); + q10u16 = vmull_u8(d3u8, d0u8); + q11u16 = vmull_u8(d5u8, d0u8); + q12u16 = vmull_u8(d6u8, d0u8); + q13u16 = vmull_u8(d8u8, d0u8); + q14u16 = vmull_u8(d9u8, d0u8); + + d2u8 = vext_u8(d2u8, d3u8, 1); + d5u8 = vext_u8(d5u8, d6u8, 1); + d8u8 = vext_u8(d8u8, d9u8, 1); + + q9u16 = vmlal_u8(q9u16, d2u8, d1u8); + q11u16 = vmlal_u8(q11u16, d5u8, d1u8); + q13u16 = vmlal_u8(q13u16, d8u8, d1u8); + + d3u8 = vext_u8(d3u8, d4u8, 1); + d6u8 = vext_u8(d6u8, d7u8, 1); + d9u8 = vext_u8(d9u8, d10u8, 1); + + q10u16 = vmlal_u8(q10u16, d3u8, d1u8); + q12u16 = vmlal_u8(q12u16, d6u8, d1u8); + q14u16 = vmlal_u8(q14u16, d9u8, d1u8); + + q1u16 = vmull_u8(d11u8, d0u8); + q2u16 = vmull_u8(d12u8, d0u8); + q3u16 = vmull_u8(d14u8, d0u8); + q4u16 = vmull_u8(d15u8, d0u8); + + d11u8 = vext_u8(d11u8, d12u8, 1); + d14u8 = vext_u8(d14u8, d15u8, 1); + + q1u16 = vmlal_u8(q1u16, d11u8, d1u8); + q3u16 = vmlal_u8(q3u16, d14u8, d1u8); + + d12u8 = vext_u8(d12u8, d13u8, 1); + d15u8 = vext_u8(d15u8, d16u8, 1); + + q2u16 = vmlal_u8(q2u16, d12u8, d1u8); + q4u16 = vmlal_u8(q4u16, d15u8, d1u8); + + d10u8 = vqrshrn_n_u16(q9u16, 7); + d11u8 = vqrshrn_n_u16(q10u16, 7); + d12u8 = vqrshrn_n_u16(q11u16, 7); + d13u8 = vqrshrn_n_u16(q12u16, 7); + d14u8 = vqrshrn_n_u16(q13u16, 7); + d15u8 = vqrshrn_n_u16(q14u16, 7); + d16u8 = vqrshrn_n_u16(q1u16, 7); + d17u8 = vqrshrn_n_u16(q2u16, 7); + d18u8 = vqrshrn_n_u16(q3u16, 7); + d19u8 = vqrshrn_n_u16(q4u16, 7); + + q5u8 = vcombine_u8(d10u8, d11u8); + q6u8 = vcombine_u8(d12u8, d13u8); + q7u8 = vcombine_u8(d14u8, d15u8); + q8u8 = vcombine_u8(d16u8, d17u8); + q9u8 = vcombine_u8(d18u8, d19u8); + + vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; + vst1q_u8((uint8_t *)tmpp, q9u8); + + // secondpass_filter + d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); + d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); + + tmpp = tmp; + q11u8 = vld1q_u8(tmpp); + tmpp += 16; + for (i = 4; i > 0; i--) { + q12u8 = vld1q_u8(tmpp); tmpp += 16; + q13u8 = vld1q_u8(tmpp); tmpp += 16; + q14u8 = vld1q_u8(tmpp); tmpp += 16; + q15u8 = vld1q_u8(tmpp); tmpp += 16; + + q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); + q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); + q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); + q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); + q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); + q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); + q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); + q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); + + q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); + q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); + q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); + q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); + q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); + q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); + q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); + q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); + + d2u8 = vqrshrn_n_u16(q1u16, 7); + d3u8 = vqrshrn_n_u16(q2u16, 7); + d4u8 = vqrshrn_n_u16(q3u16, 7); + d5u8 = vqrshrn_n_u16(q4u16, 7); + d6u8 = vqrshrn_n_u16(q5u16, 7); + d7u8 = vqrshrn_n_u16(q6u16, 7); + d8u8 = vqrshrn_n_u16(q7u16, 7); + d9u8 = vqrshrn_n_u16(q8u16, 7); + + q1u8 = vcombine_u8(d2u8, d3u8); + q2u8 = vcombine_u8(d4u8, d5u8); + q3u8 = vcombine_u8(d6u8, d7u8); + q4u8 = vcombine_u8(d8u8, d9u8); + + q11u8 = q15u8; + + vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; + vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; + } + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c new file mode 100644 index 00000000000..deced115c14 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +void vp8_copy_mem8x4_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + uint8x8_t vtmp; + int r; + + for (r = 0; r < 4; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst, vtmp); + src += src_stride; + dst += dst_stride; + } +} + +void vp8_copy_mem8x8_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + uint8x8_t vtmp; + int r; + + for (r = 0; r < 8; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst, vtmp); + src += src_stride; + dst += dst_stride; + } +} + +void vp8_copy_mem16x16_neon( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) { + int r; + uint8x16_t qtmp; + + for (r = 0; r < 16; r++) { + qtmp = vld1q_u8(src); + vst1q_u8(dst, qtmp); + src += src_stride; + dst += dst_stride; + } +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c new file mode 100644 index 00000000000..ad5f41d7dee --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +void vp8_dc_only_idct_add_neon( + int16_t input_dc, + unsigned char *pred_ptr, + int pred_stride, + unsigned char *dst_ptr, + int dst_stride) { + int i; + uint16_t a1 = ((input_dc + 4) >> 3); + uint32x2_t d2u32 = vdup_n_u32(0); + uint8x8_t d2u8; + uint16x8_t q1u16; + uint16x8_t qAdd; + + qAdd = vdupq_n_u16(a1); + + for (i = 0; i < 2; i++) { + d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); + pred_ptr += pred_stride; + d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); + pred_ptr += pred_stride; + + q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); + + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); + dst_ptr += dst_stride; + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); + dst_ptr += dst_stride; + } +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c new file mode 100644 index 00000000000..58e11922c76 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +static const int16_t cospi8sqrt2minus1 = 20091; +static const int16_t sinpi8sqrt2 = 35468; + +void vp8_dequant_idct_add_neon( + int16_t *input, + int16_t *dq, + unsigned char *dst, + int stride) { + unsigned char *dst0; + int32x2_t d14, d15; + int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; + int16x8_t q1, q2, q3, q4, q5, q6; + int16x8_t qEmpty = vdupq_n_s16(0); + int32x2x2_t d2tmp0, d2tmp1; + int16x4x2_t d2tmp2, d2tmp3; + + d14 = d15 = vdup_n_s32(0); + + // load input + q3 = vld1q_s16(input); + vst1q_s16(input, qEmpty); + input += 8; + q4 = vld1q_s16(input); + vst1q_s16(input, qEmpty); + + // load dq + q5 = vld1q_s16(dq); + dq += 8; + q6 = vld1q_s16(dq); + + // load src from dst + dst0 = dst; + d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); + dst0 += stride; + d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); + dst0 += stride; + d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); + dst0 += stride; + d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); + + q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), + vreinterpretq_u16_s16(q5))); + q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), + vreinterpretq_u16_s16(q6))); + + d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); + d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); + + q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); + + q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); + q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); + + q3 = vshrq_n_s16(q3, 1); + q4 = vshrq_n_s16(q4, 1); + + q3 = vqaddq_s16(q3, q2); + q4 = vqaddq_s16(q4, q2); + + d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); + d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), + vreinterpret_s16_s32(d2tmp1.val[0])); + d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), + vreinterpret_s16_s32(d2tmp1.val[1])); + + // loop 2 + q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); + + q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); + q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); + + d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); + d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); + + q3 = vshrq_n_s16(q3, 1); + q4 = vshrq_n_s16(q4, 1); + + q3 = vqaddq_s16(q3, q2); + q4 = vqaddq_s16(q4, q2); + + d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); + d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + d2 = vrshr_n_s16(d2, 3); + d3 = vrshr_n_s16(d3, 3); + d4 = vrshr_n_s16(d4, 3); + d5 = vrshr_n_s16(d5, 3); + + d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), + vreinterpret_s16_s32(d2tmp1.val[0])); + d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), + vreinterpret_s16_s32(d2tmp1.val[1])); + + q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); + q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); + + q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), + vreinterpret_u8_s32(d14))); + q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), + vreinterpret_u8_s32(d15))); + + d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); + d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); + + dst0 = dst; + vst1_lane_s32((int32_t *)dst0, d14, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d14, 1); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d15, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d15, 1); + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c new file mode 100644 index 00000000000..54e709dd3c3 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp8/common/blockd.h" + +void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { + int16x8x2_t qQ, qDQC, qDQ; + + qQ = vld2q_s16(d->qcoeff); + qDQC = vld2q_s16(DQC); + + qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); + qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); + + vst2q_s16(d->dqcoeff, qDQ); +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c new file mode 100644 index 00000000000..fb327a72601 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_config.h" +#include "vp8_rtcd.h" + +/* place these declarations here because we don't want to maintain them + * outside of this scope + */ +void idct_dequant_full_2x_neon(short *q, short *dq, + unsigned char *dst, int stride); +void idct_dequant_0_2x_neon(short *q, short dq, + unsigned char *dst, int stride); + + +void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, + unsigned char *dst, + int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)(eobs))[0]) + { + if (((short *)eobs)[0] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, dst, stride); + else + idct_dequant_0_2x_neon (q, dq[0], dst, stride); + } + + if (((short *)(eobs))[1]) + { + if (((short *)eobs)[1] & 0xfefe) + idct_dequant_full_2x_neon (q+32, dq, dst+8, stride); + else + idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride); + } + q += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, + unsigned char *dstu, + unsigned char *dstv, + int stride, char *eobs) +{ + if (((short *)(eobs))[0]) + { + if (((short *)eobs)[0] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, dstu, stride); + else + idct_dequant_0_2x_neon (q, dq[0], dstu, stride); + } + + q += 32; + dstu += 4*stride; + + if (((short *)(eobs))[1]) + { + if (((short *)eobs)[1] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, dstu, stride); + else + idct_dequant_0_2x_neon (q, dq[0], dstu, stride); + } + + q += 32; + + if (((short *)(eobs))[2]) + { + if (((short *)eobs)[2] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, dstv, stride); + else + idct_dequant_0_2x_neon (q, dq[0], dstv, stride); + } + + q += 32; + dstv += 4*stride; + + if (((short *)(eobs))[3]) + { + if (((short *)eobs)[3] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, dstv, stride); + else + idct_dequant_0_2x_neon (q, dq[0], dstv, stride); + } +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c new file mode 100644 index 00000000000..e6f862fa898 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +void idct_dequant_0_2x_neon( + int16_t *q, + int16_t dq, + unsigned char *dst, + int stride) { + unsigned char *dst0; + int i, a0, a1; + int16x8x2_t q2Add; + int32x2_t d2s32 = vdup_n_s32(0), + d4s32 = vdup_n_s32(0); + uint8x8_t d2u8, d4u8; + uint16x8_t q1u16, q2u16; + + a0 = ((q[0] * dq) + 4) >> 3; + a1 = ((q[16] * dq) + 4) >> 3; + q[0] = q[16] = 0; + q2Add.val[0] = vdupq_n_s16((int16_t)a0); + q2Add.val[1] = vdupq_n_s16((int16_t)a1); + + for (i = 0; i < 2; i++, dst += 4) { + dst0 = dst; + d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); + dst0 += stride; + d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); + dst0 += stride; + d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); + dst0 += stride; + d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); + + q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), + vreinterpret_u8_s32(d2s32)); + q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), + vreinterpret_u8_s32(d4s32)); + + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); + d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); + + d2s32 = vreinterpret_s32_u8(d2u8); + d4s32 = vreinterpret_s32_u8(d4u8); + + dst0 = dst; + vst1_lane_s32((int32_t *)dst0, d2s32, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d2s32, 1); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d4s32, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d4s32, 1); + } + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c new file mode 100644 index 00000000000..a60ed46b764 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +static const int16_t cospi8sqrt2minus1 = 20091; +static const int16_t sinpi8sqrt2 = 17734; +// because the lowest bit in 0x8a8c is 0, we can pre-shift this + +void idct_dequant_full_2x_neon( + int16_t *q, + int16_t *dq, + unsigned char *dst, + int stride) { + unsigned char *dst0, *dst1; + int32x2_t d28, d29, d30, d31; + int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + int16x8_t qEmpty = vdupq_n_s16(0); + int32x4x2_t q2tmp0, q2tmp1; + int16x8x2_t q2tmp2, q2tmp3; + int16x4_t dLow0, dLow1, dHigh0, dHigh1; + + d28 = d29 = d30 = d31 = vdup_n_s32(0); + + // load dq + q0 = vld1q_s16(dq); + dq += 8; + q1 = vld1q_s16(dq); + + // load q + q2 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q3 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q4 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q5 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + + // load src from dst + dst0 = dst; + dst1 = dst + 4; + d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); + dst0 += stride; + d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); + dst1 += stride; + d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); + dst0 += stride; + d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); + dst1 += stride; + + d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); + dst0 += stride; + d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); + dst1 += stride; + d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); + d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); + + q2 = vmulq_s16(q2, q0); + q3 = vmulq_s16(q3, q1); + q4 = vmulq_s16(q4, q0); + q5 = vmulq_s16(q5, q1); + + // vswp + dLow0 = vget_low_s16(q2); + dHigh0 = vget_high_s16(q2); + dLow1 = vget_low_s16(q4); + dHigh1 = vget_high_s16(q4); + q2 = vcombine_s16(dLow0, dLow1); + q4 = vcombine_s16(dHigh0, dHigh1); + + dLow0 = vget_low_s16(q3); + dHigh0 = vget_high_s16(q3); + dLow1 = vget_low_s16(q5); + dHigh1 = vget_high_s16(q5); + q3 = vcombine_s16(dLow0, dLow1); + q5 = vcombine_s16(dHigh0, dHigh1); + + q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); + q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); + q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); + q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); + + q10 = vqaddq_s16(q2, q3); + q11 = vqsubq_s16(q2, q3); + + q8 = vshrq_n_s16(q8, 1); + q9 = vshrq_n_s16(q9, 1); + + q4 = vqaddq_s16(q4, q8); + q5 = vqaddq_s16(q5, q9); + + q2 = vqsubq_s16(q6, q5); + q3 = vqaddq_s16(q7, q4); + + q4 = vqaddq_s16(q10, q3); + q5 = vqaddq_s16(q11, q2); + q6 = vqsubq_s16(q11, q2); + q7 = vqsubq_s16(q10, q3); + + q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); + q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); + q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), + vreinterpretq_s16_s32(q2tmp1.val[0])); + q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), + vreinterpretq_s16_s32(q2tmp1.val[1])); + + // loop 2 + q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); + q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); + q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); + q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); + + q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); + q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); + + q10 = vshrq_n_s16(q10, 1); + q11 = vshrq_n_s16(q11, 1); + + q10 = vqaddq_s16(q2tmp2.val[1], q10); + q11 = vqaddq_s16(q2tmp3.val[1], q11); + + q8 = vqsubq_s16(q8, q11); + q9 = vqaddq_s16(q9, q10); + + q4 = vqaddq_s16(q2, q9); + q5 = vqaddq_s16(q3, q8); + q6 = vqsubq_s16(q3, q8); + q7 = vqsubq_s16(q2, q9); + + q4 = vrshrq_n_s16(q4, 3); + q5 = vrshrq_n_s16(q5, 3); + q6 = vrshrq_n_s16(q6, 3); + q7 = vrshrq_n_s16(q7, 3); + + q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); + q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); + q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), + vreinterpretq_s16_s32(q2tmp1.val[0])); + q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), + vreinterpretq_s16_s32(q2tmp1.val[1])); + + q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), + vreinterpret_u8_s32(d28))); + q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), + vreinterpret_u8_s32(d29))); + q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), + vreinterpret_u8_s32(d30))); + q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), + vreinterpret_u8_s32(d31))); + + d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); + d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); + d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); + d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); + + dst0 = dst; + dst1 = dst + 4; + vst1_lane_s32((int32_t *)dst0, d28, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d28, 1); + dst1 += stride; + vst1_lane_s32((int32_t *)dst0, d29, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d29, 1); + dst1 += stride; + + vst1_lane_s32((int32_t *)dst0, d30, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d30, 1); + dst1 += stride; + vst1_lane_s32((int32_t *)dst0, d31, 0); + vst1_lane_s32((int32_t *)dst1, d31, 1); + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c new file mode 100644 index 00000000000..6ea9dd712aa --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +void vp8_short_inv_walsh4x4_neon( + int16_t *input, + int16_t *mb_dqcoeff) { + int16x8_t q0s16, q1s16, q2s16, q3s16; + int16x4_t d4s16, d5s16, d6s16, d7s16; + int16x4x2_t v2tmp0, v2tmp1; + int32x2x2_t v2tmp2, v2tmp3; + int16x8_t qAdd3; + + q0s16 = vld1q_s16(input); + q1s16 = vld1q_s16(input + 8); + + // 1st for loop + d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); + d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); + d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); + d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); + + q2s16 = vcombine_s16(d4s16, d5s16); + q3s16 = vcombine_s16(d6s16, d7s16); + + q0s16 = vaddq_s16(q2s16, q3s16); + q1s16 = vsubq_s16(q2s16, q3s16); + + v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), + vreinterpret_s32_s16(vget_low_s16(q1s16))); + v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)), + vreinterpret_s32_s16(vget_high_s16(q1s16))); + v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), + vreinterpret_s16_s32(v2tmp3.val[0])); + v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), + vreinterpret_s16_s32(v2tmp3.val[1])); + + // 2nd for loop + d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); + d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); + d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); + d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); + q2s16 = vcombine_s16(d4s16, d5s16); + q3s16 = vcombine_s16(d6s16, d7s16); + + qAdd3 = vdupq_n_s16(3); + + q0s16 = vaddq_s16(q2s16, q3s16); + q1s16 = vsubq_s16(q2s16, q3s16); + + q0s16 = vaddq_s16(q0s16, qAdd3); + q1s16 = vaddq_s16(q1s16, qAdd3); + + q0s16 = vshrq_n_s16(q0s16, 3); + q1s16 = vshrq_n_s16(q1s16, 3); + + // store + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0); + mb_dqcoeff += 16; + + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1); + mb_dqcoeff += 16; + + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2); + mb_dqcoeff += 16; + + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3); + mb_dqcoeff += 16; + vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3); + mb_dqcoeff += 16; + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c new file mode 100644 index 00000000000..b25686ffb88 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "./vpx_config.h" + +static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( + unsigned char *s, + int p, + const unsigned char *blimit) { + uint8_t *sp; + uint8x16_t qblimit, q0u8; + uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; + int16x8_t q2s16, q3s16, q13s16; + int8x8_t d8s8, d9s8; + int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; + + qblimit = vdupq_n_u8(*blimit); + + sp = s - (p << 1); + q5u8 = vld1q_u8(sp); + sp += p; + q6u8 = vld1q_u8(sp); + sp += p; + q7u8 = vld1q_u8(sp); + sp += p; + q8u8 = vld1q_u8(sp); + + q15u8 = vabdq_u8(q6u8, q7u8); + q14u8 = vabdq_u8(q5u8, q8u8); + + q15u8 = vqaddq_u8(q15u8, q15u8); + q14u8 = vshrq_n_u8(q14u8, 1); + q0u8 = vdupq_n_u8(0x80); + q13s16 = vdupq_n_s16(3); + q15u8 = vqaddq_u8(q15u8, q14u8); + + q5u8 = veorq_u8(q5u8, q0u8); + q6u8 = veorq_u8(q6u8, q0u8); + q7u8 = veorq_u8(q7u8, q0u8); + q8u8 = veorq_u8(q8u8, q0u8); + + q15u8 = vcgeq_u8(qblimit, q15u8); + + q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), + vget_low_s8(vreinterpretq_s8_u8(q6u8))); + q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), + vget_high_s8(vreinterpretq_s8_u8(q6u8))); + + q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), + vreinterpretq_s8_u8(q8u8)); + + q2s16 = vmulq_s16(q2s16, q13s16); + q3s16 = vmulq_s16(q3s16, q13s16); + + q10u8 = vdupq_n_u8(3); + q9u8 = vdupq_n_u8(4); + + q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); + q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); + + d8s8 = vqmovn_s16(q2s16); + d9s8 = vqmovn_s16(q3s16); + q4s8 = vcombine_s8(d8s8, d9s8); + + q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); + + q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); + q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); + q2s8 = vshrq_n_s8(q2s8, 3); + q3s8 = vshrq_n_s8(q3s8, 3); + + q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); + q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); + + q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); + q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); + + vst1q_u8(s, q7u8); + s -= p; + vst1q_u8(s, q6u8); + return; +} + +void vp8_loop_filter_bhs_neon( + unsigned char *y_ptr, + int y_stride, + const unsigned char *blimit) { + y_ptr += y_stride * 4; + vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); + y_ptr += y_stride * 4; + vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); + y_ptr += y_stride * 4; + vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); + return; +} + +void vp8_loop_filter_mbhs_neon( + unsigned char *y_ptr, + int y_stride, + const unsigned char *blimit) { + vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c new file mode 100644 index 00000000000..921bcad6983 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "./vpx_config.h" +#include "vpx_ports/arm.h" + +#ifdef VPX_INCOMPATIBLE_GCC +static INLINE void write_2x4(unsigned char *dst, int pitch, + const uint8x8x2_t result) { + /* + * uint8x8x2_t result + 00 01 02 03 | 04 05 06 07 + 10 11 12 13 | 14 15 16 17 + --- + * after vtrn_u8 + 00 10 02 12 | 04 14 06 16 + 01 11 03 13 | 05 15 07 17 + */ + const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], + result.val[1]); + const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]); + const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]); + vst1_lane_u16((uint16_t *)dst, x_0_4, 0); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_1_5, 0); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_0_4, 1); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_1_5, 1); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_0_4, 2); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_1_5, 2); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_0_4, 3); + dst += pitch; + vst1_lane_u16((uint16_t *)dst, x_1_5, 3); +} + +static INLINE void write_2x8(unsigned char *dst, int pitch, + const uint8x8x2_t result, + const uint8x8x2_t result2) { + write_2x4(dst, pitch, result); + dst += pitch * 8; + write_2x4(dst, pitch, result2); +} +#else +static INLINE void write_2x8(unsigned char *dst, int pitch, + const uint8x8x2_t result, + const uint8x8x2_t result2) { + vst2_lane_u8(dst, result, 0); + dst += pitch; + vst2_lane_u8(dst, result, 1); + dst += pitch; + vst2_lane_u8(dst, result, 2); + dst += pitch; + vst2_lane_u8(dst, result, 3); + dst += pitch; + vst2_lane_u8(dst, result, 4); + dst += pitch; + vst2_lane_u8(dst, result, 5); + dst += pitch; + vst2_lane_u8(dst, result, 6); + dst += pitch; + vst2_lane_u8(dst, result, 7); + dst += pitch; + + vst2_lane_u8(dst, result2, 0); + dst += pitch; + vst2_lane_u8(dst, result2, 1); + dst += pitch; + vst2_lane_u8(dst, result2, 2); + dst += pitch; + vst2_lane_u8(dst, result2, 3); + dst += pitch; + vst2_lane_u8(dst, result2, 4); + dst += pitch; + vst2_lane_u8(dst, result2, 5); + dst += pitch; + vst2_lane_u8(dst, result2, 6); + dst += pitch; + vst2_lane_u8(dst, result2, 7); +} +#endif // VPX_INCOMPATIBLE_GCC + + +#ifdef VPX_INCOMPATIBLE_GCC +static INLINE +uint8x8x4_t read_4x8(unsigned char *src, int pitch) { + uint8x8x4_t x; + const uint8x8_t a = vld1_u8(src); + const uint8x8_t b = vld1_u8(src + pitch * 1); + const uint8x8_t c = vld1_u8(src + pitch * 2); + const uint8x8_t d = vld1_u8(src + pitch * 3); + const uint8x8_t e = vld1_u8(src + pitch * 4); + const uint8x8_t f = vld1_u8(src + pitch * 5); + const uint8x8_t g = vld1_u8(src + pitch * 6); + const uint8x8_t h = vld1_u8(src + pitch * 7); + const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a), + vreinterpret_u32_u8(e)); + const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b), + vreinterpret_u32_u8(f)); + const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c), + vreinterpret_u32_u8(g)); + const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d), + vreinterpret_u32_u8(h)); + const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), + vreinterpret_u16_u32(r26_u32.val[0])); + const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), + vreinterpret_u16_u32(r37_u32.val[0])); + const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), + vreinterpret_u8_u16(r13_u16.val[0])); + const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), + vreinterpret_u8_u16(r13_u16.val[1])); + /* + * after vtrn_u32 + 00 01 02 03 | 40 41 42 43 + 10 11 12 13 | 50 51 52 53 + 20 21 22 23 | 60 61 62 63 + 30 31 32 33 | 70 71 72 73 + --- + * after vtrn_u16 + 00 01 20 21 | 40 41 60 61 + 02 03 22 23 | 42 43 62 63 + 10 11 30 31 | 50 51 70 71 + 12 13 32 33 | 52 52 72 73 + + 00 01 20 21 | 40 41 60 61 + 10 11 30 31 | 50 51 70 71 + 02 03 22 23 | 42 43 62 63 + 12 13 32 33 | 52 52 72 73 + --- + * after vtrn_u8 + 00 10 20 30 | 40 50 60 70 + 01 11 21 31 | 41 51 61 71 + 02 12 22 32 | 42 52 62 72 + 03 13 23 33 | 43 53 63 73 + */ + x.val[0] = r01_u8.val[0]; + x.val[1] = r01_u8.val[1]; + x.val[2] = r23_u8.val[0]; + x.val[3] = r23_u8.val[1]; + + return x; +} +#else +static INLINE +uint8x8x4_t read_4x8(unsigned char *src, int pitch) { + uint8x8x4_t x; + x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0); + x = vld4_lane_u8(src, x, 0); + src += pitch; + x = vld4_lane_u8(src, x, 1); + src += pitch; + x = vld4_lane_u8(src, x, 2); + src += pitch; + x = vld4_lane_u8(src, x, 3); + src += pitch; + x = vld4_lane_u8(src, x, 4); + src += pitch; + x = vld4_lane_u8(src, x, 5); + src += pitch; + x = vld4_lane_u8(src, x, 6); + src += pitch; + x = vld4_lane_u8(src, x, 7); + return x; +} +#endif // VPX_INCOMPATIBLE_GCC + +static INLINE void vp8_loop_filter_simple_vertical_edge_neon( + unsigned char *s, + int p, + const unsigned char *blimit) { + unsigned char *src1; + uint8x16_t qblimit, q0u8; + uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; + int16x8_t q2s16, q13s16, q11s16; + int8x8_t d28s8, d29s8; + int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8; + uint8x8x4_t d0u8x4; // d6, d7, d8, d9 + uint8x8x4_t d1u8x4; // d10, d11, d12, d13 + uint8x8x2_t d2u8x2; // d12, d13 + uint8x8x2_t d3u8x2; // d14, d15 + + qblimit = vdupq_n_u8(*blimit); + + src1 = s - 2; + d0u8x4 = read_4x8(src1, p); + src1 += p * 8; + d1u8x4 = read_4x8(src1, p); + + q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10 + q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12 + q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11 + q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13 + + q15u8 = vabdq_u8(q5u8, q4u8); + q14u8 = vabdq_u8(q3u8, q6u8); + + q15u8 = vqaddq_u8(q15u8, q15u8); + q14u8 = vshrq_n_u8(q14u8, 1); + q0u8 = vdupq_n_u8(0x80); + q11s16 = vdupq_n_s16(3); + q15u8 = vqaddq_u8(q15u8, q14u8); + + q3u8 = veorq_u8(q3u8, q0u8); + q4u8 = veorq_u8(q4u8, q0u8); + q5u8 = veorq_u8(q5u8, q0u8); + q6u8 = veorq_u8(q6u8, q0u8); + + q15u8 = vcgeq_u8(qblimit, q15u8); + + q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)), + vget_low_s8(vreinterpretq_s8_u8(q5u8))); + q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)), + vget_high_s8(vreinterpretq_s8_u8(q5u8))); + + q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), + vreinterpretq_s8_u8(q6u8)); + + q2s16 = vmulq_s16(q2s16, q11s16); + q13s16 = vmulq_s16(q13s16, q11s16); + + q11u8 = vdupq_n_u8(3); + q12u8 = vdupq_n_u8(4); + + q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8)); + q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8)); + + d28s8 = vqmovn_s16(q2s16); + d29s8 = vqmovn_s16(q13s16); + q14s8 = vcombine_s8(d28s8, d29s8); + + q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8)); + + q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8)); + q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8)); + q2s8 = vshrq_n_s8(q2s8, 3); + q14s8 = vshrq_n_s8(q3s8, 3); + + q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8); + q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8); + + q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); + q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); + + d2u8x2.val[0] = vget_low_u8(q6u8); // d12 + d2u8x2.val[1] = vget_low_u8(q7u8); // d14 + d3u8x2.val[0] = vget_high_u8(q6u8); // d13 + d3u8x2.val[1] = vget_high_u8(q7u8); // d15 + + src1 = s - 1; + write_2x8(src1, p, d2u8x2, d3u8x2); +} + +void vp8_loop_filter_bvs_neon( + unsigned char *y_ptr, + int y_stride, + const unsigned char *blimit) { + y_ptr += 4; + vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); + y_ptr += 4; + vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); + y_ptr += 4; + vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); + return; +} + +void vp8_loop_filter_mbvs_neon( + unsigned char *y_ptr, + int y_stride, + const unsigned char *blimit) { + vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c new file mode 100644 index 00000000000..5351f4be665 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c @@ -0,0 +1,625 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "./vpx_config.h" + +static INLINE void vp8_mbloop_filter_neon( + uint8x16_t qblimit, // mblimit + uint8x16_t qlimit, // limit + uint8x16_t qthresh, // thresh + uint8x16_t q3, // p2 + uint8x16_t q4, // p2 + uint8x16_t q5, // p1 + uint8x16_t q6, // p0 + uint8x16_t q7, // q0 + uint8x16_t q8, // q1 + uint8x16_t q9, // q2 + uint8x16_t q10, // q3 + uint8x16_t *q4r, // p1 + uint8x16_t *q5r, // p1 + uint8x16_t *q6r, // p0 + uint8x16_t *q7r, // q0 + uint8x16_t *q8r, // q1 + uint8x16_t *q9r) { // q1 + uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8; + int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8; + uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16; + int8x16_t q0s8, q12s8, q14s8, q15s8; + int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29; + + q11u8 = vabdq_u8(q3, q4); + q12u8 = vabdq_u8(q4, q5); + q13u8 = vabdq_u8(q5, q6); + q14u8 = vabdq_u8(q8, q7); + q1u8 = vabdq_u8(q9, q8); + q0u8 = vabdq_u8(q10, q9); + + q11u8 = vmaxq_u8(q11u8, q12u8); + q12u8 = vmaxq_u8(q13u8, q14u8); + q1u8 = vmaxq_u8(q1u8, q0u8); + q15u8 = vmaxq_u8(q11u8, q12u8); + + q12u8 = vabdq_u8(q6, q7); + + // vp8_hevmask + q13u8 = vcgtq_u8(q13u8, qthresh); + q14u8 = vcgtq_u8(q14u8, qthresh); + q15u8 = vmaxq_u8(q15u8, q1u8); + + q15u8 = vcgeq_u8(qlimit, q15u8); + + q1u8 = vabdq_u8(q5, q8); + q12u8 = vqaddq_u8(q12u8, q12u8); + + // vp8_filter() function + // convert to signed + q0u8 = vdupq_n_u8(0x80); + q9 = veorq_u8(q9, q0u8); + q8 = veorq_u8(q8, q0u8); + q7 = veorq_u8(q7, q0u8); + q6 = veorq_u8(q6, q0u8); + q5 = veorq_u8(q5, q0u8); + q4 = veorq_u8(q4, q0u8); + + q1u8 = vshrq_n_u8(q1u8, 1); + q12u8 = vqaddq_u8(q12u8, q1u8); + + q14u8 = vorrq_u8(q13u8, q14u8); + q12u8 = vcgeq_u8(qblimit, q12u8); + + q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), + vget_low_s8(vreinterpretq_s8_u8(q6))); + q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), + vget_high_s8(vreinterpretq_s8_u8(q6))); + + q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), + vreinterpretq_s8_u8(q8)); + + q11s16 = vdupq_n_s16(3); + q2s16 = vmulq_s16(q2s16, q11s16); + q13s16 = vmulq_s16(q13s16, q11s16); + + q15u8 = vandq_u8(q15u8, q12u8); + + q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); + q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8)); + + q12u8 = vdupq_n_u8(3); + q11u8 = vdupq_n_u8(4); + // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) + d2 = vqmovn_s16(q2s16); + d3 = vqmovn_s16(q13s16); + q1s8 = vcombine_s8(d2, d3); + q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8)); + q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); + + q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8)); + q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8)); + q2s8 = vshrq_n_s8(q2s8, 3); + q13s8 = vshrq_n_s8(q13s8, 3); + + q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8); + q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8); + + q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); + + q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63); + d5 = vdup_n_s8(9); + d4 = vdup_n_s8(18); + + q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); + q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); + d5 = vdup_n_s8(27); + q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); + q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); + q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5); + q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); + + d0 = vqshrn_n_s16(q0s16 , 7); + d1 = vqshrn_n_s16(q11s16, 7); + d24 = vqshrn_n_s16(q12s16, 7); + d25 = vqshrn_n_s16(q13s16, 7); + d28 = vqshrn_n_s16(q14s16, 7); + d29 = vqshrn_n_s16(q15s16, 7); + + q0s8 = vcombine_s8(d0, d1); + q12s8 = vcombine_s8(d24, d25); + q14s8 = vcombine_s8(d28, d29); + + q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8); + q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8); + q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8); + q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8); + q15s8 = vqsubq_s8((q7s8), q14s8); + q14s8 = vqaddq_s8((q6s8), q14s8); + + q1u8 = vdupq_n_u8(0x80); + *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8); + *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8); + *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8); + *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8); + *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8); + *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8); + return; +} + +void vp8_mbloop_filter_horizontal_edge_y_neon( + unsigned char *src, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh) { + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + src -= (pitch << 2); + + q3 = vld1q_u8(src); + src += pitch; + q4 = vld1q_u8(src); + src += pitch; + q5 = vld1q_u8(src); + src += pitch; + q6 = vld1q_u8(src); + src += pitch; + q7 = vld1q_u8(src); + src += pitch; + q8 = vld1q_u8(src); + src += pitch; + q9 = vld1q_u8(src); + src += pitch; + q10 = vld1q_u8(src); + + vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q4, &q5, &q6, &q7, &q8, &q9); + + src -= (pitch * 6); + vst1q_u8(src, q4); + src += pitch; + vst1q_u8(src, q5); + src += pitch; + vst1q_u8(src, q6); + src += pitch; + vst1q_u8(src, q7); + src += pitch; + vst1q_u8(src, q8); + src += pitch; + vst1q_u8(src, q9); + return; +} + +void vp8_mbloop_filter_horizontal_edge_uv_neon( + unsigned char *u, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh, + unsigned char *v) { + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + u -= (pitch << 2); + v -= (pitch << 2); + + d6 = vld1_u8(u); + u += pitch; + d7 = vld1_u8(v); + v += pitch; + d8 = vld1_u8(u); + u += pitch; + d9 = vld1_u8(v); + v += pitch; + d10 = vld1_u8(u); + u += pitch; + d11 = vld1_u8(v); + v += pitch; + d12 = vld1_u8(u); + u += pitch; + d13 = vld1_u8(v); + v += pitch; + d14 = vld1_u8(u); + u += pitch; + d15 = vld1_u8(v); + v += pitch; + d16 = vld1_u8(u); + u += pitch; + d17 = vld1_u8(v); + v += pitch; + d18 = vld1_u8(u); + u += pitch; + d19 = vld1_u8(v); + v += pitch; + d20 = vld1_u8(u); + d21 = vld1_u8(v); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q4, &q5, &q6, &q7, &q8, &q9); + + u -= (pitch * 6); + v -= (pitch * 6); + vst1_u8(u, vget_low_u8(q4)); + u += pitch; + vst1_u8(v, vget_high_u8(q4)); + v += pitch; + vst1_u8(u, vget_low_u8(q5)); + u += pitch; + vst1_u8(v, vget_high_u8(q5)); + v += pitch; + vst1_u8(u, vget_low_u8(q6)); + u += pitch; + vst1_u8(v, vget_high_u8(q6)); + v += pitch; + vst1_u8(u, vget_low_u8(q7)); + u += pitch; + vst1_u8(v, vget_high_u8(q7)); + v += pitch; + vst1_u8(u, vget_low_u8(q8)); + u += pitch; + vst1_u8(v, vget_high_u8(q8)); + v += pitch; + vst1_u8(u, vget_low_u8(q9)); + vst1_u8(v, vget_high_u8(q9)); + return; +} + +void vp8_mbloop_filter_vertical_edge_y_neon( + unsigned char *src, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh) { + unsigned char *s1, *s2; + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; + uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; + uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + s1 = src - 4; + s2 = s1 + 8 * pitch; + d6 = vld1_u8(s1); + s1 += pitch; + d7 = vld1_u8(s2); + s2 += pitch; + d8 = vld1_u8(s1); + s1 += pitch; + d9 = vld1_u8(s2); + s2 += pitch; + d10 = vld1_u8(s1); + s1 += pitch; + d11 = vld1_u8(s2); + s2 += pitch; + d12 = vld1_u8(s1); + s1 += pitch; + d13 = vld1_u8(s2); + s2 += pitch; + d14 = vld1_u8(s1); + s1 += pitch; + d15 = vld1_u8(s2); + s2 += pitch; + d16 = vld1_u8(s1); + s1 += pitch; + d17 = vld1_u8(s2); + s2 += pitch; + d18 = vld1_u8(s1); + s1 += pitch; + d19 = vld1_u8(s2); + s2 += pitch; + d20 = vld1_u8(s1); + d21 = vld1_u8(s2); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q4, &q5, &q6, &q7, &q8, &q9); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + s1 -= 7 * pitch; + s2 -= 7 * pitch; + + vst1_u8(s1, vget_low_u8(q3)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q3)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q4)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q4)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q5)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q5)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q6)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q6)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q7)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q7)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q8)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q8)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q9)); + s1 += pitch; + vst1_u8(s2, vget_high_u8(q9)); + s2 += pitch; + vst1_u8(s1, vget_low_u8(q10)); + vst1_u8(s2, vget_high_u8(q10)); + return; +} + +void vp8_mbloop_filter_vertical_edge_uv_neon( + unsigned char *u, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh, + unsigned char *v) { + unsigned char *us, *ud; + unsigned char *vs, *vd; + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; + uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; + uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + us = u - 4; + vs = v - 4; + d6 = vld1_u8(us); + us += pitch; + d7 = vld1_u8(vs); + vs += pitch; + d8 = vld1_u8(us); + us += pitch; + d9 = vld1_u8(vs); + vs += pitch; + d10 = vld1_u8(us); + us += pitch; + d11 = vld1_u8(vs); + vs += pitch; + d12 = vld1_u8(us); + us += pitch; + d13 = vld1_u8(vs); + vs += pitch; + d14 = vld1_u8(us); + us += pitch; + d15 = vld1_u8(vs); + vs += pitch; + d16 = vld1_u8(us); + us += pitch; + d17 = vld1_u8(vs); + vs += pitch; + d18 = vld1_u8(us); + us += pitch; + d19 = vld1_u8(vs); + vs += pitch; + d20 = vld1_u8(us); + d21 = vld1_u8(vs); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q4, &q5, &q6, &q7, &q8, &q9); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + ud = u - 4; + vst1_u8(ud, vget_low_u8(q3)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q4)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q5)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q6)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q7)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q8)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q9)); + ud += pitch; + vst1_u8(ud, vget_low_u8(q10)); + + vd = v - 4; + vst1_u8(vd, vget_high_u8(q3)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q4)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q5)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q6)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q7)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q8)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q9)); + vd += pitch; + vst1_u8(vd, vget_high_u8(q10)); + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c new file mode 100644 index 00000000000..373afa6ed35 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +static const int16_t cospi8sqrt2minus1 = 20091; +static const int16_t sinpi8sqrt2 = 35468; + +void vp8_short_idct4x4llm_neon( + int16_t *input, + unsigned char *pred_ptr, + int pred_stride, + unsigned char *dst_ptr, + int dst_stride) { + int i; + uint32x2_t d6u32 = vdup_n_u32(0); + uint8x8_t d1u8; + int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; + uint16x8_t q1u16; + int16x8_t q1s16, q2s16, q3s16, q4s16; + int32x2x2_t v2tmp0, v2tmp1; + int16x4x2_t v2tmp2, v2tmp3; + + d2 = vld1_s16(input); + d3 = vld1_s16(input + 4); + d4 = vld1_s16(input + 8); + d5 = vld1_s16(input + 12); + + // 1st for loop + q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here + q2s16 = vcombine_s16(d3, d5); + + q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); + q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); + + d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 + d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 + + q3s16 = vshrq_n_s16(q3s16, 1); + q4s16 = vshrq_n_s16(q4s16, 1); + + q3s16 = vqaddq_s16(q3s16, q2s16); + q4s16 = vqaddq_s16(q4s16, q2s16); + + d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 + d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), + vreinterpret_s16_s32(v2tmp1.val[0])); + v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), + vreinterpret_s16_s32(v2tmp1.val[1])); + + // 2nd for loop + q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]); + q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]); + + q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); + q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); + + d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 + d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 + + q3s16 = vshrq_n_s16(q3s16, 1); + q4s16 = vshrq_n_s16(q4s16, 1); + + q3s16 = vqaddq_s16(q3s16, q2s16); + q4s16 = vqaddq_s16(q4s16, q2s16); + + d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 + d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 + + d2 = vqadd_s16(d12, d11); + d3 = vqadd_s16(d13, d10); + d4 = vqsub_s16(d13, d10); + d5 = vqsub_s16(d12, d11); + + d2 = vrshr_n_s16(d2, 3); + d3 = vrshr_n_s16(d3, 3); + d4 = vrshr_n_s16(d4, 3); + d5 = vrshr_n_s16(d5, 3); + + v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); + v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); + v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), + vreinterpret_s16_s32(v2tmp1.val[0])); + v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), + vreinterpret_s16_s32(v2tmp1.val[1])); + + q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]); + q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]); + + // dc_only_idct_add + for (i = 0; i < 2; i++, q1s16 = q2s16) { + d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0); + pred_ptr += pred_stride; + d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1); + pred_ptr += pred_stride; + + q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), + vreinterpret_u8_u32(d6u32)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); + + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0); + dst_ptr += dst_stride; + vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1); + dst_ptr += dst_stride; + } + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c new file mode 100644 index 00000000000..49d8d221fc7 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c @@ -0,0 +1,1377 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "vpx_ports/mem.h" + +static const int8_t vp8_sub_pel_filters[8][8] = { + {0, 0, 128, 0, 0, 0, 0, 0}, /* note that 1/8 pel positionyys are */ + {0, -6, 123, 12, -1, 0, 0, 0}, /* just as per alpha -0.5 bicubic */ + {2, -11, 108, 36, -8, 1, 0, 0}, /* New 1/4 pel 6 tap filter */ + {0, -9, 93, 50, -6, 0, 0, 0}, + {3, -16, 77, 77, -16, 3, 0, 0}, /* New 1/2 pel 6 tap filter */ + {0, -6, 50, 93, -9, 0, 0, 0}, + {1, -8, 36, 108, -11, 2, 0, 0}, /* New 1/4 pel 6 tap filter */ + {0, -1, 12, 123, -6, 0, 0, 0}, +}; + +void vp8_sixtap_predict8x4_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + unsigned char *src; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; + uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; + uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; + int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; + uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; + uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; + int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; + uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; + + if (xoffset == 0) { // secondpass_filter8x4_only + // load second_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // load src data + src = src_ptr - src_pixels_per_line * 2; + d22u8 = vld1_u8(src); + src += src_pixels_per_line; + d23u8 = vld1_u8(src); + src += src_pixels_per_line; + d24u8 = vld1_u8(src); + src += src_pixels_per_line; + d25u8 = vld1_u8(src); + src += src_pixels_per_line; + d26u8 = vld1_u8(src); + src += src_pixels_per_line; + d27u8 = vld1_u8(src); + src += src_pixels_per_line; + d28u8 = vld1_u8(src); + src += src_pixels_per_line; + d29u8 = vld1_u8(src); + src += src_pixels_per_line; + d30u8 = vld1_u8(src); + + q3u16 = vmull_u8(d22u8, d0u8); + q4u16 = vmull_u8(d23u8, d0u8); + q5u16 = vmull_u8(d24u8, d0u8); + q6u16 = vmull_u8(d25u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d24u8, d2u8); + q4u16 = vmlal_u8(q4u16, d25u8, d2u8); + q5u16 = vmlal_u8(q5u16, d26u8, d2u8); + q6u16 = vmlal_u8(q6u16, d27u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d27u8, d5u8); + q4u16 = vmlal_u8(q4u16, d28u8, d5u8); + q5u16 = vmlal_u8(q5u16, d29u8, d5u8); + q6u16 = vmlal_u8(q6u16, d30u8, d5u8); + + q7u16 = vmull_u8(d25u8, d3u8); + q8u16 = vmull_u8(d26u8, d3u8); + q9u16 = vmull_u8(d27u8, d3u8); + q10u16 = vmull_u8(d28u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + vst1_u8(dst_ptr, d6u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d7u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d8u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d9u8); + return; + } + + // load first_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // First pass: output_height lines x output_width columns (9x4) + if (yoffset == 0) // firstpass_filter4x4_only + src = src_ptr - 2; + else + src = src_ptr - 2 - (src_pixels_per_line * 2); + q3u8 = vld1q_u8(src); + src += src_pixels_per_line; + q4u8 = vld1q_u8(src); + src += src_pixels_per_line; + q5u8 = vld1q_u8(src); + src += src_pixels_per_line; + q6u8 = vld1q_u8(src); + + q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); + + q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); + q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); + q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); + q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); + + q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); + q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); + q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); + q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); + + q7u16 = vmlal_u8(q7u16, d28u8, d2u8); + q8u16 = vmlal_u8(q8u16, d29u8, d2u8); + q9u16 = vmlal_u8(q9u16, d30u8, d2u8); + q10u16 = vmlal_u8(q10u16, d31u8, d2u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); + + q7u16 = vmlal_u8(q7u16, d28u8, d5u8); + q8u16 = vmlal_u8(q8u16, d29u8, d5u8); + q9u16 = vmlal_u8(q9u16, d30u8, d5u8); + q10u16 = vmlal_u8(q10u16, d31u8, d5u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); + + q3u16 = vmull_u8(d28u8, d3u8); + q4u16 = vmull_u8(d29u8, d3u8); + q5u16 = vmull_u8(d30u8, d3u8); + q6u16 = vmull_u8(d31u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d22u8 = vqrshrun_n_s16(q7s16, 7); + d23u8 = vqrshrun_n_s16(q8s16, 7); + d24u8 = vqrshrun_n_s16(q9s16, 7); + d25u8 = vqrshrun_n_s16(q10s16, 7); + + if (yoffset == 0) { // firstpass_filter8x4_only + vst1_u8(dst_ptr, d22u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d23u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d24u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d25u8); + return; + } + + // First Pass on rest 5-line data + src += src_pixels_per_line; + q3u8 = vld1q_u8(src); + src += src_pixels_per_line; + q4u8 = vld1q_u8(src); + src += src_pixels_per_line; + q5u8 = vld1q_u8(src); + src += src_pixels_per_line; + q6u8 = vld1q_u8(src); + src += src_pixels_per_line; + q7u8 = vld1q_u8(src); + + q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); + q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); + + q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); + q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); + q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); + q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); + + q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); + q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); + q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); + q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); + + q8u16 = vmlal_u8(q8u16, d27u8, d2u8); + q9u16 = vmlal_u8(q9u16, d28u8, d2u8); + q10u16 = vmlal_u8(q10u16, d29u8, d2u8); + q11u16 = vmlal_u8(q11u16, d30u8, d2u8); + q12u16 = vmlal_u8(q12u16, d31u8, d2u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); + + q8u16 = vmlal_u8(q8u16, d27u8, d5u8); + q9u16 = vmlal_u8(q9u16, d28u8, d5u8); + q10u16 = vmlal_u8(q10u16, d29u8, d5u8); + q11u16 = vmlal_u8(q11u16, d30u8, d5u8); + q12u16 = vmlal_u8(q12u16, d31u8, d5u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); + + q3u16 = vmull_u8(d27u8, d3u8); + q4u16 = vmull_u8(d28u8, d3u8); + q5u16 = vmull_u8(d29u8, d3u8); + q6u16 = vmull_u8(d30u8, d3u8); + q7u16 = vmull_u8(d31u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + q11s16 = vreinterpretq_s16_u16(q11u16); + q12s16 = vreinterpretq_s16_u16(q12u16); + + q8s16 = vqaddq_s16(q8s16, q3s16); + q9s16 = vqaddq_s16(q9s16, q4s16); + q10s16 = vqaddq_s16(q10s16, q5s16); + q11s16 = vqaddq_s16(q11s16, q6s16); + q12s16 = vqaddq_s16(q12s16, q7s16); + + d26u8 = vqrshrun_n_s16(q8s16, 7); + d27u8 = vqrshrun_n_s16(q9s16, 7); + d28u8 = vqrshrun_n_s16(q10s16, 7); + d29u8 = vqrshrun_n_s16(q11s16, 7); + d30u8 = vqrshrun_n_s16(q12s16, 7); + + // Second pass: 8x4 + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + q3u16 = vmull_u8(d22u8, d0u8); + q4u16 = vmull_u8(d23u8, d0u8); + q5u16 = vmull_u8(d24u8, d0u8); + q6u16 = vmull_u8(d25u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d24u8, d2u8); + q4u16 = vmlal_u8(q4u16, d25u8, d2u8); + q5u16 = vmlal_u8(q5u16, d26u8, d2u8); + q6u16 = vmlal_u8(q6u16, d27u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d27u8, d5u8); + q4u16 = vmlal_u8(q4u16, d28u8, d5u8); + q5u16 = vmlal_u8(q5u16, d29u8, d5u8); + q6u16 = vmlal_u8(q6u16, d30u8, d5u8); + + q7u16 = vmull_u8(d25u8, d3u8); + q8u16 = vmull_u8(d26u8, d3u8); + q9u16 = vmull_u8(d27u8, d3u8); + q10u16 = vmull_u8(d28u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + vst1_u8(dst_ptr, d6u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d7u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d8u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d9u8); + return; +} + +void vp8_sixtap_predict8x8_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + unsigned char *src, *tmpp; + unsigned char tmp[64]; + int i; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; + uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; + uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; + int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; + uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; + uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; + int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; + uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; + + if (xoffset == 0) { // secondpass_filter8x8_only + // load second_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // load src data + src = src_ptr - src_pixels_per_line * 2; + d18u8 = vld1_u8(src); + src += src_pixels_per_line; + d19u8 = vld1_u8(src); + src += src_pixels_per_line; + d20u8 = vld1_u8(src); + src += src_pixels_per_line; + d21u8 = vld1_u8(src); + src += src_pixels_per_line; + d22u8 = vld1_u8(src); + src += src_pixels_per_line; + d23u8 = vld1_u8(src); + src += src_pixels_per_line; + d24u8 = vld1_u8(src); + src += src_pixels_per_line; + d25u8 = vld1_u8(src); + src += src_pixels_per_line; + d26u8 = vld1_u8(src); + src += src_pixels_per_line; + d27u8 = vld1_u8(src); + src += src_pixels_per_line; + d28u8 = vld1_u8(src); + src += src_pixels_per_line; + d29u8 = vld1_u8(src); + src += src_pixels_per_line; + d30u8 = vld1_u8(src); + + for (i = 2; i > 0; i--) { + q3u16 = vmull_u8(d18u8, d0u8); + q4u16 = vmull_u8(d19u8, d0u8); + q5u16 = vmull_u8(d20u8, d0u8); + q6u16 = vmull_u8(d21u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d20u8, d2u8); + q4u16 = vmlal_u8(q4u16, d21u8, d2u8); + q5u16 = vmlal_u8(q5u16, d22u8, d2u8); + q6u16 = vmlal_u8(q6u16, d23u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d23u8, d5u8); + q4u16 = vmlal_u8(q4u16, d24u8, d5u8); + q5u16 = vmlal_u8(q5u16, d25u8, d5u8); + q6u16 = vmlal_u8(q6u16, d26u8, d5u8); + + q7u16 = vmull_u8(d21u8, d3u8); + q8u16 = vmull_u8(d22u8, d3u8); + q9u16 = vmull_u8(d23u8, d3u8); + q10u16 = vmull_u8(d24u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + d18u8 = d22u8; + d19u8 = d23u8; + d20u8 = d24u8; + d21u8 = d25u8; + d22u8 = d26u8; + d23u8 = d27u8; + d24u8 = d28u8; + d25u8 = d29u8; + d26u8 = d30u8; + + vst1_u8(dst_ptr, d6u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d7u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d8u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d9u8); + dst_ptr += dst_pitch; + } + return; + } + + // load first_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // First pass: output_height lines x output_width columns (9x4) + if (yoffset == 0) // firstpass_filter4x4_only + src = src_ptr - 2; + else + src = src_ptr - 2 - (src_pixels_per_line * 2); + + tmpp = tmp; + for (i = 2; i > 0; i--) { + q3u8 = vld1q_u8(src); + src += src_pixels_per_line; + q4u8 = vld1q_u8(src); + src += src_pixels_per_line; + q5u8 = vld1q_u8(src); + src += src_pixels_per_line; + q6u8 = vld1q_u8(src); + src += src_pixels_per_line; + + __builtin_prefetch(src); + __builtin_prefetch(src + src_pixels_per_line); + __builtin_prefetch(src + src_pixels_per_line * 2); + + q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); + + q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); + q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); + q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); + q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); + + q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); + q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); + q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); + q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); + + q7u16 = vmlal_u8(q7u16, d28u8, d2u8); + q8u16 = vmlal_u8(q8u16, d29u8, d2u8); + q9u16 = vmlal_u8(q9u16, d30u8, d2u8); + q10u16 = vmlal_u8(q10u16, d31u8, d2u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); + + q7u16 = vmlal_u8(q7u16, d28u8, d5u8); + q8u16 = vmlal_u8(q8u16, d29u8, d5u8); + q9u16 = vmlal_u8(q9u16, d30u8, d5u8); + q10u16 = vmlal_u8(q10u16, d31u8, d5u8); + + d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); + d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); + d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); + d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); + + q3u16 = vmull_u8(d28u8, d3u8); + q4u16 = vmull_u8(d29u8, d3u8); + q5u16 = vmull_u8(d30u8, d3u8); + q6u16 = vmull_u8(d31u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d22u8 = vqrshrun_n_s16(q7s16, 7); + d23u8 = vqrshrun_n_s16(q8s16, 7); + d24u8 = vqrshrun_n_s16(q9s16, 7); + d25u8 = vqrshrun_n_s16(q10s16, 7); + + if (yoffset == 0) { // firstpass_filter8x4_only + vst1_u8(dst_ptr, d22u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d23u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d24u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d25u8); + dst_ptr += dst_pitch; + } else { + vst1_u8(tmpp, d22u8); + tmpp += 8; + vst1_u8(tmpp, d23u8); + tmpp += 8; + vst1_u8(tmpp, d24u8); + tmpp += 8; + vst1_u8(tmpp, d25u8); + tmpp += 8; + } + } + if (yoffset == 0) + return; + + // First Pass on rest 5-line data + q3u8 = vld1q_u8(src); + src += src_pixels_per_line; + q4u8 = vld1q_u8(src); + src += src_pixels_per_line; + q5u8 = vld1q_u8(src); + src += src_pixels_per_line; + q6u8 = vld1q_u8(src); + src += src_pixels_per_line; + q7u8 = vld1q_u8(src); + + q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); + q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); + q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); + q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); + q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); + + q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); + q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); + q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); + q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); + + q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); + q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); + q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); + q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); + + q8u16 = vmlal_u8(q8u16, d27u8, d2u8); + q9u16 = vmlal_u8(q9u16, d28u8, d2u8); + q10u16 = vmlal_u8(q10u16, d29u8, d2u8); + q11u16 = vmlal_u8(q11u16, d30u8, d2u8); + q12u16 = vmlal_u8(q12u16, d31u8, d2u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); + + q8u16 = vmlal_u8(q8u16, d27u8, d5u8); + q9u16 = vmlal_u8(q9u16, d28u8, d5u8); + q10u16 = vmlal_u8(q10u16, d29u8, d5u8); + q11u16 = vmlal_u8(q11u16, d30u8, d5u8); + q12u16 = vmlal_u8(q12u16, d31u8, d5u8); + + d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); + d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); + d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); + d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); + d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); + + q3u16 = vmull_u8(d27u8, d3u8); + q4u16 = vmull_u8(d28u8, d3u8); + q5u16 = vmull_u8(d29u8, d3u8); + q6u16 = vmull_u8(d30u8, d3u8); + q7u16 = vmull_u8(d31u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + q11s16 = vreinterpretq_s16_u16(q11u16); + q12s16 = vreinterpretq_s16_u16(q12u16); + + q8s16 = vqaddq_s16(q8s16, q3s16); + q9s16 = vqaddq_s16(q9s16, q4s16); + q10s16 = vqaddq_s16(q10s16, q5s16); + q11s16 = vqaddq_s16(q11s16, q6s16); + q12s16 = vqaddq_s16(q12s16, q7s16); + + d26u8 = vqrshrun_n_s16(q8s16, 7); + d27u8 = vqrshrun_n_s16(q9s16, 7); + d28u8 = vqrshrun_n_s16(q10s16, 7); + d29u8 = vqrshrun_n_s16(q11s16, 7); + d30u8 = vqrshrun_n_s16(q12s16, 7); + + // Second pass: 8x8 + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + tmpp = tmp; + q9u8 = vld1q_u8(tmpp); + tmpp += 16; + q10u8 = vld1q_u8(tmpp); + tmpp += 16; + q11u8 = vld1q_u8(tmpp); + tmpp += 16; + q12u8 = vld1q_u8(tmpp); + + d18u8 = vget_low_u8(q9u8); + d19u8 = vget_high_u8(q9u8); + d20u8 = vget_low_u8(q10u8); + d21u8 = vget_high_u8(q10u8); + d22u8 = vget_low_u8(q11u8); + d23u8 = vget_high_u8(q11u8); + d24u8 = vget_low_u8(q12u8); + d25u8 = vget_high_u8(q12u8); + + for (i = 2; i > 0; i--) { + q3u16 = vmull_u8(d18u8, d0u8); + q4u16 = vmull_u8(d19u8, d0u8); + q5u16 = vmull_u8(d20u8, d0u8); + q6u16 = vmull_u8(d21u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d20u8, d2u8); + q4u16 = vmlal_u8(q4u16, d21u8, d2u8); + q5u16 = vmlal_u8(q5u16, d22u8, d2u8); + q6u16 = vmlal_u8(q6u16, d23u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d23u8, d5u8); + q4u16 = vmlal_u8(q4u16, d24u8, d5u8); + q5u16 = vmlal_u8(q5u16, d25u8, d5u8); + q6u16 = vmlal_u8(q6u16, d26u8, d5u8); + + q7u16 = vmull_u8(d21u8, d3u8); + q8u16 = vmull_u8(d22u8, d3u8); + q9u16 = vmull_u8(d23u8, d3u8); + q10u16 = vmull_u8(d24u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + d18u8 = d22u8; + d19u8 = d23u8; + d20u8 = d24u8; + d21u8 = d25u8; + d22u8 = d26u8; + d23u8 = d27u8; + d24u8 = d28u8; + d25u8 = d29u8; + d26u8 = d30u8; + + vst1_u8(dst_ptr, d6u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d7u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d8u8); + dst_ptr += dst_pitch; + vst1_u8(dst_ptr, d9u8); + dst_ptr += dst_pitch; + } + return; +} + +void vp8_sixtap_predict16x16_neon( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch) { + unsigned char *src, *src_tmp, *dst, *tmpp; + unsigned char tmp[336]; + int i, j; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; + uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; + uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; + uint8x8_t d28u8, d29u8, d30u8, d31u8; + int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; + uint8x16_t q3u8, q4u8; + uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; + uint16x8_t q11u16, q12u16, q13u16, q15u16; + int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; + int16x8_t q11s16, q12s16, q13s16, q15s16; + + if (xoffset == 0) { // secondpass_filter8x8_only + // load second_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // load src data + src_tmp = src_ptr - src_pixels_per_line * 2; + for (i = 0; i < 2; i++) { + src = src_tmp + i * 8; + dst = dst_ptr + i * 8; + d18u8 = vld1_u8(src); + src += src_pixels_per_line; + d19u8 = vld1_u8(src); + src += src_pixels_per_line; + d20u8 = vld1_u8(src); + src += src_pixels_per_line; + d21u8 = vld1_u8(src); + src += src_pixels_per_line; + d22u8 = vld1_u8(src); + src += src_pixels_per_line; + for (j = 0; j < 4; j++) { + d23u8 = vld1_u8(src); + src += src_pixels_per_line; + d24u8 = vld1_u8(src); + src += src_pixels_per_line; + d25u8 = vld1_u8(src); + src += src_pixels_per_line; + d26u8 = vld1_u8(src); + src += src_pixels_per_line; + + q3u16 = vmull_u8(d18u8, d0u8); + q4u16 = vmull_u8(d19u8, d0u8); + q5u16 = vmull_u8(d20u8, d0u8); + q6u16 = vmull_u8(d21u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d20u8, d2u8); + q4u16 = vmlal_u8(q4u16, d21u8, d2u8); + q5u16 = vmlal_u8(q5u16, d22u8, d2u8); + q6u16 = vmlal_u8(q6u16, d23u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d23u8, d5u8); + q4u16 = vmlal_u8(q4u16, d24u8, d5u8); + q5u16 = vmlal_u8(q5u16, d25u8, d5u8); + q6u16 = vmlal_u8(q6u16, d26u8, d5u8); + + q7u16 = vmull_u8(d21u8, d3u8); + q8u16 = vmull_u8(d22u8, d3u8); + q9u16 = vmull_u8(d23u8, d3u8); + q10u16 = vmull_u8(d24u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + d18u8 = d22u8; + d19u8 = d23u8; + d20u8 = d24u8; + d21u8 = d25u8; + d22u8 = d26u8; + + vst1_u8(dst, d6u8); + dst += dst_pitch; + vst1_u8(dst, d7u8); + dst += dst_pitch; + vst1_u8(dst, d8u8); + dst += dst_pitch; + vst1_u8(dst, d9u8); + dst += dst_pitch; + } + } + return; + } + + // load first_pass filter + dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + // First pass: output_height lines x output_width columns (9x4) + if (yoffset == 0) { // firstpass_filter4x4_only + src = src_ptr - 2; + dst = dst_ptr; + for (i = 0; i < 8; i++) { + d6u8 = vld1_u8(src); + d7u8 = vld1_u8(src + 8); + d8u8 = vld1_u8(src + 16); + src += src_pixels_per_line; + d9u8 = vld1_u8(src); + d10u8 = vld1_u8(src + 8); + d11u8 = vld1_u8(src + 16); + src += src_pixels_per_line; + + __builtin_prefetch(src); + __builtin_prefetch(src + src_pixels_per_line); + + q6u16 = vmull_u8(d6u8, d0u8); + q7u16 = vmull_u8(d7u8, d0u8); + q8u16 = vmull_u8(d9u8, d0u8); + q9u16 = vmull_u8(d10u8, d0u8); + + d20u8 = vext_u8(d6u8, d7u8, 1); + d21u8 = vext_u8(d9u8, d10u8, 1); + d22u8 = vext_u8(d7u8, d8u8, 1); + d23u8 = vext_u8(d10u8, d11u8, 1); + d24u8 = vext_u8(d6u8, d7u8, 4); + d25u8 = vext_u8(d9u8, d10u8, 4); + d26u8 = vext_u8(d7u8, d8u8, 4); + d27u8 = vext_u8(d10u8, d11u8, 4); + d28u8 = vext_u8(d6u8, d7u8, 5); + d29u8 = vext_u8(d9u8, d10u8, 5); + + q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); + q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); + q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); + q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); + q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); + q7u16 = vmlsl_u8(q7u16, d26u8, d4u8); + q9u16 = vmlsl_u8(q9u16, d27u8, d4u8); + q6u16 = vmlal_u8(q6u16, d28u8, d5u8); + q8u16 = vmlal_u8(q8u16, d29u8, d5u8); + + d20u8 = vext_u8(d7u8, d8u8, 5); + d21u8 = vext_u8(d10u8, d11u8, 5); + d22u8 = vext_u8(d6u8, d7u8, 2); + d23u8 = vext_u8(d9u8, d10u8, 2); + d24u8 = vext_u8(d7u8, d8u8, 2); + d25u8 = vext_u8(d10u8, d11u8, 2); + d26u8 = vext_u8(d6u8, d7u8, 3); + d27u8 = vext_u8(d9u8, d10u8, 3); + d28u8 = vext_u8(d7u8, d8u8, 3); + d29u8 = vext_u8(d10u8, d11u8, 3); + + q7u16 = vmlal_u8(q7u16, d20u8, d5u8); + q9u16 = vmlal_u8(q9u16, d21u8, d5u8); + q6u16 = vmlal_u8(q6u16, d22u8, d2u8); + q8u16 = vmlal_u8(q8u16, d23u8, d2u8); + q7u16 = vmlal_u8(q7u16, d24u8, d2u8); + q9u16 = vmlal_u8(q9u16, d25u8, d2u8); + + q10u16 = vmull_u8(d26u8, d3u8); + q11u16 = vmull_u8(d27u8, d3u8); + q12u16 = vmull_u8(d28u8, d3u8); + q15u16 = vmull_u8(d29u8, d3u8); + + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + q11s16 = vreinterpretq_s16_u16(q11u16); + q12s16 = vreinterpretq_s16_u16(q12u16); + q15s16 = vreinterpretq_s16_u16(q15u16); + + q6s16 = vqaddq_s16(q6s16, q10s16); + q8s16 = vqaddq_s16(q8s16, q11s16); + q7s16 = vqaddq_s16(q7s16, q12s16); + q9s16 = vqaddq_s16(q9s16, q15s16); + + d6u8 = vqrshrun_n_s16(q6s16, 7); + d7u8 = vqrshrun_n_s16(q7s16, 7); + d8u8 = vqrshrun_n_s16(q8s16, 7); + d9u8 = vqrshrun_n_s16(q9s16, 7); + + q3u8 = vcombine_u8(d6u8, d7u8); + q4u8 = vcombine_u8(d8u8, d9u8); + vst1q_u8(dst, q3u8); + dst += dst_pitch; + vst1q_u8(dst, q4u8); + dst += dst_pitch; + } + return; + } + + src = src_ptr - 2 - src_pixels_per_line * 2; + tmpp = tmp; + for (i = 0; i < 7; i++) { + d6u8 = vld1_u8(src); + d7u8 = vld1_u8(src + 8); + d8u8 = vld1_u8(src + 16); + src += src_pixels_per_line; + d9u8 = vld1_u8(src); + d10u8 = vld1_u8(src + 8); + d11u8 = vld1_u8(src + 16); + src += src_pixels_per_line; + d12u8 = vld1_u8(src); + d13u8 = vld1_u8(src + 8); + d14u8 = vld1_u8(src + 16); + src += src_pixels_per_line; + + __builtin_prefetch(src); + __builtin_prefetch(src + src_pixels_per_line); + __builtin_prefetch(src + src_pixels_per_line * 2); + + q8u16 = vmull_u8(d6u8, d0u8); + q9u16 = vmull_u8(d7u8, d0u8); + q10u16 = vmull_u8(d9u8, d0u8); + q11u16 = vmull_u8(d10u8, d0u8); + q12u16 = vmull_u8(d12u8, d0u8); + q13u16 = vmull_u8(d13u8, d0u8); + + d28u8 = vext_u8(d6u8, d7u8, 1); + d29u8 = vext_u8(d9u8, d10u8, 1); + d30u8 = vext_u8(d12u8, d13u8, 1); + q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); + q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); + d28u8 = vext_u8(d7u8, d8u8, 1); + d29u8 = vext_u8(d10u8, d11u8, 1); + d30u8 = vext_u8(d13u8, d14u8, 1); + q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); + q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); + q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); + + d28u8 = vext_u8(d6u8, d7u8, 4); + d29u8 = vext_u8(d9u8, d10u8, 4); + d30u8 = vext_u8(d12u8, d13u8, 4); + q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); + q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); + q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); + d28u8 = vext_u8(d7u8, d8u8, 4); + d29u8 = vext_u8(d10u8, d11u8, 4); + d30u8 = vext_u8(d13u8, d14u8, 4); + q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); + q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); + q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); + + d28u8 = vext_u8(d6u8, d7u8, 5); + d29u8 = vext_u8(d9u8, d10u8, 5); + d30u8 = vext_u8(d12u8, d13u8, 5); + q8u16 = vmlal_u8(q8u16, d28u8, d5u8); + q10u16 = vmlal_u8(q10u16, d29u8, d5u8); + q12u16 = vmlal_u8(q12u16, d30u8, d5u8); + d28u8 = vext_u8(d7u8, d8u8, 5); + d29u8 = vext_u8(d10u8, d11u8, 5); + d30u8 = vext_u8(d13u8, d14u8, 5); + q9u16 = vmlal_u8(q9u16, d28u8, d5u8); + q11u16 = vmlal_u8(q11u16, d29u8, d5u8); + q13u16 = vmlal_u8(q13u16, d30u8, d5u8); + + d28u8 = vext_u8(d6u8, d7u8, 2); + d29u8 = vext_u8(d9u8, d10u8, 2); + d30u8 = vext_u8(d12u8, d13u8, 2); + q8u16 = vmlal_u8(q8u16, d28u8, d2u8); + q10u16 = vmlal_u8(q10u16, d29u8, d2u8); + q12u16 = vmlal_u8(q12u16, d30u8, d2u8); + d28u8 = vext_u8(d7u8, d8u8, 2); + d29u8 = vext_u8(d10u8, d11u8, 2); + d30u8 = vext_u8(d13u8, d14u8, 2); + q9u16 = vmlal_u8(q9u16, d28u8, d2u8); + q11u16 = vmlal_u8(q11u16, d29u8, d2u8); + q13u16 = vmlal_u8(q13u16, d30u8, d2u8); + + d28u8 = vext_u8(d6u8, d7u8, 3); + d29u8 = vext_u8(d9u8, d10u8, 3); + d30u8 = vext_u8(d12u8, d13u8, 3); + d15u8 = vext_u8(d7u8, d8u8, 3); + d31u8 = vext_u8(d10u8, d11u8, 3); + d6u8 = vext_u8(d13u8, d14u8, 3); + q4u16 = vmull_u8(d28u8, d3u8); + q5u16 = vmull_u8(d29u8, d3u8); + q6u16 = vmull_u8(d30u8, d3u8); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + q12s16 = vreinterpretq_s16_u16(q12u16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q10s16 = vqaddq_s16(q10s16, q5s16); + q12s16 = vqaddq_s16(q12s16, q6s16); + + q6u16 = vmull_u8(d15u8, d3u8); + q7u16 = vmull_u8(d31u8, d3u8); + q3u16 = vmull_u8(d6u8, d3u8); + q3s16 = vreinterpretq_s16_u16(q3u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q11s16 = vreinterpretq_s16_u16(q11u16); + q13s16 = vreinterpretq_s16_u16(q13u16); + q9s16 = vqaddq_s16(q9s16, q6s16); + q11s16 = vqaddq_s16(q11s16, q7s16); + q13s16 = vqaddq_s16(q13s16, q3s16); + + d6u8 = vqrshrun_n_s16(q8s16, 7); + d7u8 = vqrshrun_n_s16(q9s16, 7); + d8u8 = vqrshrun_n_s16(q10s16, 7); + d9u8 = vqrshrun_n_s16(q11s16, 7); + d10u8 = vqrshrun_n_s16(q12s16, 7); + d11u8 = vqrshrun_n_s16(q13s16, 7); + + vst1_u8(tmpp, d6u8); + tmpp += 8; + vst1_u8(tmpp, d7u8); + tmpp += 8; + vst1_u8(tmpp, d8u8); + tmpp += 8; + vst1_u8(tmpp, d9u8); + tmpp += 8; + vst1_u8(tmpp, d10u8); + tmpp += 8; + vst1_u8(tmpp, d11u8); + tmpp += 8; + } + + // Second pass: 16x16 + dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); + d0s8 = vdup_lane_s8(dtmps8, 0); + d1s8 = vdup_lane_s8(dtmps8, 1); + d2s8 = vdup_lane_s8(dtmps8, 2); + d3s8 = vdup_lane_s8(dtmps8, 3); + d4s8 = vdup_lane_s8(dtmps8, 4); + d5s8 = vdup_lane_s8(dtmps8, 5); + d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); + d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); + d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); + d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); + d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); + d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); + + for (i = 0; i < 2; i++) { + dst = dst_ptr + 8 * i; + tmpp = tmp + 8 * i; + d18u8 = vld1_u8(tmpp); + tmpp += 16; + d19u8 = vld1_u8(tmpp); + tmpp += 16; + d20u8 = vld1_u8(tmpp); + tmpp += 16; + d21u8 = vld1_u8(tmpp); + tmpp += 16; + d22u8 = vld1_u8(tmpp); + tmpp += 16; + for (j = 0; j < 4; j++) { + d23u8 = vld1_u8(tmpp); + tmpp += 16; + d24u8 = vld1_u8(tmpp); + tmpp += 16; + d25u8 = vld1_u8(tmpp); + tmpp += 16; + d26u8 = vld1_u8(tmpp); + tmpp += 16; + + q3u16 = vmull_u8(d18u8, d0u8); + q4u16 = vmull_u8(d19u8, d0u8); + q5u16 = vmull_u8(d20u8, d0u8); + q6u16 = vmull_u8(d21u8, d0u8); + + q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); + q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); + q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); + q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); + + q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); + q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); + q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); + q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); + + q3u16 = vmlal_u8(q3u16, d20u8, d2u8); + q4u16 = vmlal_u8(q4u16, d21u8, d2u8); + q5u16 = vmlal_u8(q5u16, d22u8, d2u8); + q6u16 = vmlal_u8(q6u16, d23u8, d2u8); + + q3u16 = vmlal_u8(q3u16, d23u8, d5u8); + q4u16 = vmlal_u8(q4u16, d24u8, d5u8); + q5u16 = vmlal_u8(q5u16, d25u8, d5u8); + q6u16 = vmlal_u8(q6u16, d26u8, d5u8); + + q7u16 = vmull_u8(d21u8, d3u8); + q8u16 = vmull_u8(d22u8, d3u8); + q9u16 = vmull_u8(d23u8, d3u8); + q10u16 = vmull_u8(d24u8, d3u8); + + q3s16 = vreinterpretq_s16_u16(q3u16); + q4s16 = vreinterpretq_s16_u16(q4u16); + q5s16 = vreinterpretq_s16_u16(q5u16); + q6s16 = vreinterpretq_s16_u16(q6u16); + q7s16 = vreinterpretq_s16_u16(q7u16); + q8s16 = vreinterpretq_s16_u16(q8u16); + q9s16 = vreinterpretq_s16_u16(q9u16); + q10s16 = vreinterpretq_s16_u16(q10u16); + + q7s16 = vqaddq_s16(q7s16, q3s16); + q8s16 = vqaddq_s16(q8s16, q4s16); + q9s16 = vqaddq_s16(q9s16, q5s16); + q10s16 = vqaddq_s16(q10s16, q6s16); + + d6u8 = vqrshrun_n_s16(q7s16, 7); + d7u8 = vqrshrun_n_s16(q8s16, 7); + d8u8 = vqrshrun_n_s16(q9s16, 7); + d9u8 = vqrshrun_n_s16(q10s16, 7); + + d18u8 = d22u8; + d19u8 = d23u8; + d20u8 = d24u8; + d21u8 = d25u8; + d22u8 = d26u8; + + vst1_u8(dst, d6u8); + dst += dst_pitch; + vst1_u8(dst, d7u8); + dst += dst_pitch; + vst1_u8(dst, d8u8); + dst += dst_pitch; + vst1_u8(dst, d9u8); + dst += dst_pitch; + } + } + return; +} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c new file mode 100644 index 00000000000..9d6807af715 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "./vpx_config.h" +#include "vpx_ports/arm.h" + +static INLINE void vp8_loop_filter_neon( + uint8x16_t qblimit, // flimit + uint8x16_t qlimit, // limit + uint8x16_t qthresh, // thresh + uint8x16_t q3, // p3 + uint8x16_t q4, // p2 + uint8x16_t q5, // p1 + uint8x16_t q6, // p0 + uint8x16_t q7, // q0 + uint8x16_t q8, // q1 + uint8x16_t q9, // q2 + uint8x16_t q10, // q3 + uint8x16_t *q5r, // p1 + uint8x16_t *q6r, // p0 + uint8x16_t *q7r, // q0 + uint8x16_t *q8r) { // q1 + uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; + int16x8_t q2s16, q11s16; + uint16x8_t q4u16; + int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8; + int8x8_t d2s8, d3s8; + + q11u8 = vabdq_u8(q3, q4); + q12u8 = vabdq_u8(q4, q5); + q13u8 = vabdq_u8(q5, q6); + q14u8 = vabdq_u8(q8, q7); + q3 = vabdq_u8(q9, q8); + q4 = vabdq_u8(q10, q9); + + q11u8 = vmaxq_u8(q11u8, q12u8); + q12u8 = vmaxq_u8(q13u8, q14u8); + q3 = vmaxq_u8(q3, q4); + q15u8 = vmaxq_u8(q11u8, q12u8); + + q9 = vabdq_u8(q6, q7); + + // vp8_hevmask + q13u8 = vcgtq_u8(q13u8, qthresh); + q14u8 = vcgtq_u8(q14u8, qthresh); + q15u8 = vmaxq_u8(q15u8, q3); + + q2u8 = vabdq_u8(q5, q8); + q9 = vqaddq_u8(q9, q9); + + q15u8 = vcgeq_u8(qlimit, q15u8); + + // vp8_filter() function + // convert to signed + q10 = vdupq_n_u8(0x80); + q8 = veorq_u8(q8, q10); + q7 = veorq_u8(q7, q10); + q6 = veorq_u8(q6, q10); + q5 = veorq_u8(q5, q10); + + q2u8 = vshrq_n_u8(q2u8, 1); + q9 = vqaddq_u8(q9, q2u8); + + q10 = vdupq_n_u8(3); + + q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), + vget_low_s8(vreinterpretq_s8_u8(q6))); + q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), + vget_high_s8(vreinterpretq_s8_u8(q6))); + + q9 = vcgeq_u8(qblimit, q9); + + q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), + vreinterpretq_s8_u8(q8)); + + q14u8 = vorrq_u8(q13u8, q14u8); + + q4u16 = vmovl_u8(vget_low_u8(q10)); + q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); + q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); + + q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); + q15u8 = vandq_u8(q15u8, q9); + + q1s8 = vreinterpretq_s8_u8(q1u8); + q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); + q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); + + q9 = vdupq_n_u8(4); + // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) + d2s8 = vqmovn_s16(q2s16); + d3s8 = vqmovn_s16(q11s16); + q1s8 = vcombine_s8(d2s8, d3s8); + q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); + q1s8 = vreinterpretq_s8_u8(q1u8); + + q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10)); + q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); + q2s8 = vshrq_n_s8(q2s8, 3); + q1s8 = vshrq_n_s8(q1s8, 3); + + q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); + q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); + + q1s8 = vrshrq_n_s8(q1s8, 1); + q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); + + q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); + q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); + + q0u8 = vdupq_n_u8(0x80); + *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8); + *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); + *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); + *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8); + return; +} + +void vp8_loop_filter_horizontal_edge_y_neon( + unsigned char *src, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh) { + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + src -= (pitch << 2); + + q3 = vld1q_u8(src); + src += pitch; + q4 = vld1q_u8(src); + src += pitch; + q5 = vld1q_u8(src); + src += pitch; + q6 = vld1q_u8(src); + src += pitch; + q7 = vld1q_u8(src); + src += pitch; + q8 = vld1q_u8(src); + src += pitch; + q9 = vld1q_u8(src); + src += pitch; + q10 = vld1q_u8(src); + + vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q5, &q6, &q7, &q8); + + src -= (pitch * 5); + vst1q_u8(src, q5); + src += pitch; + vst1q_u8(src, q6); + src += pitch; + vst1q_u8(src, q7); + src += pitch; + vst1q_u8(src, q8); + return; +} + +void vp8_loop_filter_horizontal_edge_uv_neon( + unsigned char *u, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh, + unsigned char *v) { + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + u -= (pitch << 2); + v -= (pitch << 2); + + d6 = vld1_u8(u); + u += pitch; + d7 = vld1_u8(v); + v += pitch; + d8 = vld1_u8(u); + u += pitch; + d9 = vld1_u8(v); + v += pitch; + d10 = vld1_u8(u); + u += pitch; + d11 = vld1_u8(v); + v += pitch; + d12 = vld1_u8(u); + u += pitch; + d13 = vld1_u8(v); + v += pitch; + d14 = vld1_u8(u); + u += pitch; + d15 = vld1_u8(v); + v += pitch; + d16 = vld1_u8(u); + u += pitch; + d17 = vld1_u8(v); + v += pitch; + d18 = vld1_u8(u); + u += pitch; + d19 = vld1_u8(v); + v += pitch; + d20 = vld1_u8(u); + d21 = vld1_u8(v); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q5, &q6, &q7, &q8); + + u -= (pitch * 5); + vst1_u8(u, vget_low_u8(q5)); + u += pitch; + vst1_u8(u, vget_low_u8(q6)); + u += pitch; + vst1_u8(u, vget_low_u8(q7)); + u += pitch; + vst1_u8(u, vget_low_u8(q8)); + + v -= (pitch * 5); + vst1_u8(v, vget_high_u8(q5)); + v += pitch; + vst1_u8(v, vget_high_u8(q6)); + v += pitch; + vst1_u8(v, vget_high_u8(q7)); + v += pitch; + vst1_u8(v, vget_high_u8(q8)); + return; +} + +static INLINE void write_4x8(unsigned char *dst, int pitch, + const uint8x8x4_t result) { +#ifdef VPX_INCOMPATIBLE_GCC + /* + * uint8x8x4_t result + 00 01 02 03 | 04 05 06 07 + 10 11 12 13 | 14 15 16 17 + 20 21 22 23 | 24 25 26 27 + 30 31 32 33 | 34 35 36 37 + --- + * after vtrn_u16 + 00 01 20 21 | 04 05 24 25 + 02 03 22 23 | 06 07 26 27 + 10 11 30 31 | 14 15 34 35 + 12 13 32 33 | 16 17 36 37 + --- + * after vtrn_u8 + 00 10 20 30 | 04 14 24 34 + 01 11 21 31 | 05 15 25 35 + 02 12 22 32 | 06 16 26 36 + 03 13 23 33 | 07 17 27 37 + */ + const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), + vreinterpret_u16_u8(result.val[2])); + const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), + vreinterpret_u16_u8(result.val[3])); + const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), + vreinterpret_u8_u16(r13_u16.val[0])); + const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), + vreinterpret_u8_u16(r13_u16.val[1])); + const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); + const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); + const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); + const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); + vst1_lane_u32((uint32_t *)dst, x_0_4, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_1_5, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_2_6, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_3_7, 0); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_0_4, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_1_5, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_2_6, 1); + dst += pitch; + vst1_lane_u32((uint32_t *)dst, x_3_7, 1); +#else + vst4_lane_u8(dst, result, 0); + dst += pitch; + vst4_lane_u8(dst, result, 1); + dst += pitch; + vst4_lane_u8(dst, result, 2); + dst += pitch; + vst4_lane_u8(dst, result, 3); + dst += pitch; + vst4_lane_u8(dst, result, 4); + dst += pitch; + vst4_lane_u8(dst, result, 5); + dst += pitch; + vst4_lane_u8(dst, result, 6); + dst += pitch; + vst4_lane_u8(dst, result, 7); +#endif // VPX_INCOMPATIBLE_GCC +} + +void vp8_loop_filter_vertical_edge_y_neon( + unsigned char *src, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh) { + unsigned char *s, *d; + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; + uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; + uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; + uint8x8x4_t q4ResultH, q4ResultL; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + s = src - 4; + d6 = vld1_u8(s); + s += pitch; + d8 = vld1_u8(s); + s += pitch; + d10 = vld1_u8(s); + s += pitch; + d12 = vld1_u8(s); + s += pitch; + d14 = vld1_u8(s); + s += pitch; + d16 = vld1_u8(s); + s += pitch; + d18 = vld1_u8(s); + s += pitch; + d20 = vld1_u8(s); + s += pitch; + d7 = vld1_u8(s); + s += pitch; + d9 = vld1_u8(s); + s += pitch; + d11 = vld1_u8(s); + s += pitch; + d13 = vld1_u8(s); + s += pitch; + d15 = vld1_u8(s); + s += pitch; + d17 = vld1_u8(s); + s += pitch; + d19 = vld1_u8(s); + s += pitch; + d21 = vld1_u8(s); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q5, &q6, &q7, &q8); + + q4ResultL.val[0] = vget_low_u8(q5); // d10 + q4ResultL.val[1] = vget_low_u8(q6); // d12 + q4ResultL.val[2] = vget_low_u8(q7); // d14 + q4ResultL.val[3] = vget_low_u8(q8); // d16 + q4ResultH.val[0] = vget_high_u8(q5); // d11 + q4ResultH.val[1] = vget_high_u8(q6); // d13 + q4ResultH.val[2] = vget_high_u8(q7); // d15 + q4ResultH.val[3] = vget_high_u8(q8); // d17 + + d = src - 2; + write_4x8(d, pitch, q4ResultL); + d += pitch * 8; + write_4x8(d, pitch, q4ResultH); +} + +void vp8_loop_filter_vertical_edge_uv_neon( + unsigned char *u, + int pitch, + unsigned char blimit, + unsigned char limit, + unsigned char thresh, + unsigned char *v) { + unsigned char *us, *ud; + unsigned char *vs, *vd; + uint8x16_t qblimit, qlimit, qthresh, q3, q4; + uint8x16_t q5, q6, q7, q8, q9, q10; + uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; + uint8x8_t d15, d16, d17, d18, d19, d20, d21; + uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; + uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; + uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; + uint8x8x4_t q4ResultH, q4ResultL; + + qblimit = vdupq_n_u8(blimit); + qlimit = vdupq_n_u8(limit); + qthresh = vdupq_n_u8(thresh); + + us = u - 4; + d6 = vld1_u8(us); + us += pitch; + d8 = vld1_u8(us); + us += pitch; + d10 = vld1_u8(us); + us += pitch; + d12 = vld1_u8(us); + us += pitch; + d14 = vld1_u8(us); + us += pitch; + d16 = vld1_u8(us); + us += pitch; + d18 = vld1_u8(us); + us += pitch; + d20 = vld1_u8(us); + + vs = v - 4; + d7 = vld1_u8(vs); + vs += pitch; + d9 = vld1_u8(vs); + vs += pitch; + d11 = vld1_u8(vs); + vs += pitch; + d13 = vld1_u8(vs); + vs += pitch; + d15 = vld1_u8(vs); + vs += pitch; + d17 = vld1_u8(vs); + vs += pitch; + d19 = vld1_u8(vs); + vs += pitch; + d21 = vld1_u8(vs); + + q3 = vcombine_u8(d6, d7); + q4 = vcombine_u8(d8, d9); + q5 = vcombine_u8(d10, d11); + q6 = vcombine_u8(d12, d13); + q7 = vcombine_u8(d14, d15); + q8 = vcombine_u8(d16, d17); + q9 = vcombine_u8(d18, d19); + q10 = vcombine_u8(d20, d21); + + q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); + q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); + q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); + q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); + + q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), + vreinterpretq_u16_u32(q2tmp2.val[0])); + q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), + vreinterpretq_u16_u32(q2tmp3.val[0])); + q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), + vreinterpretq_u16_u32(q2tmp2.val[1])); + q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), + vreinterpretq_u16_u32(q2tmp3.val[1])); + + q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), + vreinterpretq_u8_u16(q2tmp5.val[0])); + q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), + vreinterpretq_u8_u16(q2tmp5.val[1])); + q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), + vreinterpretq_u8_u16(q2tmp7.val[0])); + q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), + vreinterpretq_u8_u16(q2tmp7.val[1])); + + q3 = q2tmp8.val[0]; + q4 = q2tmp8.val[1]; + q5 = q2tmp9.val[0]; + q6 = q2tmp9.val[1]; + q7 = q2tmp10.val[0]; + q8 = q2tmp10.val[1]; + q9 = q2tmp11.val[0]; + q10 = q2tmp11.val[1]; + + vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, + q5, q6, q7, q8, q9, q10, + &q5, &q6, &q7, &q8); + + q4ResultL.val[0] = vget_low_u8(q5); // d10 + q4ResultL.val[1] = vget_low_u8(q6); // d12 + q4ResultL.val[2] = vget_low_u8(q7); // d14 + q4ResultL.val[3] = vget_low_u8(q8); // d16 + ud = u - 2; + write_4x8(ud, pitch, q4ResultL); + + q4ResultH.val[0] = vget_high_u8(q5); // d11 + q4ResultH.val[1] = vget_high_u8(q6); // d13 + q4ResultH.val[2] = vget_high_u8(q7); // d15 + q4ResultH.val[3] = vget_high_u8(q8); // d17 + vd = v - 2; + write_4x8(vd, pitch, q4ResultH); +} diff --git a/thirdparty/libvpx/vp8/common/blockd.c b/thirdparty/libvpx/vp8/common/blockd.c new file mode 100644 index 00000000000..1fc3cd0ca7c --- /dev/null +++ b/thirdparty/libvpx/vp8/common/blockd.c @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "blockd.h" +#include "vpx_mem/vpx_mem.h" + +const unsigned char vp8_block2left[25] = +{ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +}; +const unsigned char vp8_block2above[25] = +{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 +}; diff --git a/thirdparty/libvpx/vp8/common/blockd.h b/thirdparty/libvpx/vp8/common/blockd.h new file mode 100644 index 00000000000..192108a06db --- /dev/null +++ b/thirdparty/libvpx/vp8/common/blockd.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_BLOCKD_H_ +#define VP8_COMMON_BLOCKD_H_ + +void vpx_log(const char *format, ...); + +#include "vpx_config.h" +#include "vpx_scale/yv12config.h" +#include "mv.h" +#include "treecoder.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/*#define DCPRED 1*/ +#define DCPREDSIMTHRESH 0 +#define DCPREDCNTTHRESH 3 + +#define MB_FEATURE_TREE_PROBS 3 +#define MAX_MB_SEGMENTS 4 + +#define MAX_REF_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 4 + +/* Segment Feature Masks */ +#define SEGMENT_DELTADATA 0 +#define SEGMENT_ABSDATA 1 + +typedef struct +{ + int r, c; +} POS; + +#define PLANE_TYPE_Y_NO_DC 0 +#define PLANE_TYPE_Y2 1 +#define PLANE_TYPE_UV 2 +#define PLANE_TYPE_Y_WITH_DC 3 + + +typedef char ENTROPY_CONTEXT; +typedef struct +{ + ENTROPY_CONTEXT y1[4]; + ENTROPY_CONTEXT u[2]; + ENTROPY_CONTEXT v[2]; + ENTROPY_CONTEXT y2; +} ENTROPY_CONTEXT_PLANES; + +extern const unsigned char vp8_block2left[25]; +extern const unsigned char vp8_block2above[25]; + +#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ + Dest = (A)+(B); + + +typedef enum +{ + KEY_FRAME = 0, + INTER_FRAME = 1 +} FRAME_TYPE; + +typedef enum +{ + DC_PRED, /* average of above and left pixels */ + V_PRED, /* vertical prediction */ + H_PRED, /* horizontal prediction */ + TM_PRED, /* Truemotion prediction */ + B_PRED, /* block based prediction, each block has its own prediction mode */ + + NEARESTMV, + NEARMV, + ZEROMV, + NEWMV, + SPLITMV, + + MB_MODE_COUNT +} MB_PREDICTION_MODE; + +/* Macroblock level features */ +typedef enum +{ + MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ + MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ + MB_LVL_MAX = 2 /* Number of MB level features supported */ + +} MB_LVL_FEATURES; + +/* Segment Feature Masks */ +#define SEGMENT_ALTQ 0x01 +#define SEGMENT_ALT_LF 0x02 + +#define VP8_YMODES (B_PRED + 1) +#define VP8_UV_MODES (TM_PRED + 1) + +#define VP8_MVREFS (1 + SPLITMV - NEARESTMV) + +typedef enum +{ + B_DC_PRED, /* average of above and left pixels */ + B_TM_PRED, + + B_VE_PRED, /* vertical prediction */ + B_HE_PRED, /* horizontal prediction */ + + B_LD_PRED, + B_RD_PRED, + + B_VR_PRED, + B_VL_PRED, + B_HD_PRED, + B_HU_PRED, + + LEFT4X4, + ABOVE4X4, + ZERO4X4, + NEW4X4, + + B_MODE_COUNT +} B_PREDICTION_MODE; + +#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ +#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) + +/* For keyframes, intra block modes are predicted by the (already decoded) + modes for the Y blocks to the left and above us; for interframes, there + is a single probability table. */ + +union b_mode_info +{ + B_PREDICTION_MODE as_mode; + int_mv mv; +}; + +typedef enum +{ + INTRA_FRAME = 0, + LAST_FRAME = 1, + GOLDEN_FRAME = 2, + ALTREF_FRAME = 3, + MAX_REF_FRAMES = 4 +} MV_REFERENCE_FRAME; + +typedef struct +{ + uint8_t mode, uv_mode; + uint8_t ref_frame; + uint8_t is_4x4; + int_mv mv; + + uint8_t partitioning; + uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ + uint8_t need_to_clamp_mvs; + uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ +} MB_MODE_INFO; + +typedef struct modeinfo +{ + MB_MODE_INFO mbmi; + union b_mode_info bmi[16]; +} MODE_INFO; + +#if CONFIG_MULTI_RES_ENCODING +/* The mb-level information needed to be stored for higher-resolution encoder */ +typedef struct +{ + MB_PREDICTION_MODE mode; + MV_REFERENCE_FRAME ref_frame; + int_mv mv; + int dissim; /* dissimilarity level of the macroblock */ +} LOWER_RES_MB_INFO; + +/* The frame-level information needed to be stored for higher-resolution + * encoder */ +typedef struct +{ + FRAME_TYPE frame_type; + int is_frame_dropped; + // The frame rate for the lowest resolution. + double low_res_framerate; + /* The frame number of each reference frames */ + unsigned int low_res_ref_frames[MAX_REF_FRAMES]; + // The video frame counter value for the key frame, for lowest resolution. + unsigned int key_frame_counter_value; + LOWER_RES_MB_INFO *mb_info; +} LOWER_RES_FRAME_INFO; +#endif + +typedef struct blockd +{ + short *qcoeff; + short *dqcoeff; + unsigned char *predictor; + short *dequant; + + int offset; + char *eob; + + union b_mode_info bmi; +} BLOCKD; + +typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +typedef struct macroblockd +{ + DECLARE_ALIGNED(16, unsigned char, predictor[384]); + DECLARE_ALIGNED(16, short, qcoeff[400]); + DECLARE_ALIGNED(16, short, dqcoeff[400]); + DECLARE_ALIGNED(16, char, eobs[25]); + + DECLARE_ALIGNED(16, short, dequant_y1[16]); + DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); + DECLARE_ALIGNED(16, short, dequant_y2[16]); + DECLARE_ALIGNED(16, short, dequant_uv[16]); + + /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ + BLOCKD block[25]; + int fullpixel_mask; + + YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ + YV12_BUFFER_CONFIG dst; + + MODE_INFO *mode_info_context; + int mode_info_stride; + + FRAME_TYPE frame_type; + + int up_available; + int left_available; + + unsigned char *recon_above[3]; + unsigned char *recon_left[3]; + int recon_left_stride[2]; + + /* Y,U,V,Y2 */ + ENTROPY_CONTEXT_PLANES *above_context; + ENTROPY_CONTEXT_PLANES *left_context; + + /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */ + unsigned char segmentation_enabled; + + /* 0 (do not update) 1 (update) the macroblock segmentation map. */ + unsigned char update_mb_segmentation_map; + + /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ + unsigned char update_mb_segmentation_data; + + /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ + unsigned char mb_segement_abs_delta; + + /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ + /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ + vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */ + + signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */ + + /* mode_based Loop filter adjustment */ + unsigned char mode_ref_lf_delta_enabled; + unsigned char mode_ref_lf_delta_update; + + /* Delta values have the range +/- MAX_LOOP_FILTER */ + signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ + signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ + signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ + signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ + + /* Distance of MB away from frame edges */ + int mb_to_left_edge; + int mb_to_right_edge; + int mb_to_top_edge; + int mb_to_bottom_edge; + + + + vp8_subpix_fn_t subpixel_predict; + vp8_subpix_fn_t subpixel_predict8x4; + vp8_subpix_fn_t subpixel_predict8x8; + vp8_subpix_fn_t subpixel_predict16x16; + + void *current_bc; + + int corrupted; + +#if ARCH_X86 || ARCH_X86_64 + /* This is an intermediate buffer currently used in sub-pixel motion search + * to keep a copy of the reference area. This buffer can be used for other + * purpose. + */ + DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]); +#endif +} MACROBLOCKD; + + +extern void vp8_build_block_doffsets(MACROBLOCKD *x); +extern void vp8_setup_block_dptrs(MACROBLOCKD *x); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp8/common/coefupdateprobs.h b/thirdparty/libvpx/vp8/common/coefupdateprobs.h new file mode 100644 index 00000000000..d96a19e7478 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/coefupdateprobs.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_COMMON_COEFUPDATEPROBS_H_ +#define VP8_COMMON_COEFUPDATEPROBS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Update probabilities for the nodes in the token entropy tree. + Generated file included by entropy.c */ + +const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = +{ + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, + {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, + {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, + {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, + {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, + }, + { + {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, + {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, + {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, + }, + { + {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, + {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, + {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, + {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_COEFUPDATEPROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/common.h b/thirdparty/libvpx/vp8/common/common.h new file mode 100644 index 00000000000..e58a9cc23ba --- /dev/null +++ b/thirdparty/libvpx/vp8/common/common.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_COMMON_H_ +#define VP8_COMMON_COMMON_H_ + +#include + +/* Interface header for common constant data structures and lookup tables */ + +#include "vpx_mem/vpx_mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Only need this for fixed-size arrays, for structs just assign. */ + +#define vp8_copy( Dest, Src) { \ + assert( sizeof( Dest) == sizeof( Src)); \ + memcpy( Dest, Src, sizeof( Src)); \ + } + +/* Use this for variably-sized arrays. */ + +#define vp8_copy_array( Dest, Src, N) { \ + assert( sizeof( *Dest) == sizeof( *Src)); \ + memcpy( Dest, Src, N * sizeof( *Src)); \ + } + +#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest)); + +#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest)); + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/copy_c.c b/thirdparty/libvpx/vp8/common/copy_c.c new file mode 100644 index 00000000000..e3392913f63 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/copy_c.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include + +#include "./vp8_rtcd.h" +#include "vpx/vpx_integer.h" + +/* Copy 2 macroblocks to a buffer */ +void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride, + unsigned char *dst_ptr, int dst_stride, + int height) +{ + int r; + + for (r = 0; r < height; r++) + { + memcpy(dst_ptr, src_ptr, 32); + + src_ptr += src_stride; + dst_ptr += dst_stride; + + } +} diff --git a/thirdparty/libvpx/vp8/common/debugmodes.c b/thirdparty/libvpx/vp8/common/debugmodes.c new file mode 100644 index 00000000000..159fddc6a76 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/debugmodes.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include "blockd.h" + + +void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame) +{ + + int mb_row; + int mb_col; + int mb_index = 0; + FILE *mvs = fopen("mvs.stt", "a"); + + /* print out the macroblock Y modes */ + mb_index = 0; + fprintf(mvs, "Mb Modes for Frame %d\n", frame); + + for (mb_row = 0; mb_row < rows; mb_row++) + { + for (mb_col = 0; mb_col < cols; mb_col++) + { + + fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode); + + mb_index++; + } + + fprintf(mvs, "\n"); + mb_index++; + } + + fprintf(mvs, "\n"); + + mb_index = 0; + fprintf(mvs, "Mb mv ref for Frame %d\n", frame); + + for (mb_row = 0; mb_row < rows; mb_row++) + { + for (mb_col = 0; mb_col < cols; mb_col++) + { + + fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame); + + mb_index++; + } + + fprintf(mvs, "\n"); + mb_index++; + } + + fprintf(mvs, "\n"); + + /* print out the macroblock UV modes */ + mb_index = 0; + fprintf(mvs, "UV Modes for Frame %d\n", frame); + + for (mb_row = 0; mb_row < rows; mb_row++) + { + for (mb_col = 0; mb_col < cols; mb_col++) + { + + fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode); + + mb_index++; + } + + mb_index++; + fprintf(mvs, "\n"); + } + + fprintf(mvs, "\n"); + + /* print out the block modes */ + fprintf(mvs, "Mbs for Frame %d\n", frame); + { + int b_row; + + for (b_row = 0; b_row < 4 * rows; b_row++) + { + int b_col; + int bindex; + + for (b_col = 0; b_col < 4 * cols; b_col++) + { + mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); + bindex = (b_row & 3) * 4 + (b_col & 3); + + if (mi[mb_index].mbmi.mode == B_PRED) + fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode); + else + fprintf(mvs, "xx "); + + } + + fprintf(mvs, "\n"); + } + } + fprintf(mvs, "\n"); + + /* print out the macroblock mvs */ + mb_index = 0; + fprintf(mvs, "MVs for Frame %d\n", frame); + + for (mb_row = 0; mb_row < rows; mb_row++) + { + for (mb_col = 0; mb_col < cols; mb_col++) + { + fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2); + + mb_index++; + } + + mb_index++; + fprintf(mvs, "\n"); + } + + fprintf(mvs, "\n"); + + + /* print out the block modes */ + fprintf(mvs, "MVs for Frame %d\n", frame); + { + int b_row; + + for (b_row = 0; b_row < 4 * rows; b_row++) + { + int b_col; + int bindex; + + for (b_col = 0; b_col < 4 * cols; b_col++) + { + mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); + bindex = (b_row & 3) * 4 + (b_col & 3); + fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col); + + } + + fprintf(mvs, "\n"); + } + } + fprintf(mvs, "\n"); + + + fclose(mvs); +} diff --git a/thirdparty/libvpx/vp8/common/default_coef_probs.h b/thirdparty/libvpx/vp8/common/default_coef_probs.h new file mode 100644 index 00000000000..4d69e4be664 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/default_coef_probs.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. +*/ + +#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_ +#define VP8_COMMON_DEFAULT_COEF_PROBS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/*Generated file, included by entropy.c*/ + + +static const vp8_prob default_coef_probs [BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [ENTROPY_NODES] = +{ + { /* Block Type ( 0 ) */ + { /* Coeff Band ( 0 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 1 ) */ + { /* Coeff Band ( 0 )*/ + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 2 ) */ + { /* Coeff Band ( 0 )*/ + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 3 ) */ + { /* Coeff Band ( 0 )*/ + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + } + } +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/dequantize.c b/thirdparty/libvpx/vp8/common/dequantize.c new file mode 100644 index 00000000000..f8b04fa4ee5 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/dequantize.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vp8/common/blockd.h" +#include "vpx_mem/vpx_mem.h" + +void vp8_dequantize_b_c(BLOCKD *d, short *DQC) +{ + int i; + short *DQ = d->dqcoeff; + short *Q = d->qcoeff; + + for (i = 0; i < 16; i++) + { + DQ[i] = Q[i] * DQC[i]; + } +} + +void vp8_dequant_idct_add_c(short *input, short *dq, + unsigned char *dest, int stride) +{ + int i; + + for (i = 0; i < 16; i++) + { + input[i] = dq[i] * input[i]; + } + + vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); + + memset(input, 0, 32); + +} diff --git a/thirdparty/libvpx/vp8/common/entropy.c b/thirdparty/libvpx/vp8/common/entropy.c new file mode 100644 index 00000000000..c00e565f063 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropy.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "entropy.h" +#include "blockd.h" +#include "onyxc_int.h" +#include "vpx_mem/vpx_mem.h" + +#include "coefupdateprobs.h" + +DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = +{ + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = +{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7}; + +DECLARE_ALIGNED(16, const unsigned char, + vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = +{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0}; + +DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = +{ + 0, 1, 4, 8, + 5, 2, 3, 6, + 9, 12, 13, 10, + 7, 11, 14, 15, +}; + +DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = +{ + 1, 2, 6, 7, + 3, 5, 8, 13, + 4, 9, 12, 14, + 10, 11, 15, 16 +}; + +/* vp8_default_zig_zag_mask generated with: + + void vp8_init_scan_order_mask() + { + int i; + + for (i = 0; i < 16; i++) + { + vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; + } + + } +*/ +DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = +{ + 1, 2, 32, 64, + 4, 16, 128, 4096, + 8, 256, 2048, 8192, + 512, 1024, 16384, -32768 +}; + +const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; + +/* Array indices are identical to previously-existing CONTEXT_NODE indices */ + +const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ +{ + -DCT_EOB_TOKEN, 2, /* 0 = EOB */ + -ZERO_TOKEN, 4, /* 1 = ZERO */ + -ONE_TOKEN, 6, /* 2 = ONE */ + 8, 12, /* 3 = LOW_VAL */ + -TWO_TOKEN, 10, /* 4 = TWO */ + -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ + 14, 16, /* 6 = HIGH_LOW */ + -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ + 18, 20, /* 8 = CAT_THREEFOUR */ + -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ + -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ +}; + +/* vp8_coef_encodings generated with: + vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); +*/ +vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = +{ + {2, 2}, + {6, 3}, + {28, 5}, + {58, 6}, + {59, 6}, + {60, 6}, + {61, 6}, + {124, 7}, + {125, 7}, + {126, 7}, + {127, 7}, + {0, 1} +}; + +/* Trees for extra bits. Probabilities are constant and + do not depend on previously encoded bits */ + +static const vp8_prob Pcat1[] = { 159}; +static const vp8_prob Pcat2[] = { 165, 145}; +static const vp8_prob Pcat3[] = { 173, 148, 140}; +static const vp8_prob Pcat4[] = { 176, 155, 140, 135}; +static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130}; +static const vp8_prob Pcat6[] = +{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; + + +/* tree index tables generated with: + + void init_bit_tree(vp8_tree_index *p, int n) + { + int i = 0; + + while (++i < n) + { + p[0] = p[1] = i << 1; + p += 2; + } + + p[0] = p[1] = 0; + } + + void init_bit_trees() + { + init_bit_tree(cat1, 1); + init_bit_tree(cat2, 2); + init_bit_tree(cat3, 3); + init_bit_tree(cat4, 4); + init_bit_tree(cat5, 5); + init_bit_tree(cat6, 11); + } +*/ + +static const vp8_tree_index cat1[2] = { 0, 0 }; +static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 }; +static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 }; +static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 }; +static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 }; +static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, + 14, 14, 16, 16, 18, 18, 20, 20, 0, 0 }; + +const vp8_extra_bit_struct vp8_extra_bits[12] = +{ + { 0, 0, 0, 0}, + { 0, 0, 0, 1}, + { 0, 0, 0, 2}, + { 0, 0, 0, 3}, + { 0, 0, 0, 4}, + { cat1, Pcat1, 1, 5}, + { cat2, Pcat2, 2, 7}, + { cat3, Pcat3, 3, 11}, + { cat4, Pcat4, 4, 19}, + { cat5, Pcat5, 5, 35}, + { cat6, Pcat6, 11, 67}, + { 0, 0, 0, 0} +}; + +#include "default_coef_probs.h" + +void vp8_default_coef_probs(VP8_COMMON *pc) +{ + memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); +} + diff --git a/thirdparty/libvpx/vp8/common/entropy.h b/thirdparty/libvpx/vp8/common/entropy.h new file mode 100644 index 00000000000..a90bab4bac2 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropy.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ENTROPY_H_ +#define VP8_COMMON_ENTROPY_H_ + +#include "treecoder.h" +#include "blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Coefficient token alphabet */ + +#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ +#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ +#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ +#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ +#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ +#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ +#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ +#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ +#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ +#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ +#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ +#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ + +#define MAX_ENTROPY_TOKENS 12 +#define ENTROPY_NODES 11 + +extern const vp8_tree_index vp8_coef_tree[]; + +extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; + +typedef struct +{ + vp8_tree_p tree; + const vp8_prob *prob; + int Len; + int base_val; +} vp8_extra_bit_struct; + +extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ + +#define PROB_UPDATE_BASELINE_COST 7 + +#define MAX_PROB 255 +#define DCT_MAX_VALUE 2048 + + +/* Coefficients are predicted via a 3-dimensional probability table. */ + +/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ + +#define BLOCK_TYPES 4 + +/* Middle dimension is a coarsening of the coefficient's + position within the 4x4 DCT. */ + +#define COEF_BANDS 8 +extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); + +/* Inside dimension is 3-valued measure of nearby complexity, that is, + the extent to which nearby coefficients are nonzero. For the first + coefficient (DC, unless block type is 0), we look at the (already encoded) + blocks above and to the left of the current block. The context index is + then the number (0,1,or 2) of these blocks having nonzero coefficients. + After decoding a coefficient, the measure is roughly the size of the + most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1). + Note that the intuitive meaning of this measure changes as coefficients + are decoded, e.g., prior to the first token, a zero means that my neighbors + are empty while, after the first token, because of the use of end-of-block, + a zero means we just decoded a zero and hence guarantees that a non-zero + coefficient will appear later in this block. However, this shift + in meaning is perfectly OK because our context depends also on the + coefficient band (and since zigzag positions 0, 1, and 2 are in + distinct bands). */ + +/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ +# define PREV_COEF_CONTEXTS 3 + +extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); + +extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + + +struct VP8Common; +void vp8_default_coef_probs(struct VP8Common *); + +extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); +extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); +extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]); +extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; + +void vp8_coef_tree_initialize(void); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymode.c b/thirdparty/libvpx/vp8/common/entropymode.c new file mode 100644 index 00000000000..8981a8d3c2a --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropymode.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#define USE_PREBUILT_TABLES + +#include "entropymode.h" +#include "entropy.h" +#include "vpx_mem/vpx_mem.h" + +#include "vp8_entropymodedata.h" + +int vp8_mv_cont(const int_mv *l, const int_mv *a) +{ + int lez = (l->as_int == 0); + int aez = (a->as_int == 0); + int lea = (l->as_int == a->as_int); + + if (lea && lez) + return SUBMVREF_LEFT_ABOVE_ZED; + + if (lea) + return SUBMVREF_LEFT_ABOVE_SAME; + + if (aez) + return SUBMVREF_ABOVE_ZED; + + if (lez) + return SUBMVREF_LEFT_ZED; + + return SUBMVREF_NORMAL; +} + +static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25}; + +const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] = +{ + { 147, 136, 18 }, + { 106, 145, 1 }, + { 179, 121, 1 }, + { 223, 1 , 34 }, + { 208, 1 , 1 } +}; + + + +const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] = +{ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 1, 1, 1, 1, + 1, 1, 1, 1, + }, + { + 0, 0, 1, 1, + 0, 0, 1, 1, + 0, 0, 1, 1, + 0, 0, 1, 1, + }, + { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3, + }, + { + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + } +}; + +const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16}; + +const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150}; + + +/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ + +const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */ +{ + -B_DC_PRED, 2, /* 0 = DC_NODE */ + -B_TM_PRED, 4, /* 1 = TM_NODE */ + -B_VE_PRED, 6, /* 2 = VE_NODE */ + 8, 12, /* 3 = COM_NODE */ + -B_HE_PRED, 10, /* 4 = HE_NODE */ + -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ + -B_LD_PRED, 14, /* 6 = LD_NODE */ + -B_VL_PRED, 16, /* 7 = VL_NODE */ + -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ +}; + +/* Again, these trees use the same probability indices as their + explicitly-programmed predecessors. */ + +const vp8_tree_index vp8_ymode_tree[8] = +{ + -DC_PRED, 2, + 4, 6, + -V_PRED, -H_PRED, + -TM_PRED, -B_PRED +}; + +const vp8_tree_index vp8_kf_ymode_tree[8] = +{ + -B_PRED, 2, + 4, 6, + -DC_PRED, -V_PRED, + -H_PRED, -TM_PRED +}; + +const vp8_tree_index vp8_uv_mode_tree[6] = +{ + -DC_PRED, 2, + -V_PRED, 4, + -H_PRED, -TM_PRED +}; + +const vp8_tree_index vp8_mbsplit_tree[6] = +{ + -3, 2, + -2, 4, + -0, -1 +}; + +const vp8_tree_index vp8_mv_ref_tree[8] = +{ + -ZEROMV, 2, + -NEARESTMV, 4, + -NEARMV, 6, + -NEWMV, -SPLITMV +}; + +const vp8_tree_index vp8_sub_mv_ref_tree[6] = +{ + -LEFT4X4, 2, + -ABOVE4X4, 4, + -ZERO4X4, -NEW4X4 +}; + +const vp8_tree_index vp8_small_mvtree [14] = +{ + 2, 8, + 4, 6, + -0, -1, + -2, -3, + 10, 12, + -4, -5, + -6, -7 +}; + +void vp8_init_mbmode_probs(VP8_COMMON *x) +{ + memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); + memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); + memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); +} + +void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) +{ + memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); +} + diff --git a/thirdparty/libvpx/vp8/common/entropymode.h b/thirdparty/libvpx/vp8/common/entropymode.h new file mode 100644 index 00000000000..81bdfc4b8bd --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropymode.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ENTROPYMODE_H_ +#define VP8_COMMON_ENTROPYMODE_H_ + +#include "onyxc_int.h" +#include "treecoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + SUBMVREF_NORMAL, + SUBMVREF_LEFT_ZED, + SUBMVREF_ABOVE_ZED, + SUBMVREF_LEFT_ABOVE_SAME, + SUBMVREF_LEFT_ABOVE_ZED +} sumvfref_t; + +typedef int vp8_mbsplit[16]; + +#define VP8_NUMMBSPLITS 4 + +extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; + +extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ + +extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1]; + +extern int vp8_mv_cont(const int_mv *l, const int_mv *a); +#define SUBMVREF_COUNT 5 +extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1]; + + +extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES]; + + +extern const vp8_tree_index vp8_bmode_tree[]; + +extern const vp8_tree_index vp8_ymode_tree[]; +extern const vp8_tree_index vp8_kf_ymode_tree[]; +extern const vp8_tree_index vp8_uv_mode_tree[]; + +extern const vp8_tree_index vp8_mbsplit_tree[]; +extern const vp8_tree_index vp8_mv_ref_tree[]; +extern const vp8_tree_index vp8_sub_mv_ref_tree[]; + +extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES]; +extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES]; +extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES]; +extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES]; +extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS]; + +/* Inter mode values do not start at zero */ + +extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS]; +extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS]; + +extern const vp8_tree_index vp8_small_mvtree[]; + +extern const struct vp8_token_struct vp8_small_mvencodings[8]; + +/* Key frame default mode probs */ +extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] +[VP8_BINTRAMODES-1]; +extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; +extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; + +void vp8_init_mbmode_probs(VP8_COMMON *x); +void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); +void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymv.c b/thirdparty/libvpx/vp8/common/entropymv.c new file mode 100644 index 00000000000..e5df1f09555 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropymv.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "entropymv.h" + +const MV_CONTEXT vp8_mv_update_probs[2] = +{ + {{ + 237, + 246, + 253, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 + }}, + {{ + 231, + 243, + 245, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 + }} +}; +const MV_CONTEXT vp8_default_mv_context[2] = +{ + {{ + /* row */ + 162, /* is short */ + 128, /* sign */ + 225, 146, 172, 147, 214, 39, 156, /* short tree */ + 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ + }}, + + + + {{ + /* same for column */ + 164, /* is short */ + 128, + 204, 170, 119, 235, 140, 230, 228, + 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ + + }} +}; diff --git a/thirdparty/libvpx/vp8/common/entropymv.h b/thirdparty/libvpx/vp8/common/entropymv.h new file mode 100644 index 00000000000..42840d58ad2 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/entropymv.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ENTROPYMV_H_ +#define VP8_COMMON_ENTROPYMV_H_ + +#include "treecoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum +{ + mv_max = 1023, /* max absolute value of a MV component */ + MVvals = (2 * mv_max) + 1, /* # possible values "" */ + mvfp_max = 255, /* max absolute value of a full pixel MV component */ + MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */ + + mvlong_width = 10, /* Large MVs have 9 bit magnitudes */ + mvnum_short = 8, /* magnitudes 0 through 7 */ + + /* probability offsets for coding each MV component */ + + mvpis_short = 0, /* short (<= 7) vs long (>= 8) */ + MVPsign, /* sign for non-zero */ + MVPshort, /* 8 short values = 7-position tree */ + + MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */ + MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */ +}; + +typedef struct mv_context +{ + vp8_prob prob[MVPcount]; /* often come in row, col pairs */ +} MV_CONTEXT; + +extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp8/common/extend.c b/thirdparty/libvpx/vp8/common/extend.c new file mode 100644 index 00000000000..2d938ad7825 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/extend.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "extend.h" +#include "vpx_mem/vpx_mem.h" + + +static void copy_and_extend_plane +( + unsigned char *s, /* source */ + int sp, /* source pitch */ + unsigned char *d, /* destination */ + int dp, /* destination pitch */ + int h, /* height */ + int w, /* width */ + int et, /* extend top border */ + int el, /* extend left border */ + int eb, /* extend bottom border */ + int er /* extend right border */ +) +{ + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + int linesize; + + /* copy the left and right most columns out */ + src_ptr1 = s; + src_ptr2 = s + w - 1; + dest_ptr1 = d - el; + dest_ptr2 = d + w; + + for (i = 0; i < h; i++) + { + memset(dest_ptr1, src_ptr1[0], el); + memcpy(dest_ptr1 + el, src_ptr1, w); + memset(dest_ptr2, src_ptr2[0], er); + src_ptr1 += sp; + src_ptr2 += sp; + dest_ptr1 += dp; + dest_ptr2 += dp; + } + + /* Now copy the top and bottom lines into each line of the respective + * borders + */ + src_ptr1 = d - el; + src_ptr2 = d + dp * (h - 1) - el; + dest_ptr1 = d + dp * (-et) - el; + dest_ptr2 = d + dp * (h) - el; + linesize = el + er + w; + + for (i = 0; i < et; i++) + { + memcpy(dest_ptr1, src_ptr1, linesize); + dest_ptr1 += dp; + } + + for (i = 0; i < eb; i++) + { + memcpy(dest_ptr2, src_ptr2, linesize); + dest_ptr2 += dp; + } +} + + +void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) +{ + int et = dst->border; + int el = dst->border; + int eb = dst->border + dst->y_height - src->y_height; + int er = dst->border + dst->y_width - src->y_width; + + copy_and_extend_plane(src->y_buffer, src->y_stride, + dst->y_buffer, dst->y_stride, + src->y_height, src->y_width, + et, el, eb, er); + + et = dst->border >> 1; + el = dst->border >> 1; + eb = (dst->border >> 1) + dst->uv_height - src->uv_height; + er = (dst->border >> 1) + dst->uv_width - src->uv_width; + + copy_and_extend_plane(src->u_buffer, src->uv_stride, + dst->u_buffer, dst->uv_stride, + src->uv_height, src->uv_width, + et, el, eb, er); + + copy_and_extend_plane(src->v_buffer, src->uv_stride, + dst->v_buffer, dst->uv_stride, + src->uv_height, src->uv_width, + et, el, eb, er); +} + + +void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + int srcy, int srcx, + int srch, int srcw) +{ + int et = dst->border; + int el = dst->border; + int eb = dst->border + dst->y_height - src->y_height; + int er = dst->border + dst->y_width - src->y_width; + int src_y_offset = srcy * src->y_stride + srcx; + int dst_y_offset = srcy * dst->y_stride + srcx; + int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); + int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); + + /* If the side is not touching the bounder then don't extend. */ + if (srcy) + et = 0; + if (srcx) + el = 0; + if (srcy + srch != src->y_height) + eb = 0; + if (srcx + srcw != src->y_width) + er = 0; + + copy_and_extend_plane(src->y_buffer + src_y_offset, + src->y_stride, + dst->y_buffer + dst_y_offset, + dst->y_stride, + srch, srcw, + et, el, eb, er); + + et = (et + 1) >> 1; + el = (el + 1) >> 1; + eb = (eb + 1) >> 1; + er = (er + 1) >> 1; + srch = (srch + 1) >> 1; + srcw = (srcw + 1) >> 1; + + copy_and_extend_plane(src->u_buffer + src_uv_offset, + src->uv_stride, + dst->u_buffer + dst_uv_offset, + dst->uv_stride, + srch, srcw, + et, el, eb, er); + + copy_and_extend_plane(src->v_buffer + src_uv_offset, + src->uv_stride, + dst->v_buffer + dst_uv_offset, + dst->uv_stride, + srch, srcw, + et, el, eb, er); +} + + +/* note the extension is only for the last row, for intra prediction purpose */ +void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, + unsigned char *YPtr, + unsigned char *UPtr, + unsigned char *VPtr) +{ + int i; + + YPtr += ybf->y_stride * 14; + UPtr += ybf->uv_stride * 6; + VPtr += ybf->uv_stride * 6; + + for (i = 0; i < 4; i++) + { + YPtr[i] = YPtr[-1]; + UPtr[i] = UPtr[-1]; + VPtr[i] = VPtr[-1]; + } + + YPtr += ybf->y_stride; + UPtr += ybf->uv_stride; + VPtr += ybf->uv_stride; + + for (i = 0; i < 4; i++) + { + YPtr[i] = YPtr[-1]; + UPtr[i] = UPtr[-1]; + VPtr[i] = VPtr[-1]; + } +} diff --git a/thirdparty/libvpx/vp8/common/extend.h b/thirdparty/libvpx/vp8/common/extend.h new file mode 100644 index 00000000000..068f4ac5236 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/extend.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_EXTEND_H_ +#define VP8_COMMON_EXTEND_H_ + +#include "vpx_scale/yv12config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr); +void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst); +void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + int srcy, int srcx, + int srch, int srcw); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_EXTEND_H_ diff --git a/thirdparty/libvpx/vp8/common/filter.c b/thirdparty/libvpx/vp8/common/filter.c new file mode 100644 index 00000000000..84c608effaa --- /dev/null +++ b/thirdparty/libvpx/vp8/common/filter.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "filter.h" +#include "./vp8_rtcd.h" + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = +{ + { 128, 0 }, + { 112, 16 }, + { 96, 32 }, + { 80, 48 }, + { 64, 64 }, + { 48, 80 }, + { 32, 96 }, + { 16, 112 } +}; + +DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = +{ + + { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ + { 0, -6, 123, 12, -1, 0 }, + { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ + { 0, -9, 93, 50, -6, 0 }, + { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ + { 0, -6, 50, 93, -9, 0 }, + { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ + { 0, -1, 12, 123, -6, 0 }, +}; + +static void filter_block2d_first_pass +( + unsigned char *src_ptr, + int *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) + { + for (j = 0; j < output_width; j++) + { + Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ + + /* Normalize back to 0-255 */ + Temp = Temp >> VP8_FILTER_SHIFT; + + if (Temp < 0) + Temp = 0; + else if (Temp > 255) + Temp = 255; + + output_ptr[j] = Temp; + src_ptr++; + } + + /* Next row... */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} + +static void filter_block2d_second_pass +( + int *src_ptr, + unsigned char *output_ptr, + int output_pitch, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) + { + for (j = 0; j < output_width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ + + /* Normalize back to 0-255 */ + Temp = Temp >> VP8_FILTER_SHIFT; + + if (Temp < 0) + Temp = 0; + else if (Temp > 255) + Temp = 255; + + output_ptr[j] = (unsigned char)Temp; + src_ptr++; + } + + /* Start next row */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_pitch; + } +} + + +static void filter_block2d +( + unsigned char *src_ptr, + unsigned char *output_ptr, + unsigned int src_pixels_per_line, + int output_pitch, + const short *HFilter, + const short *VFilter +) +{ + int FData[9*4]; /* Temp data buffer used in filtering */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); + + /* then filter verticaly... */ + filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); +} + + +void vp8_sixtap_predict4x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); +} +void vp8_sixtap_predict8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[13*16]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); + + + /* then filter verticaly... */ + filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + +} + +void vp8_sixtap_predict8x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[13*16]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); + + + /* then filter verticaly... */ + filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); + +} + +void vp8_sixtap_predict16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[21*24]; /* Temp data buffer used in filtering */ + + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); + + /* then filter verticaly... */ + filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); + +} + + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_first_pass + * + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_stride : Stride of source block. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * + * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the horizontal direction to produce the filtered output + * block. Used to implement first-pass of 2-D separable filter. + * + * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. + * Two filter taps should sum to VP8_FILTER_WEIGHT. + * + ****************************************************************************/ +static void filter_block2d_bil_first_pass +( + unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_stride, + unsigned int height, + unsigned int width, + const short *vp8_filter +) +{ + unsigned int i, j; + + for (i = 0; i < height; i++) + { + for (j = 0; j < width; j++) + { + /* Apply bilinear filter */ + dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[1] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; + src_ptr++; + } + + /* Next row... */ + src_ptr += src_stride - width; + dst_ptr += width; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_second_pass + * + * INPUTS : INT32 *src_ptr : Pointer to source block. + * UINT32 dst_pitch : Destination block pitch. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the vertical direction to produce the filtered output + * block. Used to implement second-pass of 2-D separable filter. + * + * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. + * Two filter taps should sum to VP8_FILTER_WEIGHT. + * + ****************************************************************************/ +static void filter_block2d_bil_second_pass +( + unsigned short *src_ptr, + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < height; i++) + { + for (j = 0; j < width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[width] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2); + dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); + src_ptr++; + } + + /* Next row... */ + dst_ptr += dst_pitch; + } +} + + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil + * + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_pitch : Stride of source block. + * UINT32 dst_pitch : Stride of destination block. + * INT32 *HFilter : Array of 2 horizontal filter taps. + * INT32 *VFilter : Array of 2 vertical filter taps. + * INT32 Width : Block width + * INT32 Height : Block height + * + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : 2-D filters an input block by applying a 2-tap + * bi-linear filter horizontally followed by a 2-tap + * bi-linear filter vertically on the result. + * + * SPECIAL NOTES : The largest block size can be handled here is 16x16 + * + ****************************************************************************/ +static void filter_block2d_bil +( + unsigned char *src_ptr, + unsigned char *dst_ptr, + unsigned int src_pitch, + unsigned int dst_pitch, + const short *HFilter, + const short *VFilter, + int Width, + int Height +) +{ + + unsigned short FData[17*16]; /* Temp data buffer used in filtering */ + + /* First filter 1-D horizontally... */ + filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); + + /* then 1-D vertically... */ + filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); +} + + +void vp8_bilinear_predict4x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; +#if 0 + { + int i; + unsigned char temp1[16]; + unsigned char temp2[16]; + + bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); + filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); + + for (i = 0; i < 16; i++) + { + if (temp1[i] != temp2[i]) + { + bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); + filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); + } + } + } +#endif + filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); + +} + +void vp8_bilinear_predict8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); + +} + +void vp8_bilinear_predict8x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); + +} + +void vp8_bilinear_predict16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); +} diff --git a/thirdparty/libvpx/vp8/common/filter.h b/thirdparty/libvpx/vp8/common/filter.h new file mode 100644 index 00000000000..cfba775fce4 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/filter.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_FILTER_H_ +#define VP8_COMMON_FILTER_H_ + +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BLOCK_HEIGHT_WIDTH 4 +#define VP8_FILTER_WEIGHT 128 +#define VP8_FILTER_SHIFT 7 + +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]); +extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_FILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/findnearmv.c b/thirdparty/libvpx/vp8/common/findnearmv.c new file mode 100644 index 00000000000..e8ee40f56c6 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/findnearmv.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "findnearmv.h" + +const unsigned char vp8_mbsplit_offset[4][16] = { + { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; + +/* Predict motion vectors using those from already-decoded nearby blocks. + Note that we only consider one 4x4 subblock from each candidate 16x16 + macroblock. */ +void vp8_find_near_mvs +( + MACROBLOCKD *xd, + const MODE_INFO *here, + int_mv *nearest, + int_mv *nearby, + int_mv *best_mv, + int cnt[4], + int refframe, + int *ref_frame_sign_bias +) +{ + const MODE_INFO *above = here - xd->mode_info_stride; + const MODE_INFO *left = here - 1; + const MODE_INFO *aboveleft = above - 1; + int_mv near_mvs[4]; + int_mv *mv = near_mvs; + int *cntx = cnt; + enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; + + /* Zero accumulators */ + mv[0].as_int = mv[1].as_int = mv[2].as_int = 0; + cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; + + /* Process above */ + if (above->mbmi.ref_frame != INTRA_FRAME) + { + if (above->mbmi.mv.as_int) + { + (++mv)->as_int = above->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias); + ++cntx; + } + + *cntx += 2; + } + + /* Process left */ + if (left->mbmi.ref_frame != INTRA_FRAME) + { + if (left->mbmi.mv.as_int) + { + int_mv this_mv; + + this_mv.as_int = left->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); + + if (this_mv.as_int != mv->as_int) + { + (++mv)->as_int = this_mv.as_int; + ++cntx; + } + + *cntx += 2; + } + else + cnt[CNT_INTRA] += 2; + } + + /* Process above left */ + if (aboveleft->mbmi.ref_frame != INTRA_FRAME) + { + if (aboveleft->mbmi.mv.as_int) + { + int_mv this_mv; + + this_mv.as_int = aboveleft->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); + + if (this_mv.as_int != mv->as_int) + { + (++mv)->as_int = this_mv.as_int; + ++cntx; + } + + *cntx += 1; + } + else + cnt[CNT_INTRA] += 1; + } + + /* If we have three distinct MV's ... */ + if (cnt[CNT_SPLITMV]) + { + /* See if above-left MV can be merged with NEAREST */ + if (mv->as_int == near_mvs[CNT_NEAREST].as_int) + cnt[CNT_NEAREST] += 1; + } + + cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) + + (left->mbmi.mode == SPLITMV)) * 2 + + (aboveleft->mbmi.mode == SPLITMV); + + /* Swap near and nearest if necessary */ + if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) + { + int tmp; + tmp = cnt[CNT_NEAREST]; + cnt[CNT_NEAREST] = cnt[CNT_NEAR]; + cnt[CNT_NEAR] = tmp; + tmp = near_mvs[CNT_NEAREST].as_int; + near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; + near_mvs[CNT_NEAR].as_int = tmp; + } + + /* Use near_mvs[0] to store the "best" MV */ + if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]) + near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST]; + + /* Set up return values */ + best_mv->as_int = near_mvs[0].as_int; + nearest->as_int = near_mvs[CNT_NEAREST].as_int; + nearby->as_int = near_mvs[CNT_NEAR].as_int; +} + + +static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) +{ + inv->as_mv.row = src->as_mv.row * -1; + inv->as_mv.col = src->as_mv.col * -1; + vp8_clamp_mv2(inv, xd); + vp8_clamp_mv2(src, xd); +} + + +int vp8_find_near_mvs_bias +( + MACROBLOCKD *xd, + const MODE_INFO *here, + int_mv mode_mv_sb[2][MB_MODE_COUNT], + int_mv best_mv_sb[2], + int cnt[4], + int refframe, + int *ref_frame_sign_bias +) +{ + int sign_bias = ref_frame_sign_bias[refframe]; + + vp8_find_near_mvs(xd, + here, + &mode_mv_sb[sign_bias][NEARESTMV], + &mode_mv_sb[sign_bias][NEARMV], + &best_mv_sb[sign_bias], + cnt, + refframe, + ref_frame_sign_bias); + + invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV], + &mode_mv_sb[sign_bias][NEARESTMV], xd); + invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV], + &mode_mv_sb[sign_bias][NEARMV], xd); + invert_and_clamp_mvs(&best_mv_sb[!sign_bias], + &best_mv_sb[sign_bias], xd); + + return sign_bias; +} + + +vp8_prob *vp8_mv_ref_probs( + vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] +) +{ + p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0]; + p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1]; + p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2]; + p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3]; + /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/ + return p; +} + diff --git a/thirdparty/libvpx/vp8/common/findnearmv.h b/thirdparty/libvpx/vp8/common/findnearmv.h new file mode 100644 index 00000000000..472a7b5d8da --- /dev/null +++ b/thirdparty/libvpx/vp8/common/findnearmv.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_FINDNEARMV_H_ +#define VP8_COMMON_FINDNEARMV_H_ + +#include "./vpx_config.h" +#include "mv.h" +#include "blockd.h" +#include "modecont.h" +#include "treecoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe, + int_mv *mvp, const int *ref_frame_sign_bias) +{ + if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) + { + mvp->as_mv.row *= -1; + mvp->as_mv.col *= -1; + } +} + +#define LEFT_TOP_MARGIN (16 << 3) +#define RIGHT_BOTTOM_MARGIN (16 << 3) +static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) +{ + if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) + mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; + else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) + mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; + + if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) + mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; + else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) + mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; +} + +static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, + int mb_to_right_edge, int mb_to_top_edge, + int mb_to_bottom_edge) +{ + mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? + mb_to_left_edge : mv->as_mv.col; + mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ? + mb_to_right_edge : mv->as_mv.col; + mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ? + mb_to_top_edge : mv->as_mv.row; + mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? + mb_to_bottom_edge : mv->as_mv.row; +} +static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, + int mb_to_right_edge, + int mb_to_top_edge, + int mb_to_bottom_edge) +{ + unsigned int need_to_clamp; + need_to_clamp = (mv->as_mv.col < mb_to_left_edge); + need_to_clamp |= (mv->as_mv.col > mb_to_right_edge); + need_to_clamp |= (mv->as_mv.row < mb_to_top_edge); + need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge); + return need_to_clamp; +} + +void vp8_find_near_mvs +( + MACROBLOCKD *xd, + const MODE_INFO *here, + int_mv *nearest, int_mv *nearby, int_mv *best, + int near_mv_ref_cts[4], + int refframe, + int *ref_frame_sign_bias +); + + +int vp8_find_near_mvs_bias +( + MACROBLOCKD *xd, + const MODE_INFO *here, + int_mv mode_mv_sb[2][MB_MODE_COUNT], + int_mv best_mv_sb[2], + int cnt[4], + int refframe, + int *ref_frame_sign_bias +); + + +vp8_prob *vp8_mv_ref_probs( + vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] +); + +extern const unsigned char vp8_mbsplit_offset[4][16]; + + +static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) +{ + if (!(b & 3)) + { + /* On L edge, get from MB to left of us */ + --cur_mb; + + if(cur_mb->mbmi.mode != SPLITMV) + return cur_mb->mbmi.mv.as_int; + b += 4; + } + + return (cur_mb->bmi + b - 1)->mv.as_int; +} + +static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b, + int mi_stride) +{ + if (!(b >> 2)) + { + /* On top edge, get from MB above us */ + cur_mb -= mi_stride; + + if(cur_mb->mbmi.mode != SPLITMV) + return cur_mb->mbmi.mv.as_int; + b += 16; + } + + return (cur_mb->bmi + (b - 4))->mv.as_int; +} +static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) +{ + if (!(b & 3)) + { + /* On L edge, get from MB to left of us */ + --cur_mb; + switch (cur_mb->mbmi.mode) + { + case B_PRED: + return (cur_mb->bmi + b + 3)->as_mode; + case DC_PRED: + return B_DC_PRED; + case V_PRED: + return B_VE_PRED; + case H_PRED: + return B_HE_PRED; + case TM_PRED: + return B_TM_PRED; + default: + return B_DC_PRED; + } + } + + return (cur_mb->bmi + b - 1)->as_mode; +} + +static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, + int mi_stride) +{ + if (!(b >> 2)) + { + /* On top edge, get from MB above us */ + cur_mb -= mi_stride; + + switch (cur_mb->mbmi.mode) + { + case B_PRED: + return (cur_mb->bmi + b + 12)->as_mode; + case DC_PRED: + return B_DC_PRED; + case V_PRED: + return B_VE_PRED; + case H_PRED: + return B_HE_PRED; + case TM_PRED: + return B_TM_PRED; + default: + return B_DC_PRED; + } + } + + return (cur_mb->bmi + b - 4)->as_mode; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_FINDNEARMV_H_ diff --git a/thirdparty/libvpx/vp8/common/generic/systemdependent.c b/thirdparty/libvpx/vp8/common/generic/systemdependent.c new file mode 100644 index 00000000000..6d5f302d7a4 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/generic/systemdependent.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#elif ARCH_X86 || ARCH_X86_64 +#include "vpx_ports/x86.h" +#endif +#include "vp8/common/onyxc_int.h" +#include "vp8/common/systemdependent.h" + +#if CONFIG_MULTITHREAD +#if HAVE_UNISTD_H && !defined(__OS2__) +#include +#elif defined(_WIN32) +#include +typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); +#elif defined(__OS2__) +#define INCL_DOS +#define INCL_DOSSPINLOCK +#include +#endif +#endif + +#if CONFIG_MULTITHREAD +static int get_cpu_count() +{ + int core_count = 16; + +#if HAVE_UNISTD_H && !defined(__OS2__) +#if defined(_SC_NPROCESSORS_ONLN) + core_count = sysconf(_SC_NPROCESSORS_ONLN); +#elif defined(_SC_NPROC_ONLN) + core_count = sysconf(_SC_NPROC_ONLN); +#endif +#elif defined(_WIN32) + { +#if _WIN32_WINNT >= 0x0501 + SYSTEM_INFO sysinfo; + GetNativeSystemInfo(&sysinfo); +#else + PGNSI pGNSI; + SYSTEM_INFO sysinfo; + + /* Call GetNativeSystemInfo if supported or + * GetSystemInfo otherwise. */ + + pGNSI = (PGNSI) GetProcAddress( + GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); + if (pGNSI != NULL) + pGNSI(&sysinfo); + else + GetSystemInfo(&sysinfo); +#endif + + core_count = sysinfo.dwNumberOfProcessors; + } +#elif defined(__OS2__) + { + ULONG proc_id; + ULONG status; + + core_count = 0; + for (proc_id = 1; ; proc_id++) + { + if (DosGetProcessorStatus(proc_id, &status)) + break; + + if (status == PROC_ONLINE) + core_count++; + } + } +#else + /* other platforms */ +#endif + + return core_count > 0 ? core_count : 1; +} +#endif + +void vp8_clear_system_state_c() {}; + +void vp8_machine_specific_config(VP8_COMMON *ctx) +{ +#if CONFIG_MULTITHREAD + ctx->processor_core_count = get_cpu_count(); +#else + (void)ctx; +#endif /* CONFIG_MULTITHREAD */ + +#if ARCH_ARM + ctx->cpu_caps = arm_cpu_caps(); +#elif ARCH_X86 || ARCH_X86_64 + ctx->cpu_caps = x86_simd_caps(); +#endif +} diff --git a/thirdparty/libvpx/vp8/common/header.h b/thirdparty/libvpx/vp8/common/header.h new file mode 100644 index 00000000000..e27bca16bd7 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/header.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_HEADER_H_ +#define VP8_COMMON_HEADER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* 24 bits total */ +typedef struct +{ + unsigned int type: 1; + unsigned int version: 3; + unsigned int show_frame: 1; + + /* Allow 2^20 bytes = 8 megabits for first partition */ + + unsigned int first_partition_length_in_bytes: 19; + +#ifdef PACKET_TESTING + unsigned int frame_number; + unsigned int update_gold: 1; + unsigned int uses_gold: 1; + unsigned int update_last: 1; + unsigned int uses_last: 1; +#endif + +} VP8_HEADER; + +#ifdef PACKET_TESTING +#define VP8_HEADER_SIZE 8 +#else +#define VP8_HEADER_SIZE 3 +#endif + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_HEADER_H_ diff --git a/thirdparty/libvpx/vp8/common/idct_blk.c b/thirdparty/libvpx/vp8/common/idct_blk.c new file mode 100644 index 00000000000..8aa7d9bf0ff --- /dev/null +++ b/thirdparty/libvpx/vp8/common/idct_blk.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vpx_mem/vpx_mem.h" + +void vp8_dequant_idct_add_c(short *input, short *dq, + unsigned char *dest, int stride); +void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred, + int pred_stride, unsigned char *dst_ptr, + int dst_stride); + +void vp8_dequant_idct_add_y_block_c + (short *q, short *dq, + unsigned char *dst, int stride, char *eobs) +{ + int i, j; + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, dst, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + q += 16; + dst += 4; + } + + dst += 4*stride - 16; + } +} + +void vp8_dequant_idct_add_uv_block_c + (short *q, short *dq, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + int i, j; + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, dstu, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + q += 16; + dstu += 4; + } + + dstu += 4*stride - 8; + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, dstv, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + q += 16; + dstv += 4; + } + + dstv += 4*stride - 8; + } +} diff --git a/thirdparty/libvpx/vp8/common/idctllm.c b/thirdparty/libvpx/vp8/common/idctllm.c new file mode 100644 index 00000000000..f5403c5aaf7 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/idctllm.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp8_rtcd.h" + +/**************************************************************************** + * Notes: + * + * This implementation makes use of 16 bit fixed point verio of two multiply + * constants: + * 1. sqrt(2) * cos (pi/8) + * 2. sqrt(2) * sin (pi/8) + * Becuase the first constant is bigger than 1, to maintain the same 16 bit + * fixed point precision as the second one, we use a trick of + * x * a = x + x*(a-1) + * so + * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). + **************************************************************************/ +static const int cospi8sqrt2minus1 = 20091; +static const int sinpi8sqrt2 = 35468; + +void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, + int pred_stride, unsigned char *dst_ptr, + int dst_stride) +{ + int i; + int r, c; + int a1, b1, c1, d1; + short output[16]; + short *ip = input; + short *op = output; + int temp1, temp2; + int shortpitch = 4; + + for (i = 0; i < 4; i++) + { + a1 = ip[0] + ip[8]; + b1 = ip[0] - ip[8]; + + temp1 = (ip[4] * sinpi8sqrt2) >> 16; + temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); + c1 = temp1 - temp2; + + temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); + temp2 = (ip[12] * sinpi8sqrt2) >> 16; + d1 = temp1 + temp2; + + op[shortpitch*0] = a1 + d1; + op[shortpitch*3] = a1 - d1; + + op[shortpitch*1] = b1 + c1; + op[shortpitch*2] = b1 - c1; + + ip++; + op++; + } + + ip = output; + op = output; + + for (i = 0; i < 4; i++) + { + a1 = ip[0] + ip[2]; + b1 = ip[0] - ip[2]; + + temp1 = (ip[1] * sinpi8sqrt2) >> 16; + temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); + c1 = temp1 - temp2; + + temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); + temp2 = (ip[3] * sinpi8sqrt2) >> 16; + d1 = temp1 + temp2; + + + op[0] = (a1 + d1 + 4) >> 3; + op[3] = (a1 - d1 + 4) >> 3; + + op[1] = (b1 + c1 + 4) >> 3; + op[2] = (b1 - c1 + 4) >> 3; + + ip += shortpitch; + op += shortpitch; + } + + ip = output; + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = ip[c] + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + ip += 4; + dst_ptr += dst_stride; + pred_ptr += pred_stride; + } +} + +void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, + int pred_stride, unsigned char *dst_ptr, + int dst_stride) +{ + int a1 = ((input_dc + 4) >> 3); + int r, c; + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = a1 + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + + dst_ptr += dst_stride; + pred_ptr += pred_stride; + } + +} + +void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) +{ + short output[16]; + int i; + int a1, b1, c1, d1; + int a2, b2, c2, d2; + short *ip = input; + short *op = output; + + for (i = 0; i < 4; i++) + { + a1 = ip[0] + ip[12]; + b1 = ip[4] + ip[8]; + c1 = ip[4] - ip[8]; + d1 = ip[0] - ip[12]; + + op[0] = a1 + b1; + op[4] = c1 + d1; + op[8] = a1 - b1; + op[12] = d1 - c1; + ip++; + op++; + } + + ip = output; + op = output; + + for (i = 0; i < 4; i++) + { + a1 = ip[0] + ip[3]; + b1 = ip[1] + ip[2]; + c1 = ip[1] - ip[2]; + d1 = ip[0] - ip[3]; + + a2 = a1 + b1; + b2 = c1 + d1; + c2 = a1 - b1; + d2 = d1 - c1; + + op[0] = (a2 + 3) >> 3; + op[1] = (b2 + 3) >> 3; + op[2] = (c2 + 3) >> 3; + op[3] = (d2 + 3) >> 3; + + ip += 4; + op += 4; + } + + for(i = 0; i < 16; i++) + { + mb_dqcoeff[i * 16] = output[i]; + } +} + +void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) +{ + int i; + int a1; + + a1 = ((input[0] + 3) >> 3); + for(i = 0; i < 16; i++) + { + mb_dqcoeff[i * 16] = a1; + } +} diff --git a/thirdparty/libvpx/vp8/common/invtrans.h b/thirdparty/libvpx/vp8/common/invtrans.h new file mode 100644 index 00000000000..9cfea8d5131 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/invtrans.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_INVTRANS_H_ +#define VP8_COMMON_INVTRANS_H_ + +#include "./vpx_config.h" +#include "vp8_rtcd.h" +#include "blockd.h" +#include "onyxc_int.h" + +#if CONFIG_MULTITHREAD +#include "vpx_mem/vpx_mem.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +static void eob_adjust(char *eobs, short *diff) +{ + /* eob adjust.... the idct can only skip if both the dc and eob are zero */ + int js; + for(js = 0; js < 16; js++) + { + if((eobs[js] == 0) && (diff[0] != 0)) + eobs[js]++; + diff+=16; + } +} + +static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) +{ + short *DQC = xd->dequant_y1; + + if (xd->mode_info_context->mbmi.mode != SPLITMV) + { + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + vp8_short_inv_walsh4x4 + (&xd->block[24].dqcoeff[0], xd->qcoeff); + } + else + { + vp8_short_inv_walsh4x4_1 + (&xd->block[24].dqcoeff[0], xd->qcoeff); + } + eob_adjust(xd->eobs, xd->qcoeff); + + DQC = xd->dequant_y1_dc; + } + vp8_dequant_idct_add_y_block + (xd->qcoeff, DQC, + xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); +} +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_INVTRANS_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter.h b/thirdparty/libvpx/vp8/common/loopfilter.h new file mode 100644 index 00000000000..20a6bd375b6 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/loopfilter.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_LOOPFILTER_H_ +#define VP8_COMMON_LOOPFILTER_H_ + +#include "vpx_ports/mem.h" +#include "vpx_config.h" +#include "vp8_rtcd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LOOP_FILTER 63 +/* fraction of total macroblock rows to be used in fast filter level picking */ +/* has to be > 2 */ +#define PARTIAL_FRAME_FRACTION 8 + +typedef enum +{ + NORMAL_LOOPFILTER = 0, + SIMPLE_LOOPFILTER = 1 +} LOOPFILTERTYPE; + +#if ARCH_ARM +#define SIMD_WIDTH 1 +#else +#define SIMD_WIDTH 16 +#endif + +/* Need to align this structure so when it is declared and + * passed it can be loaded into vector registers. + */ +typedef struct +{ + DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]); + unsigned char lvl[4][4][4]; + unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1]; + unsigned char mode_lf_lut[10]; +} loop_filter_info_n; + +typedef struct loop_filter_info +{ + const unsigned char * mblim; + const unsigned char * blim; + const unsigned char * lim; + const unsigned char * hev_thr; +} loop_filter_info; + + +typedef void loop_filter_uvfunction +( + unsigned char *u, /* source pointer */ + int p, /* pitch */ + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + unsigned char *v +); + +/* assorted loopfilter functions which get used elsewhere */ +struct VP8Common; +struct macroblockd; +struct modeinfo; + +void vp8_loop_filter_init(struct VP8Common *cm); + +void vp8_loop_filter_frame_init(struct VP8Common *cm, + struct macroblockd *mbd, + int default_filt_lvl); + +void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, + int frame_type); + +void vp8_loop_filter_partial_frame(struct VP8Common *cm, + struct macroblockd *mbd, + int default_filt_lvl); + +void vp8_loop_filter_frame_yonly(struct VP8Common *cm, + struct macroblockd *mbd, + int default_filt_lvl); + +void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, + int sharpness_lvl); + +void vp8_loop_filter_row_normal(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); + +void vp8_loop_filter_row_simple(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter_filters.c b/thirdparty/libvpx/vp8/common/loopfilter_filters.c new file mode 100644 index 00000000000..1d51696ff7e --- /dev/null +++ b/thirdparty/libvpx/vp8/common/loopfilter_filters.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include "loopfilter.h" +#include "onyxc_int.h" + +typedef unsigned char uc; + +static signed char vp8_signed_char_clamp(int t) +{ + t = (t < -128 ? -128 : t); + t = (t > 127 ? 127 : t); + return (signed char) t; +} + + +/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ +static signed char vp8_filter_mask(uc limit, uc blimit, + uc p3, uc p2, uc p1, uc p0, + uc q0, uc q1, uc q2, uc q3) +{ + signed char mask = 0; + mask |= (abs(p3 - p2) > limit); + mask |= (abs(p2 - p1) > limit); + mask |= (abs(p1 - p0) > limit); + mask |= (abs(q1 - q0) > limit); + mask |= (abs(q2 - q1) > limit); + mask |= (abs(q3 - q2) > limit); + mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit); + return mask - 1; +} + +/* is there high variance internal edge ( 11111111 yes, 00000000 no) */ +static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) +{ + signed char hev = 0; + hev |= (abs(p1 - p0) > thresh) * -1; + hev |= (abs(q1 - q0) > thresh) * -1; + return hev; +} + +static void vp8_filter(signed char mask, uc hev, uc *op1, + uc *op0, uc *oq0, uc *oq1) + +{ + signed char ps0, qs0; + signed char ps1, qs1; + signed char filter_value, Filter1, Filter2; + signed char u; + + ps1 = (signed char) * op1 ^ 0x80; + ps0 = (signed char) * op0 ^ 0x80; + qs0 = (signed char) * oq0 ^ 0x80; + qs1 = (signed char) * oq1 ^ 0x80; + + /* add outer taps if we have high edge variance */ + filter_value = vp8_signed_char_clamp(ps1 - qs1); + filter_value &= hev; + + /* inner taps */ + filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); + filter_value &= mask; + + /* save bottom 3 bits so that we round one side +4 and the other +3 + * if it equals 4 we'll set to adjust by -1 to account for the fact + * we'd round 3 the other way + */ + Filter1 = vp8_signed_char_clamp(filter_value + 4); + Filter2 = vp8_signed_char_clamp(filter_value + 3); + Filter1 >>= 3; + Filter2 >>= 3; + u = vp8_signed_char_clamp(qs0 - Filter1); + *oq0 = u ^ 0x80; + u = vp8_signed_char_clamp(ps0 + Filter2); + *op0 = u ^ 0x80; + filter_value = Filter1; + + /* outer tap adjustments */ + filter_value += 1; + filter_value >>= 1; + filter_value &= ~hev; + + u = vp8_signed_char_clamp(qs1 - filter_value); + *oq1 = u ^ 0x80; + u = vp8_signed_char_clamp(ps1 + filter_value); + *op1 = u ^ 0x80; + +} +void vp8_loop_filter_horizontal_edge_c +( + unsigned char *s, + int p, /* pitch */ + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) +{ + int hev = 0; /* high edge variance */ + signed char mask = 0; + int i = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + do + { + mask = vp8_filter_mask(limit[0], blimit[0], + s[-4*p], s[-3*p], s[-2*p], s[-1*p], + s[0*p], s[1*p], s[2*p], s[3*p]); + + hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); + + vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p); + + ++s; + } + while (++i < count * 8); +} + +void vp8_loop_filter_vertical_edge_c +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) +{ + int hev = 0; /* high edge variance */ + signed char mask = 0; + int i = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + do + { + mask = vp8_filter_mask(limit[0], blimit[0], + s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); + + hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); + + vp8_filter(mask, hev, s - 2, s - 1, s, s + 1); + + s += p; + } + while (++i < count * 8); +} + +static void vp8_mbfilter(signed char mask, uc hev, + uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) +{ + signed char s, u; + signed char filter_value, Filter1, Filter2; + signed char ps2 = (signed char) * op2 ^ 0x80; + signed char ps1 = (signed char) * op1 ^ 0x80; + signed char ps0 = (signed char) * op0 ^ 0x80; + signed char qs0 = (signed char) * oq0 ^ 0x80; + signed char qs1 = (signed char) * oq1 ^ 0x80; + signed char qs2 = (signed char) * oq2 ^ 0x80; + + /* add outer taps if we have high edge variance */ + filter_value = vp8_signed_char_clamp(ps1 - qs1); + filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); + filter_value &= mask; + + Filter2 = filter_value; + Filter2 &= hev; + + /* save bottom 3 bits so that we round one side +4 and the other +3 */ + Filter1 = vp8_signed_char_clamp(Filter2 + 4); + Filter2 = vp8_signed_char_clamp(Filter2 + 3); + Filter1 >>= 3; + Filter2 >>= 3; + qs0 = vp8_signed_char_clamp(qs0 - Filter1); + ps0 = vp8_signed_char_clamp(ps0 + Filter2); + + + /* only apply wider filter if not high edge variance */ + filter_value &= ~hev; + Filter2 = filter_value; + + /* roughly 3/7th difference across boundary */ + u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7); + s = vp8_signed_char_clamp(qs0 - u); + *oq0 = s ^ 0x80; + s = vp8_signed_char_clamp(ps0 + u); + *op0 = s ^ 0x80; + + /* roughly 2/7th difference across boundary */ + u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7); + s = vp8_signed_char_clamp(qs1 - u); + *oq1 = s ^ 0x80; + s = vp8_signed_char_clamp(ps1 + u); + *op1 = s ^ 0x80; + + /* roughly 1/7th difference across boundary */ + u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); + s = vp8_signed_char_clamp(qs2 - u); + *oq2 = s ^ 0x80; + s = vp8_signed_char_clamp(ps2 + u); + *op2 = s ^ 0x80; +} + +void vp8_mbloop_filter_horizontal_edge_c +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) +{ + signed char hev = 0; /* high edge variance */ + signed char mask = 0; + int i = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + do + { + + mask = vp8_filter_mask(limit[0], blimit[0], + s[-4*p], s[-3*p], s[-2*p], s[-1*p], + s[0*p], s[1*p], s[2*p], s[3*p]); + + hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); + + vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p); + + ++s; + } + while (++i < count * 8); + +} + + +void vp8_mbloop_filter_vertical_edge_c +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) +{ + signed char hev = 0; /* high edge variance */ + signed char mask = 0; + int i = 0; + + do + { + + mask = vp8_filter_mask(limit[0], blimit[0], + s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); + + hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); + + vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2); + + s += p; + } + while (++i < count * 8); + +} + +/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ +static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) +{ +/* Why does this cause problems for win32? + * error C2143: syntax error : missing ';' before 'type' + * (void) limit; + */ + signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; + return mask; +} + +static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) +{ + signed char filter_value, Filter1, Filter2; + signed char p1 = (signed char) * op1 ^ 0x80; + signed char p0 = (signed char) * op0 ^ 0x80; + signed char q0 = (signed char) * oq0 ^ 0x80; + signed char q1 = (signed char) * oq1 ^ 0x80; + signed char u; + + filter_value = vp8_signed_char_clamp(p1 - q1); + filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0)); + filter_value &= mask; + + /* save bottom 3 bits so that we round one side +4 and the other +3 */ + Filter1 = vp8_signed_char_clamp(filter_value + 4); + Filter1 >>= 3; + u = vp8_signed_char_clamp(q0 - Filter1); + *oq0 = u ^ 0x80; + + Filter2 = vp8_signed_char_clamp(filter_value + 3); + Filter2 >>= 3; + u = vp8_signed_char_clamp(p0 + Filter2); + *op0 = u ^ 0x80; +} + +void vp8_loop_filter_simple_horizontal_edge_c +( + unsigned char *s, + int p, + const unsigned char *blimit +) +{ + signed char mask = 0; + int i = 0; + + do + { + mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); + vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p); + ++s; + } + while (++i < 16); +} + +void vp8_loop_filter_simple_vertical_edge_c +( + unsigned char *s, + int p, + const unsigned char *blimit +) +{ + signed char mask = 0; + int i = 0; + + do + { + mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]); + vp8_simple_filter(mask, s - 2, s - 1, s, s + 1); + s += p; + } + while (++i < 16); + +} + +/* Horizontal MB filtering */ +void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + loop_filter_info *lfi) +{ + vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + loop_filter_info *lfi) +{ + vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + loop_filter_info *lfi) +{ + vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + +void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride, + const unsigned char *blimit) +{ + vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit); +} + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + loop_filter_info *lfi) +{ + vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + +void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride, + const unsigned char *blimit) +{ + vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit); +} diff --git a/thirdparty/libvpx/vp8/common/mbpitch.c b/thirdparty/libvpx/vp8/common/mbpitch.c new file mode 100644 index 00000000000..32e1b66409f --- /dev/null +++ b/thirdparty/libvpx/vp8/common/mbpitch.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "blockd.h" + +void vp8_setup_block_dptrs(MACROBLOCKD *x) +{ + int r, c; + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4; + } + } + + for (r = 0; r < 2; r++) + { + for (c = 0; c < 2; c++) + { + x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4; + + } + } + + for (r = 0; r < 2; r++) + { + for (c = 0; c < 2; c++) + { + x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4; + + } + } + + for (r = 0; r < 25; r++) + { + x->block[r].qcoeff = x->qcoeff + r * 16; + x->block[r].dqcoeff = x->dqcoeff + r * 16; + x->block[r].eob = x->eobs + r; + } +} + +void vp8_build_block_doffsets(MACROBLOCKD *x) +{ + int block; + + for (block = 0; block < 16; block++) /* y blocks */ + { + x->block[block].offset = + (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4; + } + + for (block = 16; block < 20; block++) /* U and V blocks */ + { + x->block[block+4].offset = + x->block[block].offset = + ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4; + } +} diff --git a/thirdparty/libvpx/vp8/common/modecont.c b/thirdparty/libvpx/vp8/common/modecont.c new file mode 100644 index 00000000000..86a74bc0ff5 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/modecont.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "entropy.h" + +const int vp8_mode_contexts[6][4] = +{ + { + /* 0 */ + 7, 1, 1, 143, + }, + { + /* 1 */ + 14, 18, 14, 107, + }, + { + /* 2 */ + 135, 64, 57, 68, + }, + { + /* 3 */ + 60, 56, 128, 65, + }, + { + /* 4 */ + 159, 134, 128, 34, + }, + { + /* 5 */ + 234, 188, 128, 28, + }, +}; diff --git a/thirdparty/libvpx/vp8/common/modecont.h b/thirdparty/libvpx/vp8/common/modecont.h new file mode 100644 index 00000000000..ff34c33c557 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/modecont.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_MODECONT_H_ +#define VP8_COMMON_MODECONT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern const int vp8_mode_contexts[6][4]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_MODECONT_H_ diff --git a/thirdparty/libvpx/vp8/common/mv.h b/thirdparty/libvpx/vp8/common/mv.h new file mode 100644 index 00000000000..111ccd63c72 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/mv.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_MV_H_ +#define VP8_COMMON_MV_H_ +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct +{ + short row; + short col; +} MV; + +typedef union int_mv +{ + uint32_t as_int; + MV as_mv; +} int_mv; /* facilitates faster equality tests and copies */ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_MV_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxc_int.h b/thirdparty/libvpx/vp8/common/onyxc_int.h new file mode 100644 index 00000000000..6d89865c600 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/onyxc_int.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ONYXC_INT_H_ +#define VP8_COMMON_ONYXC_INT_H_ + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vpx/internal/vpx_codec_internal.h" +#include "loopfilter.h" +#include "entropymv.h" +#include "entropy.h" +#if CONFIG_POSTPROC +#include "postproc.h" +#endif + +/*#ifdef PACKET_TESTING*/ +#include "header.h" +/*#endif*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MINQ 0 +#define MAXQ 127 +#define QINDEX_RANGE (MAXQ + 1) + +#define NUM_YV12_BUFFERS 4 + +#define MAX_PARTITIONS 9 + +typedef struct frame_contexts +{ + vp8_prob bmode_prob [VP8_BINTRAMODES-1]; + vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */ + vp8_prob uv_mode_prob [VP8_UV_MODES-1]; + vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; + vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + MV_CONTEXT mvc[2]; +} FRAME_CONTEXT; + +typedef enum +{ + ONE_PARTITION = 0, + TWO_PARTITION = 1, + FOUR_PARTITION = 2, + EIGHT_PARTITION = 3 +} TOKEN_PARTITION; + +typedef enum +{ + RECON_CLAMP_REQUIRED = 0, + RECON_CLAMP_NOTREQUIRED = 1 +} CLAMP_TYPE; + +typedef struct VP8Common + +{ + struct vpx_internal_error_info error; + + DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); + DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); + + int Width; + int Height; + int horiz_scale; + int vert_scale; + + CLAMP_TYPE clamp_type; + + YV12_BUFFER_CONFIG *frame_to_show; + + YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS]; + int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; + int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; + + YV12_BUFFER_CONFIG temp_scale_frame; + +#if CONFIG_POSTPROC + YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; + int post_proc_buffer_int_used; + unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ +#endif + + FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */ + FRAME_TYPE frame_type; + + int show_frame; + + int frame_flags; + int MBs; + int mb_rows; + int mb_cols; + int mode_info_stride; + + /* profile settings */ + int mb_no_coeff_skip; + int no_lpf; + int use_bilinear_mc_filter; + int full_pixel; + + int base_qindex; + + int y1dc_delta_q; + int y2dc_delta_q; + int y2ac_delta_q; + int uvdc_delta_q; + int uvac_delta_q; + + /* We allocate a MODE_INFO struct for each macroblock, together with + an extra row on top and column on the left to simplify prediction. */ + + MODE_INFO *mip; /* Base of allocated array */ + MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ +#if CONFIG_ERROR_CONCEALMENT + MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ + MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ +#endif + MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame + to show */ + LOOPFILTERTYPE filter_type; + + loop_filter_info_n lf_info; + + int filter_level; + int last_sharpness_level; + int sharpness_level; + + int refresh_last_frame; /* Two state 0 = NO, 1 = YES */ + int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */ + int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */ + + int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */ + int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */ + + int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ + + int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ + + /* Y,U,V,Y2 */ + ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ + ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ + + FRAME_CONTEXT lfc; /* last frame entropy */ + FRAME_CONTEXT fc; /* this frame entropy */ + + unsigned int current_video_frame; + + int version; + + TOKEN_PARTITION multi_token_partition; + +#ifdef PACKET_TESTING + VP8_HEADER oh; +#endif +#if CONFIG_POSTPROC_VISUALIZER + double bitrate; + double framerate; +#endif + +#if CONFIG_MULTITHREAD + int processor_core_count; +#endif +#if CONFIG_POSTPROC + struct postproc_state postproc_state; +#endif + int cpu_caps; +} VP8_COMMON; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxd.h b/thirdparty/libvpx/vp8/common/onyxd.h new file mode 100644 index 00000000000..e37b29f32cf --- /dev/null +++ b/thirdparty/libvpx/vp8/common/onyxd.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_ONYXD_H_ +#define VP8_COMMON_ONYXD_H_ + + +/* Create/destroy static data structures. */ +#ifdef __cplusplus +extern "C" +{ +#endif +#include "vpx_scale/yv12config.h" +#include "ppflags.h" +#include "vpx_ports/mem.h" +#include "vpx/vpx_codec.h" +#include "vpx/vp8.h" + + struct VP8D_COMP; + + typedef struct + { + int Width; + int Height; + int Version; + int postprocess; + int max_threads; + int error_concealment; + } VP8D_CONFIG; + + typedef enum + { + VP8D_OK = 0 + } VP8D_SETTING; + + void vp8dx_initialize(void); + + void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x); + + int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); + + int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, + size_t size, const uint8_t *dest, + int64_t time_stamp); + int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); + + vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); + +#ifdef __cplusplus +} +#endif + + +#endif // VP8_COMMON_ONYXD_H_ diff --git a/thirdparty/libvpx/vp8/common/ppflags.h b/thirdparty/libvpx/vp8/common/ppflags.h new file mode 100644 index 00000000000..768224aad5e --- /dev/null +++ b/thirdparty/libvpx/vp8/common/ppflags.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_PPFLAGS_H_ +#define VP8_COMMON_PPFLAGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif +enum +{ + VP8D_NOFILTERING = 0, + VP8D_DEBLOCK = 1<<0, + VP8D_DEMACROBLOCK = 1<<1, + VP8D_ADDNOISE = 1<<2, + VP8D_DEBUG_TXT_FRAME_INFO = 1<<3, + VP8D_DEBUG_TXT_MBLK_MODES = 1<<4, + VP8D_DEBUG_TXT_DC_DIFF = 1<<5, + VP8D_DEBUG_TXT_RATE_INFO = 1<<6, + VP8D_DEBUG_DRAW_MV = 1<<7, + VP8D_DEBUG_CLR_BLK_MODES = 1<<8, + VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9, + VP8D_MFQE = 1<<10 +}; + +typedef struct +{ + int post_proc_flag; + int deblocking_level; + int noise_level; + int display_ref_frame_flag; + int display_mb_modes_flag; + int display_b_modes_flag; + int display_mv_flag; +} vp8_ppflags_t; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp8/common/quant_common.c b/thirdparty/libvpx/vp8/common/quant_common.c new file mode 100644 index 00000000000..05f92107028 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/quant_common.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "quant_common.h" + +static const int dc_qlookup[QINDEX_RANGE] = +{ + 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, +}; + +static const int ac_qlookup[QINDEX_RANGE] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, +}; + + +int vp8_dc_quant(int QIndex, int Delta) +{ + int retval; + + QIndex = QIndex + Delta; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + retval = dc_qlookup[ QIndex ]; + return retval; +} + +int vp8_dc2quant(int QIndex, int Delta) +{ + int retval; + + QIndex = QIndex + Delta; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + retval = dc_qlookup[ QIndex ] * 2; + return retval; + +} +int vp8_dc_uv_quant(int QIndex, int Delta) +{ + int retval; + + QIndex = QIndex + Delta; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + retval = dc_qlookup[ QIndex ]; + + if (retval > 132) + retval = 132; + + return retval; +} + +int vp8_ac_yquant(int QIndex) +{ + int retval; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + retval = ac_qlookup[ QIndex ]; + return retval; +} + +int vp8_ac2quant(int QIndex, int Delta) +{ + int retval; + + QIndex = QIndex + Delta; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. + * The smallest precision for that is '(x*6349) >> 12' but 16 is a good + * word size. */ + retval = (ac_qlookup[ QIndex ] * 101581) >> 16; + + if (retval < 8) + retval = 8; + + return retval; +} +int vp8_ac_uv_quant(int QIndex, int Delta) +{ + int retval; + + QIndex = QIndex + Delta; + + if (QIndex > 127) + QIndex = 127; + else if (QIndex < 0) + QIndex = 0; + + retval = ac_qlookup[ QIndex ]; + return retval; +} diff --git a/thirdparty/libvpx/vp8/common/quant_common.h b/thirdparty/libvpx/vp8/common/quant_common.h new file mode 100644 index 00000000000..700b5e6d726 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/quant_common.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_COMMON_QUANT_COMMON_H_ +#define VP8_COMMON_QUANT_COMMON_H_ + + +#include "string.h" +#include "blockd.h" +#include "onyxc_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int vp8_ac_yquant(int QIndex); +extern int vp8_dc_quant(int QIndex, int Delta); +extern int vp8_dc2quant(int QIndex, int Delta); +extern int vp8_ac2quant(int QIndex, int Delta); +extern int vp8_dc_uv_quant(int QIndex, int Delta); +extern int vp8_ac_uv_quant(int QIndex, int Delta); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/reconinter.c b/thirdparty/libvpx/vp8/common/reconinter.c new file mode 100644 index 00000000000..e3025955871 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconinter.c @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vpx/vpx_integer.h" +#include "blockd.h" +#include "reconinter.h" +#if CONFIG_RUNTIME_CPU_DETECT +#include "onyxc_int.h" +#endif + +void vp8_copy_mem16x16_c( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) +{ + + int r; + + for (r = 0; r < 16; r++) + { + memcpy(dst, src, 16); + + src += src_stride; + dst += dst_stride; + + } + +} + +void vp8_copy_mem8x8_c( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) +{ + int r; + + for (r = 0; r < 8; r++) + { + memcpy(dst, src, 8); + + src += src_stride; + dst += dst_stride; + + } + +} + +void vp8_copy_mem8x4_c( + unsigned char *src, + int src_stride, + unsigned char *dst, + int dst_stride) +{ + int r; + + for (r = 0; r < 4; r++) + { + memcpy(dst, src, 8); + + src += src_stride; + dst += dst_stride; + + } + +} + + +void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) +{ + int r; + unsigned char *pred_ptr = d->predictor; + unsigned char *ptr; + ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); + + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); + } + else + { + for (r = 0; r < 4; r++) + { + pred_ptr[0] = ptr[0]; + pred_ptr[1] = ptr[1]; + pred_ptr[2] = ptr[2]; + pred_ptr[3] = ptr[3]; + pred_ptr += pitch; + ptr += pre_stride; + } + } +} + +static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) +{ + unsigned char *ptr; + ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); + + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); + } + else + { + vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride); + } +} + +static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) +{ + unsigned char *ptr; + ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); + + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); + } + else + { + vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride); + } +} + +static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) +{ + int r; + unsigned char *ptr; + ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); + + if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) + { + sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); + } + else + { + for (r = 0; r < 4; r++) + { + dst[0] = ptr[0]; + dst[1] = ptr[1]; + dst[2] = ptr[2]; + dst[3] = ptr[3]; + dst += dst_stride; + ptr += pre_stride; + } + } +} + + +/*encoder only*/ +void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) +{ + unsigned char *uptr, *vptr; + unsigned char *upred_ptr = &x->predictor[256]; + unsigned char *vpred_ptr = &x->predictor[320]; + + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int offset; + int pre_stride = x->pre.uv_stride; + + /* calc uv motion vectors */ + mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1)); + mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1)); + mv_row /= 2; + mv_col /= 2; + mv_row &= x->fullpixel_mask; + mv_col &= x->fullpixel_mask; + + offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); + uptr = x->pre.u_buffer + offset; + vptr = x->pre.v_buffer + offset; + + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8); + x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8); + } + else + { + vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8); + vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8); + } +} + +/*encoder only*/ +void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) +{ + int i, j; + int pre_stride = x->pre.uv_stride; + unsigned char *base_pre; + + /* build uv mvs */ + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + int yoffset = i * 8 + j * 2; + int uoffset = 16 + i * 2 + j; + int voffset = 20 + i * 2 + j; + + int temp; + + temp = x->block[yoffset ].bmi.mv.as_mv.row + + x->block[yoffset+1].bmi.mv.as_mv.row + + x->block[yoffset+4].bmi.mv.as_mv.row + + x->block[yoffset+5].bmi.mv.as_mv.row; + + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); + + x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; + + temp = x->block[yoffset ].bmi.mv.as_mv.col + + x->block[yoffset+1].bmi.mv.as_mv.col + + x->block[yoffset+4].bmi.mv.as_mv.col + + x->block[yoffset+5].bmi.mv.as_mv.col; + + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); + + x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; + + x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; + } + } + + base_pre = x->pre.u_buffer; + for (i = 16; i < 20; i += 2) + { + BLOCKD *d0 = &x->block[i]; + BLOCKD *d1 = &x->block[i+1]; + + if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) + build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); + else + { + vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); + vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); + } + } + + base_pre = x->pre.v_buffer; + for (i = 20; i < 24; i += 2) + { + BLOCKD *d0 = &x->block[i]; + BLOCKD *d1 = &x->block[i+1]; + + if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) + build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); + else + { + vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); + vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); + } + } +} + + +/*encoder only*/ +void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_ystride) +{ + unsigned char *ptr_base; + unsigned char *ptr; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->pre.y_stride; + + ptr_base = x->pre.y_buffer; + ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); + + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, + dst_y, dst_ystride); + } + else + { + vp8_copy_mem16x16(ptr, pre_stride, dst_y, + dst_ystride); + } +} + +static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) +{ + /* If the MV points so far into the UMV border that no visible pixels + * are used for reconstruction, the subpel part of the MV can be + * discarded and the MV limited to 16 pixels with equivalent results. + * + * This limit kicks in at 19 pixels for the top and left edges, for + * the 16 pixels plus 3 taps right of the central pixel when subpel + * filtering. The bottom and right edges use 16 pixels plus 2 pixels + * left of the central pixel when filtering. + */ + if (mv->col < (xd->mb_to_left_edge - (19 << 3))) + mv->col = xd->mb_to_left_edge - (16 << 3); + else if (mv->col > xd->mb_to_right_edge + (18 << 3)) + mv->col = xd->mb_to_right_edge + (16 << 3); + + if (mv->row < (xd->mb_to_top_edge - (19 << 3))) + mv->row = xd->mb_to_top_edge - (16 << 3); + else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) + mv->row = xd->mb_to_bottom_edge + (16 << 3); +} + +/* A version of the above function for chroma block MVs.*/ +static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) +{ + mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? + (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; + mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? + (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; + + mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? + (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; + mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? + (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; +} + +void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride) +{ + int offset; + unsigned char *ptr; + unsigned char *uptr, *vptr; + + int_mv _16x16mv; + + unsigned char *ptr_base = x->pre.y_buffer; + int pre_stride = x->pre.y_stride; + + _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int; + + if (x->mode_info_context->mbmi.need_to_clamp_mvs) + { + clamp_mv_to_umv_border(&_16x16mv.as_mv, x); + } + + ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); + + if ( _16x16mv.as_int & 0x00070007) + { + x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride); + } + else + { + vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride); + } + + /* calc uv motion vectors */ + _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1)); + _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1)); + _16x16mv.as_mv.row /= 2; + _16x16mv.as_mv.col /= 2; + _16x16mv.as_mv.row &= x->fullpixel_mask; + _16x16mv.as_mv.col &= x->fullpixel_mask; + + pre_stride >>= 1; + offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); + uptr = x->pre.u_buffer + offset; + vptr = x->pre.v_buffer + offset; + + if ( _16x16mv.as_int & 0x00070007) + { + x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride); + x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride); + } + else + { + vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride); + vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride); + } +} + +static void build_inter4x4_predictors_mb(MACROBLOCKD *x) +{ + int i; + unsigned char *base_dst = x->dst.y_buffer; + unsigned char *base_pre = x->pre.y_buffer; + + if (x->mode_info_context->mbmi.partitioning < 3) + { + BLOCKD *b; + int dst_stride = x->dst.y_stride; + + x->block[ 0].bmi = x->mode_info_context->bmi[ 0]; + x->block[ 2].bmi = x->mode_info_context->bmi[ 2]; + x->block[ 8].bmi = x->mode_info_context->bmi[ 8]; + x->block[10].bmi = x->mode_info_context->bmi[10]; + if (x->mode_info_context->mbmi.need_to_clamp_mvs) + { + clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x); + clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x); + clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x); + clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x); + } + + b = &x->block[ 0]; + build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); + b = &x->block[ 2]; + build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); + b = &x->block[ 8]; + build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); + b = &x->block[10]; + build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); + } + else + { + for (i = 0; i < 16; i += 2) + { + BLOCKD *d0 = &x->block[i]; + BLOCKD *d1 = &x->block[i+1]; + int dst_stride = x->dst.y_stride; + + x->block[i+0].bmi = x->mode_info_context->bmi[i+0]; + x->block[i+1].bmi = x->mode_info_context->bmi[i+1]; + if (x->mode_info_context->mbmi.need_to_clamp_mvs) + { + clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x); + clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x); + } + + if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) + build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); + else + { + build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + } + + } + + } + base_dst = x->dst.u_buffer; + base_pre = x->pre.u_buffer; + for (i = 16; i < 20; i += 2) + { + BLOCKD *d0 = &x->block[i]; + BLOCKD *d1 = &x->block[i+1]; + int dst_stride = x->dst.uv_stride; + + /* Note: uv mvs already clamped in build_4x4uvmvs() */ + + if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) + build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); + else + { + build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + } + } + + base_dst = x->dst.v_buffer; + base_pre = x->pre.v_buffer; + for (i = 20; i < 24; i += 2) + { + BLOCKD *d0 = &x->block[i]; + BLOCKD *d1 = &x->block[i+1]; + int dst_stride = x->dst.uv_stride; + + /* Note: uv mvs already clamped in build_4x4uvmvs() */ + + if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) + build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); + else + { + build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); + } + } +} + +static +void build_4x4uvmvs(MACROBLOCKD *x) +{ + int i, j; + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + int yoffset = i * 8 + j * 2; + int uoffset = 16 + i * 2 + j; + int voffset = 20 + i * 2 + j; + + int temp; + + temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row + + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row + + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row + + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row; + + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); + + x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; + + temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col + + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col + + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col + + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col; + + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); + + x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; + + if (x->mode_info_context->mbmi.need_to_clamp_mvs) + clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x); + + x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; + } + } +} + +void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) +{ + if (xd->mode_info_context->mbmi.mode != SPLITMV) + { + vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + else + { + build_4x4uvmvs(xd); + build_inter4x4_predictors_mb(xd); + } +} diff --git a/thirdparty/libvpx/vp8/common/reconinter.h b/thirdparty/libvpx/vp8/common/reconinter.h new file mode 100644 index 00000000000..ba979b9664a --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconinter.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_RECONINTER_H_ +#define VP8_COMMON_RECONINTER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); +extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, + unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, + int dst_ystride, + int dst_uvstride); + + +extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_ystride); +extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, + unsigned char *base_pre, + int pre_stride, + vp8_subpix_fn_t sppf); + +extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); +extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra.c b/thirdparty/libvpx/vp8/common/reconintra.c new file mode 100644 index 00000000000..356655dac7b --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconintra.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "./vp8_rtcd.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" +#include "blockd.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" + +enum { + SIZE_16, + SIZE_8, + NUM_SIZES, +}; + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[4][NUM_SIZES]; +static intra_pred_fn dc_pred[2][2][NUM_SIZES]; + +static void vp8_init_intra_predictors_internal(void) +{ +#define INIT_SIZE(sz) \ + pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \ + pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \ + pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \ + \ + dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \ + dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \ + dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \ + dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz + + INIT_SIZE(16); + INIT_SIZE(8); + vp8_init_intra4x4_predictors_internal(); +} + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char * yabove_row, + unsigned char * yleft, + int left_stride, + unsigned char * ypred_ptr, + int y_stride) +{ + MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; + DECLARE_ALIGNED(16, uint8_t, yleft_col[16]); + int i; + intra_pred_fn fn; + + for (i = 0; i < 16; i++) + { + yleft_col[i] = yleft[i* left_stride]; + } + + if (mode == DC_PRED) + { + fn = dc_pred[x->left_available][x->up_available][SIZE_16]; + } + else + { + fn = pred[mode][SIZE_16]; + } + + fn(ypred_ptr, y_stride, yabove_row, yleft_col); +} + +void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, + unsigned char * uabove_row, + unsigned char * vabove_row, + unsigned char * uleft, + unsigned char * vleft, + int left_stride, + unsigned char * upred_ptr, + unsigned char * vpred_ptr, + int pred_stride) +{ + MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode; + unsigned char uleft_col[8]; + unsigned char vleft_col[8]; + int i; + intra_pred_fn fn; + + for (i = 0; i < 8; i++) + { + uleft_col[i] = uleft[i * left_stride]; + vleft_col[i] = vleft[i * left_stride]; + } + + if (uvmode == DC_PRED) + { + fn = dc_pred[x->left_available][x->up_available][SIZE_8]; + } + else + { + fn = pred[uvmode][SIZE_8]; + } + + fn(upred_ptr, pred_stride, uabove_row, uleft_col); + fn(vpred_ptr, pred_stride, vabove_row, vleft_col); +} + +void vp8_init_intra_predictors(void) +{ + once(vp8_init_intra_predictors_internal); +} diff --git a/thirdparty/libvpx/vp8/common/reconintra.h b/thirdparty/libvpx/vp8/common/reconintra.h new file mode 100644 index 00000000000..b6225a66379 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconintra.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_RECONINTRA_H_ +#define VP8_COMMON_RECONINTRA_H_ + +#include "vp8/common/blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char *yabove_row, + unsigned char *yleft, + int left_stride, + unsigned char *ypred_ptr, + int y_stride); + +void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, + unsigned char * uabove_row, + unsigned char * vabove_row, + unsigned char * uleft, + unsigned char * vleft, + int left_stride, + unsigned char * upred_ptr, + unsigned char * vpred_ptr, + int pred_stride); + +void vp8_init_intra_predictors(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.c b/thirdparty/libvpx/vp8/common/reconintra4x4.c new file mode 100644 index 00000000000..35ad891eff4 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconintra4x4.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vp8_rtcd.h" +#include "blockd.h" + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[10]; + +void vp8_init_intra4x4_predictors_internal(void) +{ + pred[B_DC_PRED] = vpx_dc_predictor_4x4; + pred[B_TM_PRED] = vpx_tm_predictor_4x4; + pred[B_VE_PRED] = vpx_ve_predictor_4x4; + pred[B_HE_PRED] = vpx_he_predictor_4x4; + pred[B_LD_PRED] = vpx_d45e_predictor_4x4; + pred[B_RD_PRED] = vpx_d135_predictor_4x4; + pred[B_VR_PRED] = vpx_d117_predictor_4x4; + pred[B_VL_PRED] = vpx_d63f_predictor_4x4; + pred[B_HD_PRED] = vpx_d153_predictor_4x4; + pred[B_HU_PRED] = vpx_d207_predictor_4x4; +} + +void vp8_intra4x4_predict(unsigned char *above, + unsigned char *yleft, int left_stride, + B_PREDICTION_MODE b_mode, + unsigned char *dst, int dst_stride, + unsigned char top_left) +{ + unsigned char Left[4]; + unsigned char Aboveb[12], *Above = Aboveb + 4; + + Left[0] = yleft[0]; + Left[1] = yleft[left_stride]; + Left[2] = yleft[2 * left_stride]; + Left[3] = yleft[3 * left_stride]; + memcpy(Above, above, 8); + Above[-1] = top_left; + + pred[b_mode](dst, dst_stride, Above, Left); +} diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.h b/thirdparty/libvpx/vp8/common/reconintra4x4.h new file mode 100644 index 00000000000..5dc5d13a5c1 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/reconintra4x4.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_RECONINTRA4X4_H_ +#define VP8_COMMON_RECONINTRA4X4_H_ +#include "vp8/common/blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, + unsigned char *above_right_src) +{ + int dst_stride = xd->dst.y_stride; + unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; + + unsigned int *src_ptr = (unsigned int *)above_right_src; + unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride); + unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride); + unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride); + + *dst_ptr0 = *src_ptr; + *dst_ptr1 = *src_ptr; + *dst_ptr2 = *src_ptr; +} + +void vp8_intra4x4_predict(unsigned char *Above, + unsigned char *yleft, int left_stride, + B_PREDICTION_MODE b_mode, + unsigned char *dst, int dst_stride, + unsigned char top_left); + +void vp8_init_intra4x4_predictors_internal(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_RECONINTRA4X4_H_ diff --git a/thirdparty/libvpx/vp8/common/rtcd.c b/thirdparty/libvpx/vp8/common/rtcd.c new file mode 100644 index 00000000000..ab0e9b47fe8 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/rtcd.c @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vp8_rtcd.h" +#include "vpx_ports/vpx_once.h" + + +void vp8_rtcd() +{ + once(setup_rtcd_internal); +} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.c b/thirdparty/libvpx/vp8/common/setupintrarecon.c new file mode 100644 index 00000000000..669564db42b --- /dev/null +++ b/thirdparty/libvpx/vp8/common/setupintrarecon.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "setupintrarecon.h" +#include "vpx_mem/vpx_mem.h" + +void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) +{ + int i; + + /* set up frame new frame for intra coded blocks */ + memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); + for (i = 0; i < ybf->y_height; i++) + ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129; + + memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + for (i = 0; i < ybf->uv_height; i++) + ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; + + memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + for (i = 0; i < ybf->uv_height; i++) + ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; + +} + +void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) +{ + memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); + memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); + memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); +} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.h b/thirdparty/libvpx/vp8/common/setupintrarecon.h new file mode 100644 index 00000000000..1857c4e26a5 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/setupintrarecon.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_COMMON_SETUPINTRARECON_H_ +#define VP8_COMMON_SETUPINTRARECON_H_ + +#include "./vpx_config.h" +#include "vpx_scale/yv12config.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); +extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); + +static INLINE void setup_intra_recon_left(unsigned char *y_buffer, + unsigned char *u_buffer, + unsigned char *v_buffer, + int y_stride, + int uv_stride) +{ + int i; + + for (i = 0; i < 16; i++) + y_buffer[y_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + u_buffer[uv_stride *i] = (unsigned char) 129; + + for (i = 0; i < 8; i++) + v_buffer[uv_stride *i] = (unsigned char) 129; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_SETUPINTRARECON_H_ diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.c b/thirdparty/libvpx/vp8/common/swapyv12buffer.c new file mode 100644 index 00000000000..73656b3d721 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/swapyv12buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "swapyv12buffer.h" + +void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame) +{ + unsigned char *temp; + + temp = last_frame->buffer_alloc; + last_frame->buffer_alloc = new_frame->buffer_alloc; + new_frame->buffer_alloc = temp; + + temp = last_frame->y_buffer; + last_frame->y_buffer = new_frame->y_buffer; + new_frame->y_buffer = temp; + + temp = last_frame->u_buffer; + last_frame->u_buffer = new_frame->u_buffer; + new_frame->u_buffer = temp; + + temp = last_frame->v_buffer; + last_frame->v_buffer = new_frame->v_buffer; + new_frame->v_buffer = temp; + +} diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.h b/thirdparty/libvpx/vp8/common/swapyv12buffer.h new file mode 100644 index 00000000000..1d66cd3d62e --- /dev/null +++ b/thirdparty/libvpx/vp8/common/swapyv12buffer.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_SWAPYV12BUFFER_H_ +#define VP8_COMMON_SWAPYV12BUFFER_H_ + +#include "vpx_scale/yv12config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_SWAPYV12BUFFER_H_ diff --git a/thirdparty/libvpx/vp8/common/systemdependent.h b/thirdparty/libvpx/vp8/common/systemdependent.h new file mode 100644 index 00000000000..3d44e37cf24 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/systemdependent.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_ +#define VP8_COMMON_SYSTEMDEPENDENT_H_ + +#include "vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP8Common; +void vp8_machine_specific_config(struct VP8Common *); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_SYSTEMDEPENDENT_H_ diff --git a/thirdparty/libvpx/vp8/common/threading.h b/thirdparty/libvpx/vp8/common/threading.h new file mode 100644 index 00000000000..183b49b8ff1 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/threading.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_THREADING_H_ +#define VP8_COMMON_THREADING_H_ + +#include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD + +/* Thread management macros */ +#if defined(_WIN32) && !HAVE_PTHREAD_H +/* Win32 */ +#include +#include +#define THREAD_FUNCTION unsigned int __stdcall +#define THREAD_FUNCTION_RETURN DWORD +#define THREAD_SPECIFIC_INDEX DWORD +#define pthread_t HANDLE +#define pthread_attr_t DWORD +#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) +#define thread_sleep(nms) Sleep(nms) +#define pthread_cancel(thread) terminate_thread(thread,0) +#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();}; +#define pthread_getspecific(ts_key) TlsGetValue(ts_key) +#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value) +#define pthread_self() GetCurrentThreadId() + +#elif defined(__OS2__) +/* OS/2 */ +#define INCL_DOS +#include + +#include +#define THREAD_FUNCTION void * +#define THREAD_FUNCTION_RETURN void * +#define THREAD_SPECIFIC_INDEX PULONG +#define pthread_t TID +#define pthread_attr_t ULONG +#define pthread_detach(thread) 0 +#define thread_sleep(nms) DosSleep(nms) +#define pthread_cancel(thread) DosKillThread(thread) +#define ts_key_create(ts_key, destructor) \ + DosAllocThreadLocalMemory(1, &(ts_key)); +#define pthread_getspecific(ts_key) ((void *)(*(ts_key))) +#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value)) +#define pthread_self() _gettid() +#else +#ifdef __APPLE__ +#include +#include +#include +#include +#include + +#else +#include +#endif + +#include +/* pthreads */ +/* Nearly everything is already defined */ +#define THREAD_FUNCTION void * +#define THREAD_FUNCTION_RETURN void * +#define THREAD_SPECIFIC_INDEX pthread_key_t +#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor); +#endif + +/* Synchronization macros: Win32 and Pthreads */ +#if defined(_WIN32) && !HAVE_PTHREAD_H +#define sem_t HANDLE +#define pause(voidpara) __asm PAUSE +#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) +#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) +#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL) +#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) +#define thread_sleep(nms) Sleep(nms) + +#elif defined(__OS2__) +typedef struct +{ + HEV event; + HMTX wait_mutex; + HMTX count_mutex; + int count; +} sem_t; + +static inline int sem_init(sem_t *sem, int pshared, unsigned int value) +{ + DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0, + value > 0 ? TRUE : FALSE); + DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE); + DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE); + + sem->count = value; + + return 0; +} + +static inline int sem_wait(sem_t * sem) +{ + DosRequestMutexSem(sem->wait_mutex, -1); + + DosWaitEventSem(sem->event, -1); + + DosRequestMutexSem(sem->count_mutex, -1); + + sem->count--; + if (sem->count == 0) + { + ULONG post_count; + + DosResetEventSem(sem->event, &post_count); + } + + DosReleaseMutexSem(sem->count_mutex); + + DosReleaseMutexSem(sem->wait_mutex); + + return 0; +} + +static inline int sem_post(sem_t * sem) +{ + DosRequestMutexSem(sem->count_mutex, -1); + + if (sem->count < 32768) + { + sem->count++; + DosPostEventSem(sem->event); + } + + DosReleaseMutexSem(sem->count_mutex); + + return 0; +} + +static inline int sem_destroy(sem_t * sem) +{ + DosCloseEventSem(sem->event); + DosCloseMutexSem(sem->wait_mutex); + DosCloseMutexSem(sem->count_mutex); + + return 0; +} + +#define thread_sleep(nms) DosSleep(nms) + +#else + +#ifdef __APPLE__ +#define sem_t semaphore_t +#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z) +#define sem_wait(sem) (semaphore_wait(*sem) ) +#define sem_post(sem) semaphore_signal(*sem) +#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem) +#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ +#else +#include +#include +#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ +#endif +/* Not Windows. Assume pthreads */ + +#endif + +#if ARCH_X86 || ARCH_X86_64 +#include "vpx_ports/x86.h" +#else +#define x86_pause_hint() +#endif + +#include "vpx_util/vpx_thread.h" + +static INLINE void mutex_lock(pthread_mutex_t *const mutex) { + const int kMaxTryLocks = 4000; + int locked = 0; + int i; + + for (i = 0; i < kMaxTryLocks; ++i) { + if (!pthread_mutex_trylock(mutex)) { + locked = 1; + break; + } + } + + if (!locked) + pthread_mutex_lock(mutex); +} + +static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) { + int ret; + mutex_lock(mutex); + ret = *p; + pthread_mutex_unlock(mutex); + return ret; +} + +static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col, + const int *last_row_current_mb_col, + const int nsync) { + while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) { + x86_pause_hint(); + thread_sleep(0); + } +} + +static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) { + mutex_lock(mutex); + *p = v; + pthread_mutex_unlock(mutex); +} + +#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_THREADING_H_ diff --git a/thirdparty/libvpx/vp8/common/treecoder.c b/thirdparty/libvpx/vp8/common/treecoder.c new file mode 100644 index 00000000000..d80c64bdfa1 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/treecoder.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#if CONFIG_DEBUG +#include +#endif +#include + +#include "treecoder.h" + +static void tree2tok( + struct vp8_token_struct *const p, + vp8_tree t, + int i, + int v, + int L +) +{ + v += v; + ++L; + + do + { + const vp8_tree_index j = t[i++]; + + if (j <= 0) + { + p[-j].value = v; + p[-j].Len = L; + } + else + tree2tok(p, t, j, v, L); + } + while (++v & 1); +} + +void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) +{ + tree2tok(p, t, 0, 0, 0); +} + +void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t, + int offset) +{ + tree2tok(p - offset, t, 0, 0, 0); +} + +static void branch_counts( + int n, /* n = size of alphabet */ + vp8_token tok [ /* n */ ], + vp8_tree tree, + unsigned int branch_ct [ /* n-1 */ ] [2], + const unsigned int num_events[ /* n */ ] +) +{ + const int tree_len = n - 1; + int t = 0; + +#if CONFIG_DEBUG + assert(tree_len); +#endif + + do + { + branch_ct[t][0] = branch_ct[t][1] = 0; + } + while (++t < tree_len); + + t = 0; + + do + { + int L = tok[t].Len; + const int enc = tok[t].value; + const unsigned int ct = num_events[t]; + + vp8_tree_index i = 0; + + do + { + const int b = (enc >> --L) & 1; + const int j = i >> 1; +#if CONFIG_DEBUG + assert(j < tree_len && 0 <= L); +#endif + + branch_ct [j] [b] += ct; + i = tree[ i + b]; + } + while (i > 0); + +#if CONFIG_DEBUG + assert(!L); +#endif + } + while (++t < n); + +} + + +void vp8_tree_probs_from_distribution( + int n, /* n = size of alphabet */ + vp8_token tok [ /* n */ ], + vp8_tree tree, + vp8_prob probs [ /* n-1 */ ], + unsigned int branch_ct [ /* n-1 */ ] [2], + const unsigned int num_events[ /* n */ ], + unsigned int Pfac, + int rd +) +{ + const int tree_len = n - 1; + int t = 0; + + branch_counts(n, tok, tree, branch_ct, num_events); + + do + { + const unsigned int *const c = branch_ct[t]; + const unsigned int tot = c[0] + c[1]; + +#if CONFIG_DEBUG + assert(tot < (1 << 24)); /* no overflow below */ +#endif + + if (tot) + { + const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot; + probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ + } + else + probs[t] = vp8_prob_half; + } + while (++t < tree_len); +} diff --git a/thirdparty/libvpx/vp8/common/treecoder.h b/thirdparty/libvpx/vp8/common/treecoder.h new file mode 100644 index 00000000000..d22b7c570cb --- /dev/null +++ b/thirdparty/libvpx/vp8/common/treecoder.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_TREECODER_H_ +#define VP8_COMMON_TREECODER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned char vp8bc_index_t; /* probability index */ + + +typedef unsigned char vp8_prob; + +#define vp8_prob_half ( (vp8_prob) 128) + +typedef signed char vp8_tree_index; +struct bool_coder_spec; + +typedef struct bool_coder_spec bool_coder_spec; +typedef struct bool_writer bool_writer; +typedef struct bool_reader bool_reader; + +typedef const bool_coder_spec c_bool_coder_spec; +typedef const bool_writer c_bool_writer; +typedef const bool_reader c_bool_reader; + + + +# define vp8_complement( x) (255 - x) + + +/* We build coding trees compactly in arrays. + Each node of the tree is a pair of vp8_tree_indices. + Array index often references a corresponding probability table. + Index <= 0 means done encoding/decoding and value = -Index, + Index > 0 means need another bit, specification at index. + Nonnegative indices are always even; processing begins at node 0. */ + +typedef const vp8_tree_index vp8_tree[], *vp8_tree_p; + + +typedef const struct vp8_token_struct +{ + int value; + int Len; +} vp8_token; + +/* Construct encoding array from tree. */ + +void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree); +void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree, + int offset); + + +/* Convert array of token occurrence counts into a table of probabilities + for the associated binary encoding tree. Also writes count of branches + taken for each node on the tree; this facilitiates decisions as to + probability updates. */ + +void vp8_tree_probs_from_distribution( + int n, /* n = size of alphabet */ + vp8_token tok [ /* n */ ], + vp8_tree tree, + vp8_prob probs [ /* n-1 */ ], + unsigned int branch_ct [ /* n-1 */ ] [2], + const unsigned int num_events[ /* n */ ], + unsigned int Pfactor, + int Round +); + +/* Variant of above using coder spec rather than hardwired 8-bit probs. */ + +void vp8bc_tree_probs_from_distribution( + int n, /* n = size of alphabet */ + vp8_token tok [ /* n */ ], + vp8_tree tree, + vp8_prob probs [ /* n-1 */ ], + unsigned int branch_ct [ /* n-1 */ ] [2], + const unsigned int num_events[ /* n */ ], + c_bool_coder_spec *s +); + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_TREECODER_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h b/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h new file mode 100644 index 00000000000..c4aed498970 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. +*/ + +#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_ +#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/*Generated file, included by entropymode.c*/ + + +const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = +{ + { 0, 1 }, + { 2, 2 }, + { 6, 3 }, + { 28, 5 }, + { 30, 5 }, + { 58, 6 }, + { 59, 6 }, + { 62, 6 }, + { 126, 7 }, + { 127, 7 } +}; + +const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = +{ + { 0, 1 }, + { 4, 3 }, + { 5, 3 }, + { 6, 3 }, + { 7, 3 } +}; + +const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = +{ + { 4, 3 }, + { 5, 3 }, + { 6, 3 }, + { 7, 3 }, + { 0, 1 } +}; + +const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = +{ + { 0, 1 }, + { 2, 2 }, + { 6, 3 }, + { 7, 3 } +}; + +const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = +{ + { 6, 3 }, + { 7, 3 }, + { 2, 2 }, + { 0, 1 } +}; + +const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = +{ + { 2, 2 }, + { 6, 3 }, + { 0, 1 }, + { 14, 4 }, + { 15, 4 } +}; + +const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = +{ + { 0, 1 }, + { 2, 2 }, + { 6, 3 }, + { 7, 3 } +}; + +const struct vp8_token_struct vp8_small_mvencodings[8] = +{ + { 0, 3 }, + { 1, 3 }, + { 2, 3 }, + { 3, 3 }, + { 4, 3 }, + { 5, 3 }, + { 6, 3 }, + { 7, 3 } +}; + +const vp8_prob vp8_ymode_prob[VP8_YMODES-1] = +{ + 112, 86, 140, 37 +}; + +const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] = +{ + 145, 156, 163, 128 +}; + +const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] = +{ + 162, 101, 204 +}; + +const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] = +{ + 142, 114, 183 +}; + +const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] = +{ + 120, 90, 79, 133, 87, 85, 80, 111, 151 +}; + + + +const vp8_prob vp8_kf_bmode_prob +[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] = +{ + { + { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, + { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, + { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, + { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, + { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, + { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, + { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, + { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, + { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, + { 81, 40, 11, 96, 182, 84, 29, 16, 36 } + }, + { + { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, + { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, + { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, + { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, + { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, + { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, + { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, + { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, + { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, + { 66, 45, 25, 102, 197, 189, 23, 18, 22 } + }, + { + { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, + { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, + { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, + { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, + { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, + { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, + { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, + { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, + { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, + { 62, 18, 78, 95, 85, 57, 50, 48, 51 } + }, + { + { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, + { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, + { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, + { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, + { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, + { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, + { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, + { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, + { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, + { 51, 50, 17, 168, 209, 192, 23, 25, 82 } + }, + { + { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, + { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, + { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, + { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, + { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, + { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, + { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, + { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, + { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, + { 117, 20, 15, 36, 163, 128, 68, 1, 26 } + }, + { + { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, + { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, + { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, + { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, + { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, + { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, + { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, + { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, + { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, + { 87, 37, 9, 115, 59, 77, 64, 21, 47 } + }, + { + { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, + { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, + { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, + { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, + { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, + { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, + { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, + { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, + { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, + { 55, 38, 70, 124, 73, 102, 1, 34, 98 } + }, + { + { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, + { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, + { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, + { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, + { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, + { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, + { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, + { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, + { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, + { 58, 15, 20, 82, 135, 57, 26, 121, 40 } + }, + { + { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, + { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, + { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, + { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, + { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, + { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, + { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, + { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, + { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, + { 35, 27, 10, 146, 174, 171, 12, 26, 128 } + }, + { + { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, + { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, + { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, + { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, + { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, + { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, + { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, + { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, + { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, + { 112, 19, 12, 61, 195, 128, 48, 4, 24 } + } +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_loopfilter.c b/thirdparty/libvpx/vp8/common/vp8_loopfilter.c new file mode 100644 index 00000000000..756ad488f95 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/vp8_loopfilter.c @@ -0,0 +1,661 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "loopfilter.h" +#include "onyxc_int.h" +#include "vpx_mem/vpx_mem.h" + + +static void lf_init_lut(loop_filter_info_n *lfi) +{ + int filt_lvl; + + for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) + { + if (filt_lvl >= 40) + { + lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2; + lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3; + } + else if (filt_lvl >= 20) + { + lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; + lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2; + } + else if (filt_lvl >= 15) + { + lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; + lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1; + } + else + { + lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0; + lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0; + } + } + + lfi->mode_lf_lut[DC_PRED] = 1; + lfi->mode_lf_lut[V_PRED] = 1; + lfi->mode_lf_lut[H_PRED] = 1; + lfi->mode_lf_lut[TM_PRED] = 1; + lfi->mode_lf_lut[B_PRED] = 0; + + lfi->mode_lf_lut[ZEROMV] = 1; + lfi->mode_lf_lut[NEARESTMV] = 2; + lfi->mode_lf_lut[NEARMV] = 2; + lfi->mode_lf_lut[NEWMV] = 2; + lfi->mode_lf_lut[SPLITMV] = 3; + +} + +void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, + int sharpness_lvl) +{ + int i; + + /* For each possible value for the loop filter fill out limits */ + for (i = 0; i <= MAX_LOOP_FILTER; i++) + { + int filt_lvl = i; + int block_inside_limit = 0; + + /* Set loop filter paramaeters that control sharpness. */ + block_inside_limit = filt_lvl >> (sharpness_lvl > 0); + block_inside_limit = block_inside_limit >> (sharpness_lvl > 4); + + if (sharpness_lvl > 0) + { + if (block_inside_limit > (9 - sharpness_lvl)) + block_inside_limit = (9 - sharpness_lvl); + } + + if (block_inside_limit < 1) + block_inside_limit = 1; + + memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); + memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH); + memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), + SIMD_WIDTH); + } +} + +void vp8_loop_filter_init(VP8_COMMON *cm) +{ + loop_filter_info_n *lfi = &cm->lf_info; + int i; + + /* init limits for given sharpness*/ + vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); + cm->last_sharpness_level = cm->sharpness_level; + + /* init LUT for lvl and hev thr picking */ + lf_init_lut(lfi); + + /* init hev threshold const vectors */ + for(i = 0; i < 4 ; i++) + { + memset(lfi->hev_thr[i], i, SIMD_WIDTH); + } +} + +void vp8_loop_filter_frame_init(VP8_COMMON *cm, + MACROBLOCKD *mbd, + int default_filt_lvl) +{ + int seg, /* segment number */ + ref, /* index in ref_lf_deltas */ + mode; /* index in mode_lf_deltas */ + + loop_filter_info_n *lfi = &cm->lf_info; + + /* update limits if sharpness has changed */ + if(cm->last_sharpness_level != cm->sharpness_level) + { + vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); + cm->last_sharpness_level = cm->sharpness_level; + } + + for(seg = 0; seg < MAX_MB_SEGMENTS; seg++) + { + int lvl_seg = default_filt_lvl; + int lvl_ref, lvl_mode; + + /* Note the baseline filter values for each segment */ + if (mbd->segmentation_enabled) + { + /* Abs value */ + if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) + { + lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; + } + else /* Delta Value */ + { + lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; + } + lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0; + } + + if (!mbd->mode_ref_lf_delta_enabled) + { + /* we could get rid of this if we assume that deltas are set to + * zero when not in use; encoder always uses deltas + */ + memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 ); + continue; + } + + /* INTRA_FRAME */ + ref = INTRA_FRAME; + + /* Apply delta for reference frame */ + lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; + + /* Apply delta for Intra modes */ + mode = 0; /* B_PRED */ + /* Only the split mode BPRED has a further special case */ + lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; + /* clamp */ + lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; + + lfi->lvl[seg][ref][mode] = lvl_mode; + + mode = 1; /* all the rest of Intra modes */ + /* clamp */ + lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; + lfi->lvl[seg][ref][mode] = lvl_mode; + + /* LAST, GOLDEN, ALT */ + for(ref = 1; ref < MAX_REF_FRAMES; ref++) + { + /* Apply delta for reference frame */ + lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; + + /* Apply delta for Inter modes */ + for (mode = 1; mode < 4; mode++) + { + lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; + /* clamp */ + lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; + + lfi->lvl[seg][ref][mode] = lvl_mode; + } + } + } +} + + +void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) +{ + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + FRAME_TYPE frame_type = cm->frame_type; + + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} + +void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) +{ + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + (void)post_uvstride; + + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} +void vp8_loop_filter_frame(VP8_COMMON *cm, + MACROBLOCKD *mbd, + int frame_type) +{ + YV12_BUFFER_CONFIG *post = cm->frame_to_show; + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + + int mb_row; + int mb_col; + int mb_rows = cm->mb_rows; + int mb_cols = cm->mb_cols; + + int filter_level; + + unsigned char *y_ptr, *u_ptr, *v_ptr; + + /* Point at base of Mb MODE_INFO list */ + const MODE_INFO *mode_info_context = cm->mi; + int post_y_stride = post->y_stride; + int post_uv_stride = post->uv_stride; + + /* Initialize the loop filter for this frame. */ + vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); + + /* Set up the buffer pointers */ + y_ptr = post->y_buffer; + u_ptr = post->u_buffer; + v_ptr = post->v_buffer; + + /* vp8_filter each macro block */ + if (cm->filter_type == NORMAL_LOOPFILTER) + { + for (mb_row = 0; mb_row < mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + y_ptr += post_y_stride * 16 - post->y_width; + u_ptr += post_uv_stride * 8 - post->uv_width; + v_ptr += post_uv_stride * 8 - post->uv_width; + + mode_info_context++; /* Skip border mb */ + + } + } + else /* SIMPLE_LOOPFILTER */ + { + for (mb_row = 0; mb_row < mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + if (filter_level) + { + const unsigned char * mblim = lfi_n->mblim[filter_level]; + const unsigned char * blim = lfi_n->blim[filter_level]; + + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post_y_stride, mblim); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post_y_stride, blim); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (y_ptr, post_y_stride, mblim); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post_y_stride, blim); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + y_ptr += post_y_stride * 16 - post->y_width; + u_ptr += post_uv_stride * 8 - post->uv_width; + v_ptr += post_uv_stride * 8 - post->uv_width; + + mode_info_context++; /* Skip border mb */ + + } + } +} + +void vp8_loop_filter_frame_yonly +( + VP8_COMMON *cm, + MACROBLOCKD *mbd, + int default_filt_lvl +) +{ + YV12_BUFFER_CONFIG *post = cm->frame_to_show; + + unsigned char *y_ptr; + int mb_row; + int mb_col; + + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + + int filter_level; + FRAME_TYPE frame_type = cm->frame_type; + + /* Point at base of Mb MODE_INFO list */ + const MODE_INFO *mode_info_context = cm->mi; + +#if 0 + if(default_filt_lvl == 0) /* no filter applied */ + return; +#endif + + /* Initialize the loop filter for this frame. */ + vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); + + /* Set up the buffer pointers */ + y_ptr = post->y_buffer; + + /* vp8_filter each macro block */ + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + if (cm->filter_type == NORMAL_LOOPFILTER) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + } + else + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post->y_stride, lfi_n->blim[filter_level]); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post->y_stride, lfi_n->blim[filter_level]); + } + } + + y_ptr += 16; + mode_info_context ++; /* step to next MB */ + + } + + y_ptr += post->y_stride * 16 - post->y_width; + mode_info_context ++; /* Skip border mb */ + } + +} + +void vp8_loop_filter_partial_frame +( + VP8_COMMON *cm, + MACROBLOCKD *mbd, + int default_filt_lvl +) +{ + YV12_BUFFER_CONFIG *post = cm->frame_to_show; + + unsigned char *y_ptr; + int mb_row; + int mb_col; + int mb_cols = post->y_width >> 4; + int mb_rows = post->y_height >> 4; + + int linestocopy; + + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + + int filter_level; + FRAME_TYPE frame_type = cm->frame_type; + + const MODE_INFO *mode_info_context; + +#if 0 + if(default_filt_lvl == 0) /* no filter applied */ + return; +#endif + + /* Initialize the loop filter for this frame. */ + vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); + + /* number of MB rows to use in partial filtering */ + linestocopy = mb_rows / PARTIAL_FRAME_FRACTION; + linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */ + + /* Set up the buffer pointers; partial image starts at ~middle of frame */ + y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride; + mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); + + /* vp8_filter each macro block */ + for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++) + { + for (mb_col = 0; mb_col < mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = + lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + if (cm->filter_type == NORMAL_LOOPFILTER) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + vp8_loop_filter_mbh + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, 0, 0, post->y_stride, 0, &lfi); + } + else + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post->y_stride, lfi_n->blim[filter_level]); + + vp8_loop_filter_simple_mbh + (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post->y_stride, lfi_n->blim[filter_level]); + } + } + + y_ptr += 16; + mode_info_context += 1; /* step to next MB */ + } + + y_ptr += post->y_stride * 16 - post->y_width; + mode_info_context += 1; /* Skip border mb */ + } +} diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm new file mode 100644 index 00000000000..86fae269563 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm @@ -0,0 +1,93 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;void vp8_copy32xn_sse2( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; int height); +global sym(vp8_copy32xn_sse2) PRIVATE +sym(vp8_copy32xn_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;dst_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;dst_stride + movsxd rcx, dword ptr arg(4) ;height + +.block_copy_sse2_loopx4: + movdqu xmm0, XMMWORD PTR [rsi] + movdqu xmm1, XMMWORD PTR [rsi + 16] + movdqu xmm2, XMMWORD PTR [rsi + rax] + movdqu xmm3, XMMWORD PTR [rsi + rax + 16] + + lea rsi, [rsi+rax*2] + + movdqu xmm4, XMMWORD PTR [rsi] + movdqu xmm5, XMMWORD PTR [rsi + 16] + movdqu xmm6, XMMWORD PTR [rsi + rax] + movdqu xmm7, XMMWORD PTR [rsi + rax + 16] + + lea rsi, [rsi+rax*2] + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi + 16], xmm1 + movdqa XMMWORD PTR [rdi + rdx], xmm2 + movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 + + lea rdi, [rdi+rdx*2] + + movdqa XMMWORD PTR [rdi], xmm4 + movdqa XMMWORD PTR [rdi + 16], xmm5 + movdqa XMMWORD PTR [rdi + rdx], xmm6 + movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 + + lea rdi, [rdi+rdx*2] + + sub rcx, 4 + cmp rcx, 4 + jge .block_copy_sse2_loopx4 + + cmp rcx, 0 + je .copy_is_done + +.block_copy_sse2_loop: + movdqu xmm0, XMMWORD PTR [rsi] + movdqu xmm1, XMMWORD PTR [rsi + 16] + lea rsi, [rsi+rax] + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi + 16], xmm1 + lea rdi, [rdi+rdx] + + sub rcx, 1 + jne .block_copy_sse2_loop + +.copy_is_done: + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm new file mode 100644 index 00000000000..d789a40ccf7 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm @@ -0,0 +1,146 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "vpx_ports/x86_abi_support.asm" + +%macro STACK_FRAME_CREATE_X3 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define ref_ptr rdi + %define ref_stride rdx + %define end_ptr rcx + %define ret_var rbx + %define result_ptr arg(4) + %define max_sad arg(4) + %define height dword ptr arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx + + mov rsi, arg(0) ; src_ptr + mov rdi, arg(2) ; ref_ptr + + movsxd rax, dword ptr arg(1) ; src_stride + movsxd rdx, dword ptr arg(3) ; ref_stride +%else + %if LIBVPX_YASM_WIN64 + SAVE_XMM 7, u + %define src_ptr rcx + %define src_stride rdx + %define ref_ptr r8 + %define ref_stride r9 + %define end_ptr r10 + %define ret_var r11 + %define result_ptr [rsp+xmm_stack_space+8+4*8] + %define max_sad [rsp+xmm_stack_space+8+4*8] + %define height dword ptr [rsp+xmm_stack_space+8+4*8] + %else + %define src_ptr rdi + %define src_stride rsi + %define ref_ptr rdx + %define ref_stride rcx + %define end_ptr r9 + %define ret_var r10 + %define result_ptr r8 + %define max_sad r8 + %define height r8 + %endif +%endif + +%endmacro + +%macro STACK_FRAME_DESTROY_X3 0 + %define src_ptr + %define src_stride + %define ref_ptr + %define ref_stride + %define end_ptr + %define ret_var + %define result_ptr + %define max_sad + %define height + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %if LIBVPX_YASM_WIN64 + RESTORE_XMM + %endif +%endif + ret +%endmacro + + +;void vp8_copy32xn_sse3( +; unsigned char *src_ptr, +; int src_stride, +; unsigned char *dst_ptr, +; int dst_stride, +; int height); +global sym(vp8_copy32xn_sse3) PRIVATE +sym(vp8_copy32xn_sse3): + + STACK_FRAME_CREATE_X3 + +.block_copy_sse3_loopx4: + lea end_ptr, [src_ptr+src_stride*2] + + movdqu xmm0, XMMWORD PTR [src_ptr] + movdqu xmm1, XMMWORD PTR [src_ptr + 16] + movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] + movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] + movdqu xmm4, XMMWORD PTR [end_ptr] + movdqu xmm5, XMMWORD PTR [end_ptr + 16] + movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] + movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] + + lea src_ptr, [src_ptr+src_stride*4] + + lea end_ptr, [ref_ptr+ref_stride*2] + + movdqa XMMWORD PTR [ref_ptr], xmm0 + movdqa XMMWORD PTR [ref_ptr + 16], xmm1 + movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 + movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 + movdqa XMMWORD PTR [end_ptr], xmm4 + movdqa XMMWORD PTR [end_ptr + 16], xmm5 + movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 + movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 + + lea ref_ptr, [ref_ptr+ref_stride*4] + + sub height, 4 + cmp height, 4 + jge .block_copy_sse3_loopx4 + + ;Check to see if there is more rows need to be copied. + cmp height, 0 + je .copy_is_done + +.block_copy_sse3_loop: + movdqu xmm0, XMMWORD PTR [src_ptr] + movdqu xmm1, XMMWORD PTR [src_ptr + 16] + lea src_ptr, [src_ptr+src_stride] + + movdqa XMMWORD PTR [ref_ptr], xmm0 + movdqa XMMWORD PTR [ref_ptr + 16], xmm1 + lea ref_ptr, [ref_ptr+ref_stride] + + sub height, 1 + jne .block_copy_sse3_loop + +.copy_is_done: + STACK_FRAME_DESTROY_X3 diff --git a/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm b/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm new file mode 100644 index 00000000000..4e551f00aa6 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm @@ -0,0 +1,258 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) +global sym(vp8_dequantize_b_impl_mmx) PRIVATE +sym(vp8_dequantize_b_impl_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 3 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;sq + mov rdi, arg(1) ;dq + mov rax, arg(2) ;q + + movq mm1, [rsi] + pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. + movq [rdi], mm1 + + movq mm1, [rsi+8] + pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. + movq [rdi+8], mm1 + + movq mm1, [rsi+16] + pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. + movq [rdi+16], mm1 + + movq mm1, [rsi+24] + pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. + movq [rdi+24], mm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void dequant_idct_add_mmx( +;short *input, 0 +;short *dq, 1 +;unsigned char *dest, 2 +;int stride) 3 +global sym(vp8_dequant_idct_add_mmx) PRIVATE +sym(vp8_dequant_idct_add_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + GET_GOT rbx + push rdi + ; end prolog + + mov rax, arg(0) ;input + mov rdx, arg(1) ;dq + + + movq mm0, [rax ] + pmullw mm0, [rdx] + + movq mm1, [rax +8] + pmullw mm1, [rdx +8] + + movq mm2, [rax+16] + pmullw mm2, [rdx+16] + + movq mm3, [rax+24] + pmullw mm3, [rdx+24] + + mov rdx, arg(2) ;dest + + pxor mm7, mm7 + + + movq [rax], mm7 + movq [rax+8], mm7 + + movq [rax+16],mm7 + movq [rax+24],mm7 + + + movsxd rdi, dword ptr arg(3) ;stride + + psubw mm0, mm2 ; b1= 0-2 + paddw mm2, mm2 ; + + movq mm5, mm1 + paddw mm2, mm0 ; a1 =0+2 + + pmulhw mm5, [GLOBAL(x_s1sqr2)]; + paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) + + movq mm7, mm3 ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; + + paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw mm7, mm5 ; c1 + + movq mm5, mm1 + movq mm4, mm3 + + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] + paddw mm5, mm1 + + pmulhw mm3, [GLOBAL(x_s1sqr2)] + paddw mm3, mm4 + + paddw mm3, mm5 ; d1 + movq mm6, mm2 ; a1 + + movq mm4, mm0 ; b1 + paddw mm2, mm3 ;0 + + paddw mm4, mm7 ;1 + psubw mm0, mm7 ;2 + + psubw mm6, mm3 ;3 + + movq mm1, mm2 ; 03 02 01 00 + movq mm3, mm4 ; 23 22 21 20 + + punpcklwd mm1, mm0 ; 11 01 10 00 + punpckhwd mm2, mm0 ; 13 03 12 02 + + punpcklwd mm3, mm6 ; 31 21 30 20 + punpckhwd mm4, mm6 ; 33 23 32 22 + + movq mm0, mm1 ; 11 01 10 00 + movq mm5, mm2 ; 13 03 12 02 + + punpckldq mm0, mm3 ; 30 20 10 00 + punpckhdq mm1, mm3 ; 31 21 11 01 + + punpckldq mm2, mm4 ; 32 22 12 02 + punpckhdq mm5, mm4 ; 33 23 13 03 + + movq mm3, mm5 ; 33 23 13 03 + + psubw mm0, mm2 ; b1= 0-2 + paddw mm2, mm2 ; + + movq mm5, mm1 + paddw mm2, mm0 ; a1 =0+2 + + pmulhw mm5, [GLOBAL(x_s1sqr2)]; + paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) + + movq mm7, mm3 ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; + + paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw mm7, mm5 ; c1 + + movq mm5, mm1 + movq mm4, mm3 + + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] + paddw mm5, mm1 + + pmulhw mm3, [GLOBAL(x_s1sqr2)] + paddw mm3, mm4 + + paddw mm3, mm5 ; d1 + paddw mm0, [GLOBAL(fours)] + + paddw mm2, [GLOBAL(fours)] + movq mm6, mm2 ; a1 + + movq mm4, mm0 ; b1 + paddw mm2, mm3 ;0 + + paddw mm4, mm7 ;1 + psubw mm0, mm7 ;2 + + psubw mm6, mm3 ;3 + psraw mm2, 3 + + psraw mm0, 3 + psraw mm4, 3 + + psraw mm6, 3 + + movq mm1, mm2 ; 03 02 01 00 + movq mm3, mm4 ; 23 22 21 20 + + punpcklwd mm1, mm0 ; 11 01 10 00 + punpckhwd mm2, mm0 ; 13 03 12 02 + + punpcklwd mm3, mm6 ; 31 21 30 20 + punpckhwd mm4, mm6 ; 33 23 32 22 + + movq mm0, mm1 ; 11 01 10 00 + movq mm5, mm2 ; 13 03 12 02 + + punpckldq mm0, mm3 ; 30 20 10 00 + punpckhdq mm1, mm3 ; 31 21 11 01 + + punpckldq mm2, mm4 ; 32 22 12 02 + punpckhdq mm5, mm4 ; 33 23 13 03 + + pxor mm7, mm7 + + movd mm4, [rdx] + punpcklbw mm4, mm7 + paddsw mm0, mm4 + packuswb mm0, mm7 + movd [rdx], mm0 + + movd mm4, [rdx+rdi] + punpcklbw mm4, mm7 + paddsw mm1, mm4 + packuswb mm1, mm7 + movd [rdx+rdi], mm1 + + movd mm4, [rdx+2*rdi] + punpcklbw mm4, mm7 + paddsw mm2, mm4 + packuswb mm2, mm7 + movd [rdx+rdi*2], mm2 + + add rdx, rdi + + movd mm4, [rdx+2*rdi] + punpcklbw mm4, mm7 + paddsw mm5, mm4 + packuswb mm5, mm7 + movd [rdx+rdi*2], mm5 + + ; begin epilog + pop rdi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +x_s1sqr2: + times 4 dw 0x8A8C +align 16 +x_c1sqr2less1: + times 4 dw 0x4E7B +align 16 +fours: + times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.c b/thirdparty/libvpx/vp8/common/x86/filter_x86.c new file mode 100644 index 00000000000..7f496ed7dba --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/filter_x86.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp8/common/x86/filter_x86.h" + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = +{ + { 128, 128, 128, 128, 0, 0, 0, 0 }, + { 112, 112, 112, 112, 16, 16, 16, 16 }, + { 96, 96, 96, 96, 32, 32, 32, 32 }, + { 80, 80, 80, 80, 48, 48, 48, 48 }, + { 64, 64, 64, 64, 64, 64, 64, 64 }, + { 48, 48, 48, 48, 80, 80, 80, 80 }, + { 32, 32, 32, 32, 96, 96, 96, 96 }, + { 16, 16, 16, 16, 112, 112, 112, 112 } +}; + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = +{ + { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, + { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, + { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, + { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, + { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, + { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, + { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } +}; diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.h b/thirdparty/libvpx/vp8/common/x86/filter_x86.h new file mode 100644 index 00000000000..d282841bee4 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/filter_x86.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_COMMON_X86_FILTER_X86_H_ +#define VP8_COMMON_X86_FILTER_X86_H_ + +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with + * duplicated values */ + +/* duplicated 4x */ +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]); + +/* duplicated 8x */ +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_X86_FILTER_X86_H_ diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c new file mode 100644 index 00000000000..f2532b34da2 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vp8/common/blockd.h" +#include "vpx_mem/vpx_mem.h" + +extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); + +void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) +{ + short *sq = (short *) d->qcoeff; + short *dq = (short *) d->dqcoeff; + + vp8_dequantize_b_impl_mmx(sq, dq, DQC); +} + +void vp8_dequant_idct_add_y_block_mmx + (short *q, short *dq, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, dst, stride); + else if (eobs[0] == 1) + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride); + else if (eobs[1] == 1) + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, + dst+4, stride); + memset(q + 16, 0, 2 * sizeof(q[0])); + } + + if (eobs[2] > 1) + vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride); + else if (eobs[2] == 1) + { + vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, + dst+8, stride); + memset(q + 32, 0, 2 * sizeof(q[0])); + } + + if (eobs[3] > 1) + vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride); + else if (eobs[3] == 1) + { + vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, + dst+12, stride); + memset(q + 48, 0, 2 * sizeof(q[0])); + } + + q += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_mmx + (short *q, short *dq, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, dstu, stride); + else if (eobs[0] == 1) + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride); + else if (eobs[1] == 1) + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, + dstu+4, stride); + memset(q + 16, 0, 2 * sizeof(q[0])); + } + + q += 32; + dstu += 4*stride; + eobs += 2; + } + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, dstv, stride); + else if (eobs[0] == 1) + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); + memset(q, 0, 2 * sizeof(q[0])); + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride); + else if (eobs[1] == 1) + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, + dstv+4, stride); + memset(q + 16, 0, 2 * sizeof(q[0])); + } + + q += 32; + dstv += 4*stride; + eobs += 2; + } +} diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c new file mode 100644 index 00000000000..ae96ec858ce --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_config.h" +#include "vp8_rtcd.h" + +void vp8_idct_dequant_0_2x_sse2 + (short *q, short *dq , + unsigned char *dst, int dst_stride); +void vp8_idct_dequant_full_2x_sse2 + (short *q, short *dq , + unsigned char *dst, int dst_stride); + +void vp8_dequant_idct_add_y_block_sse2 + (short *q, short *dq, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)(eobs))[0]) + { + if (((short *)(eobs))[0] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride); + else + vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride); + } + if (((short *)(eobs))[1]) + { + if (((short *)(eobs))[1] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride); + else + vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride); + } + q += 64; + dst += stride*4; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_sse2 + (short *q, short *dq, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + if (((short *)(eobs))[0]) + { + if (((short *)(eobs))[0] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); + else + vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); + } + q += 32; + dstu += stride*4; + + if (((short *)(eobs))[1]) + { + if (((short *)(eobs))[1] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); + else + vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); + } + q += 32; + + if (((short *)(eobs))[2]) + { + if (((short *)(eobs))[2] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); + else + vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); + } + q += 32; + dstv += stride*4; + + if (((short *)(eobs))[3]) + { + if (((short *)(eobs))[3] & 0xfefe) + vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); + else + vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); + } +} diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm new file mode 100644 index 00000000000..96fa2c60d0f --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm @@ -0,0 +1,295 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +; /**************************************************************************** +; * Notes: +; * +; * This implementation makes use of 16 bit fixed point version of two multiply +; * constants: +; * 1. sqrt(2) * cos (pi/8) +; * 2. sqrt(2) * sin (pi/8) +; * Because the first constant is bigger than 1, to maintain the same 16 bit +; * fixed point precision as the second one, we use a trick of +; * x * a = x + x*(a-1) +; * so +; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). +; * +; * For the second constant, because of the 16bit version is 35468, which +; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative +; * number. +; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x +; * +; **************************************************************************/ + + +;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, +;int pitch, unsigned char *dest,int stride) +global sym(vp8_short_idct4x4llm_mmx) PRIVATE +sym(vp8_short_idct4x4llm_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(0) ;input + mov rsi, arg(1) ;pred + + movq mm0, [rax ] + movq mm1, [rax+ 8] + movq mm2, [rax+16] + movq mm3, [rax+24] + +%if 0 + pxor mm7, mm7 + movq [rax], mm7 + movq [rax+8], mm7 + movq [rax+16],mm7 + movq [rax+24],mm7 +%endif + movsxd rax, dword ptr arg(2) ;pitch + mov rdx, arg(3) ;dest + movsxd rdi, dword ptr arg(4) ;stride + + + psubw mm0, mm2 ; b1= 0-2 + paddw mm2, mm2 ; + + movq mm5, mm1 + paddw mm2, mm0 ; a1 =0+2 + + pmulhw mm5, [GLOBAL(x_s1sqr2)]; + paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) + + movq mm7, mm3 ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; + + paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw mm7, mm5 ; c1 + + movq mm5, mm1 + movq mm4, mm3 + + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] + paddw mm5, mm1 + + pmulhw mm3, [GLOBAL(x_s1sqr2)] + paddw mm3, mm4 + + paddw mm3, mm5 ; d1 + movq mm6, mm2 ; a1 + + movq mm4, mm0 ; b1 + paddw mm2, mm3 ;0 + + paddw mm4, mm7 ;1 + psubw mm0, mm7 ;2 + + psubw mm6, mm3 ;3 + + movq mm1, mm2 ; 03 02 01 00 + movq mm3, mm4 ; 23 22 21 20 + + punpcklwd mm1, mm0 ; 11 01 10 00 + punpckhwd mm2, mm0 ; 13 03 12 02 + + punpcklwd mm3, mm6 ; 31 21 30 20 + punpckhwd mm4, mm6 ; 33 23 32 22 + + movq mm0, mm1 ; 11 01 10 00 + movq mm5, mm2 ; 13 03 12 02 + + punpckldq mm0, mm3 ; 30 20 10 00 + punpckhdq mm1, mm3 ; 31 21 11 01 + + punpckldq mm2, mm4 ; 32 22 12 02 + punpckhdq mm5, mm4 ; 33 23 13 03 + + movq mm3, mm5 ; 33 23 13 03 + + psubw mm0, mm2 ; b1= 0-2 + paddw mm2, mm2 ; + + movq mm5, mm1 + paddw mm2, mm0 ; a1 =0+2 + + pmulhw mm5, [GLOBAL(x_s1sqr2)]; + paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) + + movq mm7, mm3 ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; + + paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw mm7, mm5 ; c1 + + movq mm5, mm1 + movq mm4, mm3 + + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] + paddw mm5, mm1 + + pmulhw mm3, [GLOBAL(x_s1sqr2)] + paddw mm3, mm4 + + paddw mm3, mm5 ; d1 + paddw mm0, [GLOBAL(fours)] + + paddw mm2, [GLOBAL(fours)] + movq mm6, mm2 ; a1 + + movq mm4, mm0 ; b1 + paddw mm2, mm3 ;0 + + paddw mm4, mm7 ;1 + psubw mm0, mm7 ;2 + + psubw mm6, mm3 ;3 + psraw mm2, 3 + + psraw mm0, 3 + psraw mm4, 3 + + psraw mm6, 3 + + movq mm1, mm2 ; 03 02 01 00 + movq mm3, mm4 ; 23 22 21 20 + + punpcklwd mm1, mm0 ; 11 01 10 00 + punpckhwd mm2, mm0 ; 13 03 12 02 + + punpcklwd mm3, mm6 ; 31 21 30 20 + punpckhwd mm4, mm6 ; 33 23 32 22 + + movq mm0, mm1 ; 11 01 10 00 + movq mm5, mm2 ; 13 03 12 02 + + punpckldq mm0, mm3 ; 30 20 10 00 + punpckhdq mm1, mm3 ; 31 21 11 01 + + punpckldq mm2, mm4 ; 32 22 12 02 + punpckhdq mm5, mm4 ; 33 23 13 03 + + pxor mm7, mm7 + + movd mm4, [rsi] + punpcklbw mm4, mm7 + paddsw mm0, mm4 + packuswb mm0, mm7 + movd [rdx], mm0 + + movd mm4, [rsi+rax] + punpcklbw mm4, mm7 + paddsw mm1, mm4 + packuswb mm1, mm7 + movd [rdx+rdi], mm1 + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm2, mm4 + packuswb mm2, mm7 + movd [rdx+rdi*2], mm2 + + add rdx, rdi + add rsi, rax + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm5, mm4 + packuswb mm5, mm7 + movd [rdx+rdi*2], mm5 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_dc_only_idct_add_mmx( +;short input_dc, +;unsigned char *pred_ptr, +;int pred_stride, +;unsigned char *dst_ptr, +;int stride) +global sym(vp8_dc_only_idct_add_mmx) PRIVATE +sym(vp8_dc_only_idct_add_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + ; end prolog + + movd mm5, arg(0) ;input_dc + mov rax, arg(1) ;pred_ptr + movsxd rdx, dword ptr arg(2) ;pred_stride + + pxor mm0, mm0 + + paddw mm5, [GLOBAL(fours)] + lea rcx, [rdx + rdx*2] + + psraw mm5, 3 + + punpcklwd mm5, mm5 + + punpckldq mm5, mm5 + + movd mm1, [rax] + movd mm2, [rax+rdx] + movd mm3, [rax+2*rdx] + movd mm4, [rax+rcx] + + mov rax, arg(3) ;d -- destination + movsxd rdx, dword ptr arg(4) ;dst_stride + + punpcklbw mm1, mm0 + paddsw mm1, mm5 + packuswb mm1, mm0 ; pack and unpack to saturate + lea rcx, [rdx + rdx*2] + + punpcklbw mm2, mm0 + paddsw mm2, mm5 + packuswb mm2, mm0 ; pack and unpack to saturate + + punpcklbw mm3, mm0 + paddsw mm3, mm5 + packuswb mm3, mm0 ; pack and unpack to saturate + + punpcklbw mm4, mm0 + paddsw mm4, mm5 + packuswb mm4, mm0 ; pack and unpack to saturate + + movd [rax], mm1 + movd [rax+rdx], mm2 + movd [rax+2*rdx], mm3 + movd [rax+rcx], mm4 + + ; begin epilog + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +x_s1sqr2: + times 4 dw 0x8A8C +align 16 +x_c1sqr2less1: + times 4 dw 0x4E7B +align 16 +fours: + times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm new file mode 100644 index 00000000000..bf8e2c40210 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm @@ -0,0 +1,708 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_idct_dequant_0_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *dst - 2 +; int dst_stride - 3 +; ) + +global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE +sym(vp8_idct_dequant_0_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + GET_GOT rbx + ; end prolog + + mov rdx, arg(1) ; dequant + mov rax, arg(0) ; qcoeff + + movd xmm4, [rax] + movd xmm5, [rdx] + + pinsrw xmm4, [rax+32], 4 + pinsrw xmm5, [rdx], 4 + + pmullw xmm4, xmm5 + + ; Zero out xmm5, for use unpacking + pxor xmm5, xmm5 + + ; clear coeffs + movd [rax], xmm5 + movd [rax+32], xmm5 +;pshufb + mov rax, arg(2) ; dst + movsxd rdx, dword ptr arg(3) ; dst_stride + + pshuflw xmm4, xmm4, 00000000b + pshufhw xmm4, xmm4, 00000000b + + lea rcx, [rdx + rdx*2] + paddw xmm4, [GLOBAL(fours)] + + psraw xmm4, 3 + + movq xmm0, [rax] + movq xmm1, [rax+rdx] + movq xmm2, [rax+2*rdx] + movq xmm3, [rax+rcx] + + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 + + + ; Add to predict buffer + paddw xmm0, xmm4 + paddw xmm1, xmm4 + paddw xmm2, xmm4 + paddw xmm3, xmm4 + + ; pack up before storing + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 + + ; store blocks back out + movq [rax], xmm0 + movq [rax + rdx], xmm1 + + lea rax, [rax + 2*rdx] + + movq [rax], xmm2 + movq [rax + rdx], xmm3 + + ; begin epilog + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_idct_dequant_full_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *dst - 2 +; int dst_stride - 3 +; ) +global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE +sym(vp8_idct_dequant_full_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + mov rdx, arg(1) ; dequant + mov rdi, arg(2) ; dst + + + ; Zero out xmm7, for use unpacking + pxor xmm7, xmm7 + + + ; note the transpose of xmm1 and xmm2, necessary for shuffle + ; to spit out sensicle data + movdqa xmm0, [rax] + movdqa xmm2, [rax+16] + movdqa xmm1, [rax+32] + movdqa xmm3, [rax+48] + + ; Clear out coeffs + movdqa [rax], xmm7 + movdqa [rax+16], xmm7 + movdqa [rax+32], xmm7 + movdqa [rax+48], xmm7 + + ; dequantize qcoeff buffer + pmullw xmm0, [rdx] + pmullw xmm2, [rdx+16] + pmullw xmm1, [rdx] + pmullw xmm3, [rdx+16] + movsxd rdx, dword ptr arg(3) ; dst_stride + + ; repack so block 0 row x and block 1 row x are together + movdqa xmm4, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm4, xmm1 + + pshufd xmm0, xmm0, 11011000b + pshufd xmm1, xmm4, 11011000b + + movdqa xmm4, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm4, xmm3 + + pshufd xmm2, xmm2, 11011000b + pshufd xmm3, xmm4, 11011000b + + ; first pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 ; + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + lea rcx, [rdx + rdx*2] ;dst_stride * 3 + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + + ; transpose for the second pass + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + ; second pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + paddw xmm0, [GLOBAL(fours)] + + paddw xmm2, [GLOBAL(fours)] + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + psraw xmm2, 3 + + psraw xmm0, 3 + psraw xmm4, 3 + + psraw xmm6, 3 + + ; transpose to save + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + pxor xmm7, xmm7 + + ; Load up predict blocks + movq xmm4, [rdi] + movq xmm5, [rdi+rdx] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm0, xmm4 + paddw xmm1, xmm5 + + movq xmm4, [rdi+2*rdx] + movq xmm5, [rdi+rcx] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm2, xmm4 + paddw xmm3, xmm5 + +.finish: + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + movq [rdi + rdx*2], xmm2 + movq [rdi + rcx], xmm3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_idct_dequant_dc_0_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *dst - 2 +; int dst_stride - 3 +; short *dc - 4 +; ) +global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE +sym(vp8_idct_dequant_dc_0_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + + mov rdi, arg(2) ; dst + mov rdx, arg(4) ; dc + + ; Zero out xmm5, for use unpacking + pxor xmm5, xmm5 + + ; load up 2 dc words here == 2*16 = doubleword + movd xmm4, [rdx] + + movsxd rdx, dword ptr arg(3) ; dst_stride + lea rcx, [rdx + rdx*2] + ; Load up predict blocks + movq xmm0, [rdi] + movq xmm1, [rdi+rdx*1] + movq xmm2, [rdi+rdx*2] + movq xmm3, [rdi+rcx] + + ; Duplicate and expand dc across + punpcklwd xmm4, xmm4 + punpckldq xmm4, xmm4 + + ; Rounding to dequant and downshift + paddw xmm4, [GLOBAL(fours)] + psraw xmm4, 3 + + ; Predict buffer needs to be expanded from bytes to words + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpcklbw xmm2, xmm5 + punpcklbw xmm3, xmm5 + + ; Add to predict buffer + paddw xmm0, xmm4 + paddw xmm1, xmm4 + paddw xmm2, xmm4 + paddw xmm3, xmm4 + + ; pack up before storing + packuswb xmm0, xmm5 + packuswb xmm1, xmm5 + packuswb xmm2, xmm5 + packuswb xmm3, xmm5 + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + movq [rdi + rdx*2], xmm2 + movq [rdi + rcx], xmm3 + + ; begin epilog + pop rdi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret +;void vp8_idct_dequant_dc_full_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *dst - 2 +; int dst_stride - 3 +; short *dc - 4 +; ) +global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE +sym(vp8_idct_dequant_dc_full_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + GET_GOT rbx + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + mov rdx, arg(1) ; dequant + + mov rdi, arg(2) ; dst + + ; Zero out xmm7, for use unpacking + pxor xmm7, xmm7 + + + ; note the transpose of xmm1 and xmm2, necessary for shuffle + ; to spit out sensicle data + movdqa xmm0, [rax] + movdqa xmm2, [rax+16] + movdqa xmm1, [rax+32] + movdqa xmm3, [rax+48] + + ; Clear out coeffs + movdqa [rax], xmm7 + movdqa [rax+16], xmm7 + movdqa [rax+32], xmm7 + movdqa [rax+48], xmm7 + + ; dequantize qcoeff buffer + pmullw xmm0, [rdx] + pmullw xmm2, [rdx+16] + pmullw xmm1, [rdx] + pmullw xmm3, [rdx+16] + + ; DC component + mov rdx, arg(4) + + ; repack so block 0 row x and block 1 row x are together + movdqa xmm4, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm4, xmm1 + + pshufd xmm0, xmm0, 11011000b + pshufd xmm1, xmm4, 11011000b + + movdqa xmm4, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm4, xmm3 + + pshufd xmm2, xmm2, 11011000b + pshufd xmm3, xmm4, 11011000b + + ; insert DC component + pinsrw xmm0, [rdx], 0 + pinsrw xmm0, [rdx+2], 4 + + ; first pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 ; + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + + ; transpose for the second pass + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + ; second pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + paddw xmm0, [GLOBAL(fours)] + + paddw xmm2, [GLOBAL(fours)] + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + psraw xmm2, 3 + + psraw xmm0, 3 + psraw xmm4, 3 + + psraw xmm6, 3 + + ; transpose to save + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + pxor xmm7, xmm7 + + ; Load up predict blocks + movsxd rdx, dword ptr arg(3) ; dst_stride + movq xmm4, [rdi] + movq xmm5, [rdi+rdx] + lea rcx, [rdx + rdx*2] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm0, xmm4 + paddw xmm1, xmm5 + + movq xmm4, [rdi+rdx*2] + movq xmm5, [rdi+rcx] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm2, xmm4 + paddw xmm3, xmm5 + +.finish: + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; Load destination stride before writing out, + ; doesn't need to persist + movsxd rdx, dword ptr arg(3) ; dst_stride + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm2 + movq [rdi + rdx], xmm3 + + + ; begin epilog + pop rdi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +fours: + times 8 dw 0x0004 +align 16 +x_s1sqr2: + times 8 dw 0x8A8C +align 16 +x_c1sqr2less1: + times 8 dw 0x4E7B diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm new file mode 100644 index 00000000000..158c3b74583 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm @@ -0,0 +1,140 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) +global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE +sym(vp8_short_inv_walsh4x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 2 + ; end prolog + + mov rdx, arg(0) + mov rax, 30003h + + movq mm0, [rdx + 0] ;ip[0] + movq mm1, [rdx + 8] ;ip[4] + movq mm7, rax + + movq mm2, [rdx + 16] ;ip[8] + movq mm3, [rdx + 24] ;ip[12] + punpcklwd mm7, mm7 ;0003000300030003h + mov rdx, arg(1) + + movq mm4, mm0 + movq mm5, mm1 + + paddw mm4, mm3 ;ip[0] + ip[12] aka al + paddw mm5, mm2 ;ip[4] + ip[8] aka bl + + movq mm6, mm4 ;temp al + paddw mm4, mm5 ;al + bl + psubw mm6, mm5 ;al - bl + + psubw mm0, mm3 ;ip[0] - ip[12] aka d1 + psubw mm1, mm2 ;ip[4] - ip[8] aka c1 + + movq mm5, mm0 ;temp dl + paddw mm0, mm1 ;dl + cl + psubw mm5, mm1 ;dl - cl + + ; 03 02 01 00 + ; 13 12 11 10 + ; 23 22 21 20 + ; 33 32 31 30 + + movq mm3, mm4 ; 03 02 01 00 + punpcklwd mm4, mm0 ; 11 01 10 00 + punpckhwd mm3, mm0 ; 13 03 12 02 + + movq mm1, mm6 ; 23 22 21 20 + punpcklwd mm6, mm5 ; 31 21 30 20 + punpckhwd mm1, mm5 ; 33 23 32 22 + + movq mm0, mm4 ; 11 01 10 00 + movq mm2, mm3 ; 13 03 12 02 + + punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] + punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4] + + punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8] + punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12] +;~~~~~~~~~~~~~~~~~~~~~ + movq mm1, mm0 + movq mm5, mm4 + paddw mm1, mm3 ;ip[0] + ip[12] aka al + paddw mm5, mm2 ;ip[4] + ip[8] aka bl + + movq mm6, mm1 ;temp al + paddw mm1, mm5 ;al + bl + psubw mm6, mm5 ;al - bl + paddw mm1, mm7 + paddw mm6, mm7 + psraw mm1, 3 + psraw mm6, 3 + + psubw mm0, mm3 ;ip[0] - ip[12] aka d1 + psubw mm4, mm2 ;ip[4] - ip[8] aka c1 + + movq mm5, mm0 ;temp dl + paddw mm0, mm4 ;dl + cl + psubw mm5, mm4 ;dl - cl + paddw mm0, mm7 + paddw mm5, mm7 + psraw mm0, 3 + psraw mm5, 3 +;~~~~~~~~~~~~~~~~~~~~~ + + movd eax, mm1 + movd ecx, mm0 + psrlq mm0, 32 + psrlq mm1, 32 + mov word ptr[rdx+32*0], ax + mov word ptr[rdx+32*1], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*4], ax + mov word ptr[rdx+32*5], cx + movd eax, mm1 + movd ecx, mm0 + mov word ptr[rdx+32*8], ax + mov word ptr[rdx+32*9], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*12], ax + mov word ptr[rdx+32*13], cx + + movd eax, mm6 + movd ecx, mm5 + psrlq mm5, 32 + psrlq mm6, 32 + mov word ptr[rdx+32*2], ax + mov word ptr[rdx+32*3], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*6], ax + mov word ptr[rdx+32*7], cx + movd eax, mm6 + movd ecx, mm5 + mov word ptr[rdx+32*10], ax + mov word ptr[rdx+32*11], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*14], ax + mov word ptr[rdx+32*15], cx + + ; begin epilog + UNSHADOW_ARGS + pop rbp + ret + diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm new file mode 100644 index 00000000000..06e86a80b69 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm @@ -0,0 +1,121 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) +global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE +sym(vp8_short_inv_walsh4x4_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 2 + ; end prolog + + mov rcx, arg(0) + mov rdx, arg(1) + mov rax, 30003h + + movdqa xmm0, [rcx + 0] ;ip[4] ip[0] + movdqa xmm1, [rcx + 16] ;ip[12] ip[8] + + + pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] + movdqa xmm3, xmm0 ;ip[4] ip[0] + + paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 + psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 + + movdqa xmm4, xmm0 + punpcklqdq xmm0, xmm3 ;d1 a1 + punpckhqdq xmm4, xmm3 ;c1 b1 + + movdqa xmm1, xmm4 ;c1 b1 + paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] + psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] + + ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; 13 12 11 10 03 02 01 00 + ; + ; 33 32 31 30 23 22 21 20 + ; + movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 + punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 + punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 + movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 + punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 + punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 + ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + movd xmm0, eax + pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] + movdqa xmm3, xmm4 ;ip[4] ip[0] + + pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03 + + paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 + psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 + + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm3 ;d1 a1 + punpckhqdq xmm5, xmm3 ;c1 b1 + + movdqa xmm1, xmm5 ;c1 b1 + paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] + psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] + + paddw xmm5, xmm0 + paddw xmm4, xmm0 + psraw xmm5, 3 + psraw xmm4, 3 + + movd eax, xmm5 + movd ecx, xmm4 + psrldq xmm5, 4 + psrldq xmm4, 4 + mov word ptr[rdx+32*0], ax + mov word ptr[rdx+32*2], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*4], ax + mov word ptr[rdx+32*6], cx + movd eax, xmm5 + movd ecx, xmm4 + psrldq xmm5, 4 + psrldq xmm4, 4 + mov word ptr[rdx+32*8], ax + mov word ptr[rdx+32*10], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*12], ax + mov word ptr[rdx+32*14], cx + + movd eax, xmm5 + movd ecx, xmm4 + psrldq xmm5, 4 + psrldq xmm4, 4 + mov word ptr[rdx+32*1], ax + mov word ptr[rdx+32*3], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*5], ax + mov word ptr[rdx+32*7], cx + movd eax, xmm5 + movd ecx, xmm4 + mov word ptr[rdx+32*9], ax + mov word ptr[rdx+32*11], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*13], ax + mov word ptr[rdx+32*15], cx + + ; begin epilog + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm new file mode 100644 index 00000000000..6d5aaa19db7 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm @@ -0,0 +1,815 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%macro LF_ABS 2 + ; %1 value not preserved + ; %2 value preserved + ; output in %1 + movdqa scratch1, %2 ; v2 + + psubusb scratch1, %1 ; v2 - v1 + psubusb %1, %2 ; v1 - v2 + por %1, scratch1 ; abs(v2 - v1) +%endmacro + +%macro LF_FILTER_HEV_MASK 8-9 + + LF_ABS %1, %2 ; abs(p3 - p2) + LF_ABS %2, %3 ; abs(p2 - p1) + pmaxub %1, %2 ; accumulate mask +%if %0 == 8 + movdqa scratch2, %3 ; save p1 + LF_ABS scratch2, %4 ; abs(p1 - p0) +%endif + LF_ABS %4, %5 ; abs(p0 - q0) + LF_ABS %5, %6 ; abs(q0 - q1) +%if %0 == 8 + pmaxub %5, scratch2 ; accumulate hev +%else + pmaxub %5, %9 +%endif + pmaxub %1, %5 ; accumulate mask + + LF_ABS %3, %6 ; abs(p1 - q1) + LF_ABS %6, %7 ; abs(q1 - q2) + pmaxub %1, %6 ; accumulate mask + LF_ABS %7, %8 ; abs(q2 - q3) + pmaxub %1, %7 ; accumulate mask + + paddusb %4, %4 ; 2 * abs(p0 - q0) + pand %3, [GLOBAL(tfe)] + psrlw %3, 1 ; abs(p1 - q1) / 2 + paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 + + psubusb %1, [limit] + psubusb %4, [blimit] + por %1, %4 + pcmpeqb %1, zero ; mask + + psubusb %5, [thresh] + pcmpeqb %5, zero ; ~hev +%endmacro + +%macro LF_FILTER 6 + ; %1-%4: p1-q1 + ; %5: mask + ; %6: hev + + movdqa scratch2, %6 ; save hev + + pxor %1, [GLOBAL(t80)] ; ps1 + pxor %4, [GLOBAL(t80)] ; qs1 + movdqa scratch1, %1 + psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1) + pandn scratch2, scratch1 ; vp8_filter &= hev + + pxor %2, [GLOBAL(t80)] ; ps0 + pxor %3, [GLOBAL(t80)] ; qs0 + movdqa scratch1, %3 + psubsb scratch1, %2 ; qs0 - ps0 + paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) + paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) + paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) + pand %5, scratch2 ; &= mask + + movdqa scratch2, %5 + paddsb %5, [GLOBAL(t4)] ; Filter1 + paddsb scratch2, [GLOBAL(t3)] ; Filter2 + + ; Filter1 >> 3 + movdqa scratch1, zero + pcmpgtb scratch1, %5 + psrlw %5, 3 + pand scratch1, [GLOBAL(te0)] + pand %5, [GLOBAL(t1f)] + por %5, scratch1 + + psubsb %3, %5 ; qs0 - Filter1 + pxor %3, [GLOBAL(t80)] + + ; Filter2 >> 3 + movdqa scratch1, zero + pcmpgtb scratch1, scratch2 + psrlw scratch2, 3 + pand scratch1, [GLOBAL(te0)] + pand scratch2, [GLOBAL(t1f)] + por scratch2, scratch1 + + paddsb %2, scratch2 ; ps0 + Filter2 + pxor %2, [GLOBAL(t80)] + + ; outer tap adjustments + paddsb %5, [GLOBAL(t1)] + movdqa scratch1, zero + pcmpgtb scratch1, %5 + psrlw %5, 1 + pand scratch1, [GLOBAL(t80)] + pand %5, [GLOBAL(t7f)] + por %5, scratch1 + pand %5, %6 ; vp8_filter &= ~hev + + psubsb %4, %5 ; qs1 - vp8_filter + pxor %4, [GLOBAL(t80)] + + paddsb %1, %5 ; ps1 + vp8_filter + pxor %1, [GLOBAL(t80)] +%endmacro + +;void vp8_loop_filter_bh_y_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh +;) +global sym(vp8_loop_filter_bh_y_sse2) PRIVATE +sym(vp8_loop_filter_bh_y_sse2): + +%if LIBVPX_YASM_WIN64 + %define src rcx ; src_ptr + %define stride rdx ; src_pixel_step + %define blimit r8 + %define limit r9 + %define thresh r10 + + %define spp rax + %define stride3 r11 + %define stride5 r12 + %define stride7 r13 + + push rbp + mov rbp, rsp + SAVE_XMM 11 + push r12 + push r13 + mov thresh, arg(4) +%else + %define src rdi ; src_ptr + %define stride rsi ; src_pixel_step + %define blimit rdx + %define limit rcx + %define thresh r8 + + %define spp rax + %define stride3 r9 + %define stride5 r10 + %define stride7 r11 +%endif + + %define scratch1 xmm5 + %define scratch2 xmm6 + %define zero xmm7 + + %define i0 [src] + %define i1 [spp] + %define i2 [src + 2 * stride] + %define i3 [spp + 2 * stride] + %define i4 [src + 4 * stride] + %define i5 [spp + 4 * stride] + %define i6 [src + 2 * stride3] + %define i7 [spp + 2 * stride3] + %define i8 [src + 8 * stride] + %define i9 [spp + 8 * stride] + %define i10 [src + 2 * stride5] + %define i11 [spp + 2 * stride5] + %define i12 [src + 4 * stride3] + %define i13 [spp + 4 * stride3] + %define i14 [src + 2 * stride7] + %define i15 [spp + 2 * stride7] + + ; prep work + lea spp, [src + stride] + lea stride3, [stride + 2 * stride] + lea stride5, [stride3 + 2 * stride] + lea stride7, [stride3 + 4 * stride] + pxor zero, zero + + ; load the first set into registers + movdqa xmm0, i0 + movdqa xmm1, i1 + movdqa xmm2, i2 + movdqa xmm3, i3 + movdqa xmm4, i4 + movdqa xmm8, i5 + movdqa xmm9, i6 ; q2, will contain abs(p1-p0) + movdqa xmm10, i7 +LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10 + + movdqa xmm1, i2 + movdqa xmm2, i3 + movdqa xmm3, i4 + movdqa xmm8, i5 +LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4 + movdqa i2, xmm1 + movdqa i3, xmm2 + +; second set + movdqa i4, xmm3 + movdqa i5, xmm8 + + movdqa xmm0, i6 + movdqa xmm1, i7 + movdqa xmm2, i8 + movdqa xmm4, i9 + movdqa xmm10, i10 ; q2, will contain abs(p1-p0) + movdqa xmm11, i11 +LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9 + + movdqa xmm0, i6 + movdqa xmm1, i7 + movdqa xmm4, i8 + movdqa xmm8, i9 +LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 + movdqa i6, xmm0 + movdqa i7, xmm1 + +; last set + movdqa i8, xmm4 + movdqa i9, xmm8 + + movdqa xmm0, i10 + movdqa xmm1, i11 + movdqa xmm2, i12 + movdqa xmm3, i13 + movdqa xmm9, i14 ; q2, will contain abs(p1-p0) + movdqa xmm11, i15 +LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10 + + movdqa xmm0, i10 + movdqa xmm1, i11 + movdqa xmm3, i12 + movdqa xmm8, i13 +LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 + movdqa i10, xmm0 + movdqa i11, xmm1 + movdqa i12, xmm3 + movdqa i13, xmm8 + +%if LIBVPX_YASM_WIN64 + pop r13 + pop r12 + RESTORE_XMM + pop rbp +%endif + + ret + + +;void vp8_loop_filter_bv_y_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh +;) + +global sym(vp8_loop_filter_bv_y_sse2) PRIVATE +sym(vp8_loop_filter_bv_y_sse2): + +%if LIBVPX_YASM_WIN64 + %define src rcx ; src_ptr + %define stride rdx ; src_pixel_step + %define blimit r8 + %define limit r9 + %define thresh r10 + + %define spp rax + %define stride3 r11 + %define stride5 r12 + %define stride7 r13 + + push rbp + mov rbp, rsp + SAVE_XMM 15 + push r12 + push r13 + mov thresh, arg(4) +%else + %define src rdi + %define stride rsi + %define blimit rdx + %define limit rcx + %define thresh r8 + + %define spp rax + %define stride3 r9 + %define stride5 r10 + %define stride7 r11 +%endif + + %define scratch1 xmm5 + %define scratch2 xmm6 + %define zero xmm7 + + %define s0 [src] + %define s1 [spp] + %define s2 [src + 2 * stride] + %define s3 [spp + 2 * stride] + %define s4 [src + 4 * stride] + %define s5 [spp + 4 * stride] + %define s6 [src + 2 * stride3] + %define s7 [spp + 2 * stride3] + %define s8 [src + 8 * stride] + %define s9 [spp + 8 * stride] + %define s10 [src + 2 * stride5] + %define s11 [spp + 2 * stride5] + %define s12 [src + 4 * stride3] + %define s13 [spp + 4 * stride3] + %define s14 [src + 2 * stride7] + %define s15 [spp + 2 * stride7] + + %define i0 [rsp] + %define i1 [rsp + 16] + %define i2 [rsp + 32] + %define i3 [rsp + 48] + %define i4 [rsp + 64] + %define i5 [rsp + 80] + %define i6 [rsp + 96] + %define i7 [rsp + 112] + %define i8 [rsp + 128] + %define i9 [rsp + 144] + %define i10 [rsp + 160] + %define i11 [rsp + 176] + %define i12 [rsp + 192] + %define i13 [rsp + 208] + %define i14 [rsp + 224] + %define i15 [rsp + 240] + + ALIGN_STACK 16, rax + + ; reserve stack space + %define temp_storage 0 ; size is 256 (16*16) + %define stack_size 256 + sub rsp, stack_size + + ; prep work + lea spp, [src + stride] + lea stride3, [stride + 2 * stride] + lea stride5, [stride3 + 2 * stride] + lea stride7, [stride3 + 4 * stride] + + ; 8-f + movdqa xmm0, s8 + movdqa xmm1, xmm0 + punpcklbw xmm0, s9 ; 80 90 + punpckhbw xmm1, s9 ; 88 98 + + movdqa xmm2, s10 + movdqa xmm3, xmm2 + punpcklbw xmm2, s11 ; a0 b0 + punpckhbw xmm3, s11 ; a8 b8 + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm2 ; 80 90 a0 b0 + punpckhwd xmm4, xmm2 ; 84 94 a4 b4 + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm3 ; 88 98 a8 b8 + punpckhwd xmm2, xmm3 ; 8c 9c ac bc + + ; using xmm[0124] + ; work on next 4 rows + + movdqa xmm3, s12 + movdqa xmm5, xmm3 + punpcklbw xmm3, s13 ; c0 d0 + punpckhbw xmm5, s13 ; c8 d8 + + movdqa xmm6, s14 + movdqa xmm7, xmm6 + punpcklbw xmm6, s15 ; e0 f0 + punpckhbw xmm7, s15 ; e8 f8 + + movdqa xmm8, xmm3 + punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 + punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 + + movdqa xmm6, xmm5 + punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 + punpckhwd xmm6, xmm7 ; cc dc ec fc + + ; pull the third and fourth sets together + + movdqa xmm7, xmm0 + punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 + punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 + + movdqa xmm3, xmm4 + punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 + punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 + + movdqa xmm8, xmm1 + punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 + punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa + + movdqa xmm5, xmm2 + punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc + punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe + + ; save the calculations. we only have 15 registers ... + movdqa i0, xmm0 + movdqa i1, xmm7 + movdqa i2, xmm4 + movdqa i3, xmm3 + movdqa i4, xmm1 + movdqa i5, xmm8 + movdqa i6, xmm2 + movdqa i7, xmm5 + + ; 0-7 + movdqa xmm0, s0 + movdqa xmm1, xmm0 + punpcklbw xmm0, s1 ; 00 10 + punpckhbw xmm1, s1 ; 08 18 + + movdqa xmm2, s2 + movdqa xmm3, xmm2 + punpcklbw xmm2, s3 ; 20 30 + punpckhbw xmm3, s3 ; 28 38 + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm2 ; 00 10 20 30 + punpckhwd xmm4, xmm2 ; 04 14 24 34 + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm3 ; 08 18 28 38 + punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c + + ; using xmm[0124] + ; work on next 4 rows + + movdqa xmm3, s4 + movdqa xmm5, xmm3 + punpcklbw xmm3, s5 ; 40 50 + punpckhbw xmm5, s5 ; 48 58 + + movdqa xmm6, s6 + movdqa xmm7, xmm6 + punpcklbw xmm6, s7 ; 60 70 + punpckhbw xmm7, s7 ; 68 78 + + movdqa xmm8, xmm3 + punpcklwd xmm3, xmm6 ; 40 50 60 70 + punpckhwd xmm8, xmm6 ; 44 54 64 74 + + movdqa xmm6, xmm5 + punpcklwd xmm5, xmm7 ; 48 58 68 78 + punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c + + ; pull the first two sets together + + movdqa xmm7, xmm0 + punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 + punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 + + movdqa xmm3, xmm4 + punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 + punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 + + movdqa xmm8, xmm1 + punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 + punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a + + movdqa xmm5, xmm2 + punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c + punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e + ; final combination + + movdqa xmm6, xmm0 + punpcklqdq xmm0, i0 + punpckhqdq xmm6, i0 + + movdqa xmm9, xmm7 + punpcklqdq xmm7, i1 + punpckhqdq xmm9, i1 + + movdqa xmm10, xmm4 + punpcklqdq xmm4, i2 + punpckhqdq xmm10, i2 + + movdqa xmm11, xmm3 + punpcklqdq xmm3, i3 + punpckhqdq xmm11, i3 + + movdqa xmm12, xmm1 + punpcklqdq xmm1, i4 + punpckhqdq xmm12, i4 + + movdqa xmm13, xmm8 + punpcklqdq xmm8, i5 + punpckhqdq xmm13, i5 + + movdqa xmm14, xmm2 + punpcklqdq xmm2, i6 + punpckhqdq xmm14, i6 + + movdqa xmm15, xmm5 + punpcklqdq xmm5, i7 + punpckhqdq xmm15, i7 + + movdqa i0, xmm0 + movdqa i1, xmm6 + movdqa i2, xmm7 + movdqa i3, xmm9 + movdqa i4, xmm4 + movdqa i5, xmm10 + movdqa i6, xmm3 + movdqa i7, xmm11 + movdqa i8, xmm1 + movdqa i9, xmm12 + movdqa i10, xmm8 + movdqa i11, xmm13 + movdqa i12, xmm2 + movdqa i13, xmm14 + movdqa i14, xmm5 + movdqa i15, xmm15 + +; TRANSPOSED DATA AVAILABLE ON THE STACK + + movdqa xmm12, xmm6 + movdqa xmm13, xmm7 + + pxor zero, zero + +LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11 + + movdqa xmm1, i2 + movdqa xmm2, i3 + movdqa xmm8, i4 + movdqa xmm9, i5 +LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4 + movdqa i2, xmm1 + movdqa i3, xmm2 + +; second set + movdqa i4, xmm8 + movdqa i5, xmm9 + + movdqa xmm0, i6 + movdqa xmm1, i7 + movdqa xmm2, i8 + movdqa xmm4, i9 + movdqa xmm10, i10 ; q2, will contain abs(p1-p0) + movdqa xmm11, i11 +LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3 + + movdqa xmm0, i6 + movdqa xmm1, i7 + movdqa xmm3, i8 + movdqa xmm4, i9 +LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2 + movdqa i6, xmm0 + movdqa i7, xmm1 + +; last set + movdqa i8, xmm3 + movdqa i9, xmm4 + + movdqa xmm0, i10 + movdqa xmm1, i11 + movdqa xmm2, i12 + movdqa xmm8, i13 + movdqa xmm9, i14 ; q2, will contain abs(p1-p0) + movdqa xmm11, i15 +LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10 + + movdqa xmm0, i10 + movdqa xmm1, i11 + movdqa xmm4, i12 + movdqa xmm8, i13 +LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 + movdqa i10, xmm0 + movdqa i11, xmm1 + movdqa i12, xmm4 + movdqa i13, xmm8 + + +; RESHUFFLE AND WRITE OUT + ; 8-f + movdqa xmm0, i8 + movdqa xmm1, xmm0 + punpcklbw xmm0, i9 ; 80 90 + punpckhbw xmm1, i9 ; 88 98 + + movdqa xmm2, i10 + movdqa xmm3, xmm2 + punpcklbw xmm2, i11 ; a0 b0 + punpckhbw xmm3, i11 ; a8 b8 + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm2 ; 80 90 a0 b0 + punpckhwd xmm4, xmm2 ; 84 94 a4 b4 + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm3 ; 88 98 a8 b8 + punpckhwd xmm2, xmm3 ; 8c 9c ac bc + + ; using xmm[0124] + ; work on next 4 rows + + movdqa xmm3, i12 + movdqa xmm5, xmm3 + punpcklbw xmm3, i13 ; c0 d0 + punpckhbw xmm5, i13 ; c8 d8 + + movdqa xmm6, i14 + movdqa xmm7, xmm6 + punpcklbw xmm6, i15 ; e0 f0 + punpckhbw xmm7, i15 ; e8 f8 + + movdqa xmm8, xmm3 + punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 + punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 + + movdqa xmm6, xmm5 + punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 + punpckhwd xmm6, xmm7 ; cc dc ec fc + + ; pull the third and fourth sets together + + movdqa xmm7, xmm0 + punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 + punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 + + movdqa xmm3, xmm4 + punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 + punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 + + movdqa xmm8, xmm1 + punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 + punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa + + movdqa xmm5, xmm2 + punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc + punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe + + ; save the calculations. we only have 15 registers ... + movdqa i8, xmm0 + movdqa i9, xmm7 + movdqa i10, xmm4 + movdqa i11, xmm3 + movdqa i12, xmm1 + movdqa i13, xmm8 + movdqa i14, xmm2 + movdqa i15, xmm5 + + ; 0-7 + movdqa xmm0, i0 + movdqa xmm1, xmm0 + punpcklbw xmm0, i1 ; 00 10 + punpckhbw xmm1, i1 ; 08 18 + + movdqa xmm2, i2 + movdqa xmm3, xmm2 + punpcklbw xmm2, i3 ; 20 30 + punpckhbw xmm3, i3 ; 28 38 + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm2 ; 00 10 20 30 + punpckhwd xmm4, xmm2 ; 04 14 24 34 + + movdqa xmm2, xmm1 + punpcklwd xmm1, xmm3 ; 08 18 28 38 + punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c + + ; using xmm[0124] + ; work on next 4 rows + + movdqa xmm3, i4 + movdqa xmm5, xmm3 + punpcklbw xmm3, i5 ; 40 50 + punpckhbw xmm5, i5 ; 48 58 + + movdqa xmm6, i6 + movdqa xmm7, xmm6 + punpcklbw xmm6, i7 ; 60 70 + punpckhbw xmm7, i7 ; 68 78 + + movdqa xmm8, xmm3 + punpcklwd xmm3, xmm6 ; 40 50 60 70 + punpckhwd xmm8, xmm6 ; 44 54 64 74 + + movdqa xmm6, xmm5 + punpcklwd xmm5, xmm7 ; 48 58 68 78 + punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c + + ; pull the first two sets together + + movdqa xmm7, xmm0 + punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 + punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 + + movdqa xmm3, xmm4 + punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 + punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 + + movdqa xmm8, xmm1 + punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 + punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a + + movdqa xmm5, xmm2 + punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c + punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e + ; final combination + + movdqa xmm6, xmm0 + punpcklqdq xmm0, i8 + punpckhqdq xmm6, i8 + + movdqa xmm9, xmm7 + punpcklqdq xmm7, i9 + punpckhqdq xmm9, i9 + + movdqa xmm10, xmm4 + punpcklqdq xmm4, i10 + punpckhqdq xmm10, i10 + + movdqa xmm11, xmm3 + punpcklqdq xmm3, i11 + punpckhqdq xmm11, i11 + + movdqa xmm12, xmm1 + punpcklqdq xmm1, i12 + punpckhqdq xmm12, i12 + + movdqa xmm13, xmm8 + punpcklqdq xmm8, i13 + punpckhqdq xmm13, i13 + + movdqa xmm14, xmm2 + punpcklqdq xmm2, i14 + punpckhqdq xmm14, i14 + + movdqa xmm15, xmm5 + punpcklqdq xmm5, i15 + punpckhqdq xmm15, i15 + + movdqa s0, xmm0 + movdqa s1, xmm6 + movdqa s2, xmm7 + movdqa s3, xmm9 + movdqa s4, xmm4 + movdqa s5, xmm10 + movdqa s6, xmm3 + movdqa s7, xmm11 + movdqa s8, xmm1 + movdqa s9, xmm12 + movdqa s10, xmm8 + movdqa s11, xmm13 + movdqa s12, xmm2 + movdqa s13, xmm14 + movdqa s14, xmm5 + movdqa s15, xmm15 + + ; free stack space + add rsp, stack_size + + ; un-ALIGN_STACK + pop rsp + +%if LIBVPX_YASM_WIN64 + pop r13 + pop r12 + RESTORE_XMM + pop rbp +%endif + + ret + +SECTION_RODATA +align 16 +te0: + times 16 db 0xe0 +align 16 +t7f: + times 16 db 0x7f +align 16 +tfe: + times 16 db 0xfe +align 16 +t1f: + times 16 db 0x1f +align 16 +t80: + times 16 db 0x80 +align 16 +t1: + times 16 db 0x01 +align 16 +t3: + times 16 db 0x03 +align 16 +t4: + times 16 db 0x04 diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm new file mode 100644 index 00000000000..1913abc69b0 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm @@ -0,0 +1,1640 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" +%define _t0 0 +%define _t1 _t0 + 16 +%define _p3 _t1 + 16 +%define _p2 _p3 + 16 +%define _p1 _p2 + 16 +%define _p0 _p1 + 16 +%define _q0 _p0 + 16 +%define _q1 _q0 + 16 +%define _q2 _q1 + 16 +%define _q3 _q2 + 16 +%define lf_var_size 160 + +; Use of pmaxub instead of psubusb to compute filter mask was seen +; in ffvp8 + +%macro LFH_FILTER_AND_HEV_MASK 1 +%if %1 + movdqa xmm2, [rdi+2*rax] ; q3 + movdqa xmm1, [rsi+2*rax] ; q2 + movdqa xmm4, [rsi+rax] ; q1 + movdqa xmm5, [rsi] ; q0 + neg rax ; negate pitch to deal with above border +%else + movlps xmm2, [rsi + rcx*2] ; q3 + movlps xmm1, [rsi + rcx] ; q2 + movlps xmm4, [rsi] ; q1 + movlps xmm5, [rsi + rax] ; q0 + + movhps xmm2, [rdi + rcx*2] + movhps xmm1, [rdi + rcx] + movhps xmm4, [rdi] + movhps xmm5, [rdi + rax] + + lea rsi, [rsi + rax*4] + lea rdi, [rdi + rax*4] + + movdqa [rsp+_q2], xmm1 ; store q2 + movdqa [rsp+_q1], xmm4 ; store q1 +%endif + movdqa xmm7, [rdx] ;limit + + movdqa xmm6, xmm1 ; q2 + movdqa xmm3, xmm4 ; q1 + + psubusb xmm1, xmm2 ; q2-=q3 + psubusb xmm2, xmm6 ; q3-=q2 + + psubusb xmm4, xmm6 ; q1-=q2 + psubusb xmm6, xmm3 ; q2-=q1 + + por xmm4, xmm6 ; abs(q2-q1) + por xmm1, xmm2 ; abs(q3-q2) + + movdqa xmm0, xmm5 ; q0 + pmaxub xmm1, xmm4 + + psubusb xmm5, xmm3 ; q0-=q1 + psubusb xmm3, xmm0 ; q1-=q0 + + por xmm5, xmm3 ; abs(q0-q1) + movdqa [rsp+_t0], xmm5 ; save to t0 + + pmaxub xmm1, xmm5 + +%if %1 + movdqa xmm2, [rsi+4*rax] ; p3 + movdqa xmm4, [rdi+4*rax] ; p2 + movdqa xmm6, [rsi+2*rax] ; p1 +%else + movlps xmm2, [rsi + rax] ; p3 + movlps xmm4, [rsi] ; p2 + movlps xmm6, [rsi + rcx] ; p1 + + movhps xmm2, [rdi + rax] + movhps xmm4, [rdi] + movhps xmm6, [rdi + rcx] + + movdqa [rsp+_p2], xmm4 ; store p2 + movdqa [rsp+_p1], xmm6 ; store p1 +%endif + + movdqa xmm5, xmm4 ; p2 + movdqa xmm3, xmm6 ; p1 + + psubusb xmm4, xmm2 ; p2-=p3 + psubusb xmm2, xmm5 ; p3-=p2 + + psubusb xmm3, xmm5 ; p1-=p2 + pmaxub xmm1, xmm4 ; abs(p3 - p2) + + psubusb xmm5, xmm6 ; p2-=p1 + pmaxub xmm1, xmm2 ; abs(p3 - p2) + + pmaxub xmm1, xmm5 ; abs(p2 - p1) + movdqa xmm2, xmm6 ; p1 + + pmaxub xmm1, xmm3 ; abs(p2 - p1) +%if %1 + movdqa xmm4, [rsi+rax] ; p0 + movdqa xmm3, [rdi] ; q1 +%else + movlps xmm4, [rsi + rcx*2] ; p0 + movhps xmm4, [rdi + rcx*2] + movdqa xmm3, [rsp+_q1] ; q1 +%endif + + movdqa xmm5, xmm4 ; p0 + psubusb xmm4, xmm6 ; p0-=p1 + + psubusb xmm6, xmm5 ; p1-=p0 + + por xmm6, xmm4 ; abs(p1 - p0) + mov rdx, arg(2) ; get blimit + + movdqa [rsp+_t1], xmm6 ; save to t1 + + movdqa xmm4, xmm3 ; q1 + pmaxub xmm1, xmm6 + + psubusb xmm3, xmm2 ; q1-=p1 + psubusb xmm2, xmm4 ; p1-=q1 + + psubusb xmm1, xmm7 + por xmm2, xmm3 ; abs(p1-q1) + + movdqa xmm7, [rdx] ; blimit + mov rdx, arg(4) ; hev get thresh + + movdqa xmm3, xmm0 ; q0 + pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero + + movdqa xmm6, xmm5 ; p0 + psrlw xmm2, 1 ; abs(p1-q1)/2 + + psubusb xmm5, xmm3 ; p0-=q0 + psubusb xmm3, xmm6 ; q0-=p0 + por xmm5, xmm3 ; abs(p0 - q0) + + paddusb xmm5, xmm5 ; abs(p0-q0)*2 + + movdqa xmm4, [rsp+_t0] ; hev get abs (q1 - q0) + movdqa xmm3, [rsp+_t1] ; get abs (p1 - p0) + + paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + movdqa xmm2, [rdx] ; hev + + psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + psubusb xmm4, xmm2 ; hev + + psubusb xmm3, xmm2 ; hev + por xmm1, xmm5 + + pxor xmm7, xmm7 + paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + pcmpeqb xmm4, xmm5 ; hev + pcmpeqb xmm3, xmm3 ; hev + + pcmpeqb xmm1, xmm7 ; mask xmm1 + pxor xmm4, xmm3 ; hev +%endmacro + +%macro B_FILTER 1 + movdqa xmm3, [GLOBAL(t80)] +%if %1 == 0 + movdqa xmm2, [rsp+_p1] ; p1 + movdqa xmm7, [rsp+_q1] ; q1 +%elif %1 == 1 + movdqa xmm2, [rsi+2*rax] ; p1 + movdqa xmm7, [rdi] ; q1 +%elif %1 == 2 + movdqa xmm2, [rsp+_p1] ; p1 + movdqa xmm6, [rsp+_p0] ; p0 + movdqa xmm0, [rsp+_q0] ; q0 + movdqa xmm7, [rsp+_q1] ; q1 +%endif + + pxor xmm2, xmm3 ; p1 offset to convert to signed values + pxor xmm7, xmm3 ; q1 offset to convert to signed values + + psubsb xmm2, xmm7 ; p1 - q1 + pxor xmm6, xmm3 ; offset to convert to signed values + + pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) + pxor xmm0, xmm3 ; offset to convert to signed values + + movdqa xmm3, xmm0 ; q0 + psubsb xmm0, xmm6 ; q0 - p0 + paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) + paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) + paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) + pand xmm1, xmm2 ; mask filter values we don't care about + + movdqa xmm2, xmm1 + paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + + punpckhbw xmm5, xmm2 ; axbxcxdx + punpcklbw xmm2, xmm2 ; exfxgxhx + + punpcklbw xmm0, xmm1 ; exfxgxhx + psraw xmm5, 11 ; sign extended shift right by 3 + + punpckhbw xmm1, xmm1 ; axbxcxdx + psraw xmm2, 11 ; sign extended shift right by 3 + + packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; + psraw xmm0, 11 ; sign extended shift right by 3 + + psraw xmm1, 11 ; sign extended shift right by 3 + movdqa xmm5, xmm0 ; save results + + packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 + + paddsb xmm6, xmm2 ; p0+= p0 add + + movdqa xmm2, [GLOBAL(ones)] + paddsw xmm5, xmm2 + paddsw xmm1, xmm2 + psraw xmm5, 1 ; partial shifted one more time for 2nd tap + psraw xmm1, 1 ; partial shifted one more time for 2nd tap + packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 + movdqa xmm2, [GLOBAL(t80)] + +%if %1 == 0 + movdqa xmm1, [rsp+_p1] ; p1 + lea rsi, [rsi + rcx*2] + lea rdi, [rdi + rcx*2] +%elif %1 == 1 + movdqa xmm1, [rsi+2*rax] ; p1 +%elif %1 == 2 + movdqa xmm1, [rsp+_p1] ; p1 +%endif + + pandn xmm4, xmm5 ; high edge variance additive + pxor xmm6, xmm2 ; unoffset + + pxor xmm1, xmm2 ; reoffset + psubsb xmm3, xmm0 ; q0-= q0 add + + paddsb xmm1, xmm4 ; p1+= p1 add + pxor xmm3, xmm2 ; unoffset + + pxor xmm1, xmm2 ; unoffset + psubsb xmm7, xmm4 ; q1-= q1 add + + pxor xmm7, xmm2 ; unoffset +%if %1 == 0 + movq [rsi], xmm6 ; p0 + movhps [rdi], xmm6 + movq [rsi + rax], xmm1 ; p1 + movhps [rdi + rax], xmm1 + movq [rsi + rcx], xmm3 ; q0 + movhps [rdi + rcx], xmm3 + movq [rsi + rcx*2], xmm7 ; q1 + movhps [rdi + rcx*2], xmm7 +%elif %1 == 1 + movdqa [rsi+rax], xmm6 ; write back + movdqa [rsi+2*rax], xmm1 ; write back + movdqa [rsi], xmm3 ; write back + movdqa [rdi], xmm7 ; write back +%endif + +%endmacro + +%if ABI_IS_32BIT + +;void vp8_loop_filter_horizontal_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +;) +global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE +sym(vp8_loop_filter_horizontal_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step + + mov rdx, arg(3) ;limit + + lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 1 + ; filter and write back the result + B_FILTER 1 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +%endif + +;void vp8_loop_filter_horizontal_edge_uv_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE +sym(vp8_loop_filter_horizontal_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; u + mov rdi, arg(5) ; v + movsxd rax, dword ptr arg(1) ; src_pixel_step + mov rcx, rax + neg rax ; negate pitch to deal with above border + + mov rdx, arg(3) ;limit + + lea rsi, [rsi + rcx] + lea rdi, [rdi + rcx] + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 0 + ; filter and write back the result + B_FILTER 0 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +%macro MB_FILTER_AND_WRITEBACK 1 + movdqa xmm3, [GLOBAL(t80)] +%if %1 == 0 + movdqa xmm2, [rsp+_p1] ; p1 + movdqa xmm7, [rsp+_q1] ; q1 +%elif %1 == 1 + movdqa xmm2, [rsi+2*rax] ; p1 + movdqa xmm7, [rdi] ; q1 + + mov rcx, rax + neg rcx +%elif %1 == 2 + movdqa xmm2, [rsp+_p1] ; p1 + movdqa xmm6, [rsp+_p0] ; p0 + movdqa xmm0, [rsp+_q0] ; q0 + movdqa xmm7, [rsp+_q1] ; q1 +%endif + + pxor xmm2, xmm3 ; p1 offset to convert to signed values + pxor xmm7, xmm3 ; q1 offset to convert to signed values + pxor xmm6, xmm3 ; offset to convert to signed values + pxor xmm0, xmm3 ; offset to convert to signed values + + psubsb xmm2, xmm7 ; p1 - q1 + + movdqa xmm3, xmm0 ; q0 + psubsb xmm0, xmm6 ; q0 - p0 + paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) + paddsb xmm2, xmm0 ; 2 * (q0 - p0) + paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) + pand xmm1, xmm2 ; mask filter values we don't care about + + movdqa xmm2, xmm1 ; vp8_filter + + pand xmm2, xmm4 ; Filter2 = vp8_filter & hev + pxor xmm0, xmm0 + + pandn xmm4, xmm1 ; vp8_filter&=~hev + pxor xmm1, xmm1 + + punpcklbw xmm0, xmm4 ; Filter 2 (hi) + punpckhbw xmm1, xmm4 ; Filter 2 (lo) + + movdqa xmm5, xmm2 + + movdqa xmm4, [GLOBAL(s9)] + paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) + paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) + + pmulhw xmm1, xmm4 ; Filter 2 (lo) * 9 + pmulhw xmm0, xmm4 ; Filter 2 (hi) * 9 + + punpckhbw xmm7, xmm5 ; axbxcxdx + punpcklbw xmm5, xmm5 ; exfxgxhx + + psraw xmm7, 11 ; sign extended shift right by 3 + + psraw xmm5, 11 ; sign extended shift right by 3 + punpckhbw xmm4, xmm2 ; axbxcxdx + + punpcklbw xmm2, xmm2 ; exfxgxhx + psraw xmm4, 11 ; sign extended shift right by 3 + + packsswb xmm5, xmm7 ; Filter2 >>=3; + psraw xmm2, 11 ; sign extended shift right by 3 + + packsswb xmm2, xmm4 ; Filter1 >>=3; + + paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 + + psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 + movdqa xmm7, xmm1 + + movdqa xmm4, [GLOBAL(s63)] + movdqa xmm5, xmm0 + movdqa xmm2, xmm5 + paddw xmm0, xmm4 ; Filter 2 (hi) * 9 + 63 + paddw xmm1, xmm4 ; Filter 2 (lo) * 9 + 63 + movdqa xmm4, xmm7 + + paddw xmm5, xmm5 ; Filter 2 (hi) * 18 + + paddw xmm7, xmm7 ; Filter 2 (lo) * 18 + paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 + + paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 + paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 + psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 + + paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 + psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 + psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 + + packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) + + psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 + psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 + psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 + + packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) + packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) + movdqa xmm7, [GLOBAL(t80)] + +%if %1 == 0 + movdqa xmm1, [rsp+_q1] ; q1 + movdqa xmm4, [rsp+_p1] ; p1 + lea rsi, [rsi+rcx*2] + lea rdi, [rdi+rcx*2] + +%elif %1 == 1 + movdqa xmm1, [rdi] ; q1 + movdqa xmm4, [rsi+rax*2] ; p1 +%elif %1 == 2 + movdqa xmm4, [rsp+_p1] ; p1 + movdqa xmm1, [rsp+_q1] ; q1 +%endif + + pxor xmm1, xmm7 + pxor xmm4, xmm7 + + psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) + paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 - u3) + psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) + paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) + +%if %1 == 1 + movdqa xmm2, [rdi+rax*4] ; p2 + movdqa xmm5, [rdi+rcx] ; q2 +%else + movdqa xmm2, [rsp+_p2] ; p2 + movdqa xmm5, [rsp+_q2] ; q2 +%endif + + pxor xmm1, xmm7 ; *oq1 = sq^0x80; + pxor xmm4, xmm7 ; *op1 = sp^0x80; + pxor xmm2, xmm7 + pxor xmm5, xmm7 + paddsb xmm2, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u) + psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) + pxor xmm2, xmm7 ; *op2 = sp^0x80; + pxor xmm5, xmm7 ; *oq2 = sq^0x80; + pxor xmm3, xmm7 ; *oq0 = sq^0x80 + pxor xmm6, xmm7 ; *oq0 = sp^0x80 +%if %1 == 0 + movq [rsi], xmm6 ; p0 + movhps [rdi], xmm6 + movq [rsi + rcx], xmm3 ; q0 + movhps [rdi + rcx], xmm3 + lea rdx, [rcx + rcx*2] + movq [rsi+rcx*2], xmm1 ; q1 + movhps [rdi+rcx*2], xmm1 + + movq [rsi + rax], xmm4 ; p1 + movhps [rdi + rax], xmm4 + + movq [rsi+rax*2], xmm2 ; p2 + movhps [rdi+rax*2], xmm2 + + movq [rsi+rdx], xmm5 ; q2 + movhps [rdi+rdx], xmm5 +%elif %1 == 1 + movdqa [rdi+rcx], xmm5 ; q2 + movdqa [rdi], xmm1 ; q1 + movdqa [rsi], xmm3 ; q0 + movdqa [rsi+rax ], xmm6 ; p0 + movdqa [rsi+rax*2], xmm4 ; p1 + movdqa [rdi+rax*4], xmm2 ; p2 +%elif %1 == 2 + movdqa [rsp+_p1], xmm4 ; p1 + movdqa [rsp+_p0], xmm6 ; p0 + movdqa [rsp+_q0], xmm3 ; q0 + movdqa [rsp+_q1], xmm1 ; q1 +%endif + +%endmacro + + +;void vp8_mbloop_filter_horizontal_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +;) +global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE +sym(vp8_mbloop_filter_horizontal_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step + mov rdx, arg(3) ;limit + + lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 1 + ; filter and write back the results + MB_FILTER_AND_WRITEBACK 1 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_mbloop_filter_horizontal_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE +sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; u + mov rdi, arg(5) ; v + movsxd rax, dword ptr arg(1) ; src_pixel_step + mov rcx, rax + neg rax ; negate pitch to deal with above border + mov rdx, arg(3) ;limit + + lea rsi, [rsi + rcx] + lea rdi, [rdi + rcx] + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 0 + ; filter and write back the results + MB_FILTER_AND_WRITEBACK 0 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +%macro TRANSPOSE_16X8 2 + movq xmm4, [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 + movq xmm1, [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 + movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 + movq xmm7, [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30 + movq xmm5, [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40 + movq xmm2, [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50 + + punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 + + movq xmm1, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 + + movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 + punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20 + + movq xmm7, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 + + punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 +%if %1 + lea rsi, [rsi+rax*8] + lea rdi, [rdi+rax*8] +%else + mov rsi, arg(5) ; v_ptr +%endif + + movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 + punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 + punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 + punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 + punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 + +%if %1 == 0 + lea rdi, [rsi + rax - 4] ; rdi points to row +1 for indirect addressing + lea rsi, [rsi - 4] +%endif + + movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 + punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 + + movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 + punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + + punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 + + punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 + + punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 + + movdqa [rsp+_t0], xmm2 ; save to free XMM2 + + movq xmm2, [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80 + movq xmm6, [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 + movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 + movq xmm5, [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 + movq xmm1, [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 + + punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 + + movq xmm6, [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 + + punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 + + movq xmm5, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 + + punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0 + + movq xmm6, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 + + punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 + + movdqa xmm6, xmm1 ; + punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 + + punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 + movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 + + punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 + + punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 + + movdqa xmm0, xmm5 + punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 + + punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 + movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 + + punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 + + punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 + movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 + + punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 + + punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 + +%if %2 == 0 + movdqa [rsp+_q3], xmm7 ; save 7 + movdqa [rsp+_q2], xmm6 ; save 6 +%endif + movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + movdqa [rsp+_p1], xmm2 ; save 2 + + movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 + punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + movdqa [rsp+_p0], xmm3 ; save 3 + + punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 + + movdqa [rsp+_q0], xmm4 ; save 4 + movdqa [rsp+_q1], xmm5 ; save 5 + movdqa xmm1, [rsp+_t0] + + movdqa xmm2, xmm1 ; + punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 + punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + +%if %2 == 0 + movdqa [rsp+_p2], xmm1 + movdqa [rsp+_p3], xmm2 +%endif + +%endmacro + +%macro LFV_FILTER_MASK_HEV_MASK 0 + movdqa xmm0, xmm6 ; q2 + psubusb xmm0, xmm7 ; q2-q3 + + psubusb xmm7, xmm6 ; q3-q2 + movdqa xmm4, xmm5 ; q1 + + por xmm7, xmm0 ; abs (q3-q2) + psubusb xmm4, xmm6 ; q1-q2 + + movdqa xmm0, xmm1 + psubusb xmm6, xmm5 ; q2-q1 + + por xmm6, xmm4 ; abs (q2-q1) + psubusb xmm0, xmm2 ; p2 - p3; + + psubusb xmm2, xmm1 ; p3 - p2; + por xmm0, xmm2 ; abs(p2-p3) + + movdqa xmm5, [rsp+_p1] ; p1 + pmaxub xmm0, xmm7 + + movdqa xmm2, xmm5 ; p1 + psubusb xmm5, xmm1 ; p1-p2 + psubusb xmm1, xmm2 ; p2-p1 + + movdqa xmm7, xmm3 ; p0 + psubusb xmm7, xmm2 ; p0-p1 + + por xmm1, xmm5 ; abs(p2-p1) + pmaxub xmm0, xmm6 + + pmaxub xmm0, xmm1 + movdqa xmm1, xmm2 ; p1 + + psubusb xmm2, xmm3 ; p1-p0 + + por xmm2, xmm7 ; abs(p1-p0) + + pmaxub xmm0, xmm2 + + movdqa xmm5, [rsp+_q0] ; q0 + movdqa xmm7, [rsp+_q1] ; q1 + + mov rdx, arg(3) ; limit + + movdqa xmm6, xmm5 ; q0 + movdqa xmm4, xmm7 ; q1 + + psubusb xmm5, xmm7 ; q0-q1 + psubusb xmm7, xmm6 ; q1-q0 + + por xmm7, xmm5 ; abs(q1-q0) + + pmaxub xmm0, xmm7 + + psubusb xmm0, [rdx] ; limit + + mov rdx, arg(2) ; blimit + movdqa xmm5, xmm4 ; q1 + + psubusb xmm5, xmm1 ; q1-=p1 + psubusb xmm1, xmm4 ; p1-=q1 + + por xmm5, xmm1 ; abs(p1-q1) + movdqa xmm1, xmm3 ; p0 + + pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero + psubusb xmm1, xmm6 ; p0-q0 + + movdqa xmm4, [rdx] ; blimit + mov rdx, arg(4) ; get thresh + + psrlw xmm5, 1 ; abs(p1-q1)/2 + psubusb xmm6, xmm3 ; q0-p0 + + por xmm1, xmm6 ; abs(q0-p0) + paddusb xmm1, xmm1 ; abs(q0-p0)*2 + movdqa xmm3, [rdx] + + paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + psubusb xmm2, xmm3 ; abs(q1 - q0) > thresh + + psubusb xmm7, xmm3 ; abs(p1 - p0)> thresh + + psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + por xmm2, xmm7 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + por xmm1, xmm0 ; mask + pcmpeqb xmm2, xmm0 + + pxor xmm0, xmm0 + pcmpeqb xmm4, xmm4 + + pcmpeqb xmm1, xmm0 + pxor xmm4, xmm2 +%endmacro + +%macro BV_TRANSPOSE 0 + ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 + movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + + movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 + + punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 + + movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 + + punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 + movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 + + punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 + ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 + ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 + ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 + ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 +%endmacro + +%macro BV_WRITEBACK 2 + movd [rsi+2], %1 + movd [rsi+4*rax+2], %2 + psrldq %1, 4 + psrldq %2, 4 + movd [rdi+2], %1 + movd [rdi+4*rax+2], %2 + psrldq %1, 4 + psrldq %2, 4 + movd [rsi+2*rax+2], %1 + movd [rsi+2*rcx+2], %2 + psrldq %1, 4 + psrldq %2, 4 + movd [rdi+2*rax+2], %1 + movd [rdi+2*rcx+2], %2 +%endmacro + +%if ABI_IS_32BIT + +;void vp8_loop_filter_vertical_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +;) +global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE +sym(vp8_loop_filter_vertical_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; src_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax*2+rax] + + ;transpose 16x8 to 8x16, and store the 8-line result on stack. + TRANSPOSE_16X8 1, 1 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK + + ; start work on filters + B_FILTER 2 + + ; transpose and write back - only work on q1, q0, p0, p1 + BV_TRANSPOSE + ; store 16-line result + + lea rdx, [rax] + neg rdx + + BV_WRITEBACK xmm1, xmm5 + + lea rsi, [rsi+rdx*8] + lea rdi, [rdi+rdx*8] + BV_WRITEBACK xmm2, xmm6 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +%endif + +;void vp8_loop_filter_vertical_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE +sym(vp8_loop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; u_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax+2*rax] + + ;transpose 16x8 to 8x16, and store the 8-line result on stack. + TRANSPOSE_16X8 0, 1 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK + + ; start work on filters + B_FILTER 2 + + ; transpose and write back - only work on q1, q0, p0, p1 + BV_TRANSPOSE + + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + + ; store 16-line result + BV_WRITEBACK xmm1, xmm5 + + mov rsi, arg(0) ; u_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + BV_WRITEBACK xmm2, xmm6 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +%macro MBV_TRANSPOSE 0 + movdqa xmm0, [rsp+_p3] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + + punpcklbw xmm0, xmm2 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 + punpckhbw xmm1, xmm2 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + + movdqa xmm7, [rsp+_p1] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + movdqa xmm6, xmm7 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + + punpcklbw xmm7, [rsp+_p0] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + punpckhbw xmm6, [rsp+_p0] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 + punpcklwd xmm0, xmm7 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 + + punpckhwd xmm3, xmm7 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 + movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + + punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 + punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 + + movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpcklbw xmm7, [rsp+_q1] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 + + movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 + punpcklbw xmm6, [rsp+_q3] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 + + movdqa xmm2, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 + punpcklwd xmm7, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 + + punpckhwd xmm2, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 + movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 + + punpckldq xmm0, xmm7 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 + punpckhdq xmm6, xmm7 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 +%endmacro + +%macro MBV_WRITEBACK_1 0 + movq [rsi], xmm0 + movhps [rdi], xmm0 + + movq [rsi+2*rax], xmm6 + movhps [rdi+2*rax], xmm6 + + movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 + punpckldq xmm0, xmm2 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 + punpckhdq xmm3, xmm2 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 + + movq [rsi+4*rax], xmm0 + movhps [rdi+4*rax], xmm0 + + movq [rsi+2*rcx], xmm3 + movhps [rdi+2*rcx], xmm3 + + movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpckhbw xmm7, [rsp+_q1] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 + punpckhbw xmm5, [rsp+_q3] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 + + movdqa xmm0, xmm7 + punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 + punpckhwd xmm7, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 + + movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 + punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 + punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 +%endmacro + +%macro MBV_WRITEBACK_2 0 + movq [rsi], xmm1 + movhps [rdi], xmm1 + + movq [rsi+2*rax], xmm5 + movhps [rdi+2*rax], xmm5 + + movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 + punpckldq xmm1, xmm7 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 + punpckhdq xmm4, xmm7 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 + + movq [rsi+4*rax], xmm1 + movhps [rdi+4*rax], xmm1 + + movq [rsi+2*rcx], xmm4 + movhps [rdi+2*rcx], xmm4 +%endmacro + + +;void vp8_mbloop_filter_vertical_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +;) +global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE +sym(vp8_mbloop_filter_vertical_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; src_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax*2+rax] + + ; Transpose + TRANSPOSE_16X8 1, 0 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK + + neg rax + ; start work on filters + MB_FILTER_AND_WRITEBACK 2 + + lea rsi, [rsi+rax*8] + lea rdi, [rdi+rax*8] + + ; transpose and write back + MBV_TRANSPOSE + + neg rax + + MBV_WRITEBACK_1 + + + lea rsi, [rsi+rax*8] + lea rdi, [rdi+rax*8] + MBV_WRITEBACK_2 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_mbloop_filter_vertical_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE +sym(vp8_mbloop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, lf_var_size + + mov rsi, arg(0) ; u_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax+2*rax] + + ; Transpose + TRANSPOSE_16X8 0, 0 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK + + ; start work on filters + MB_FILTER_AND_WRITEBACK 2 + + ; transpose and write back + MBV_TRANSPOSE + + mov rsi, arg(0) ;u_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] + MBV_WRITEBACK_1 + mov rsi, arg(5) ;v_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] + MBV_WRITEBACK_2 + + add rsp, lf_var_size + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_simple_horizontal_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +;) +global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE +sym(vp8_loop_filter_simple_horizontal_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 3 + SAVE_XMM 7 + GET_GOT rbx + ; end prolog + + mov rcx, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + movdqa xmm6, [GLOBAL(tfe)] + lea rdx, [rcx + rax] + neg rax + + ; calculate mask + movdqa xmm0, [rdx] ; q1 + mov rdx, arg(2) ;blimit + movdqa xmm1, [rcx+2*rax] ; p1 + + movdqa xmm2, xmm1 + movdqa xmm3, xmm0 + + psubusb xmm0, xmm1 ; q1-=p1 + psubusb xmm1, xmm3 ; p1-=q1 + por xmm1, xmm0 ; abs(p1-q1) + pand xmm1, xmm6 ; set lsb of each byte to zero + psrlw xmm1, 1 ; abs(p1-q1)/2 + + movdqa xmm7, XMMWORD PTR [rdx] + + movdqa xmm5, [rcx+rax] ; p0 + movdqa xmm4, [rcx] ; q0 + movdqa xmm0, xmm4 ; q0 + movdqa xmm6, xmm5 ; p0 + psubusb xmm5, xmm4 ; p0-=q0 + psubusb xmm4, xmm6 ; q0-=p0 + por xmm5, xmm4 ; abs(p0 - q0) + + movdqa xmm4, [GLOBAL(t80)] + + paddusb xmm5, xmm5 ; abs(p0-q0)*2 + paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit + pxor xmm7, xmm7 + pcmpeqb xmm5, xmm7 + + + ; start work on filters + pxor xmm2, xmm4 ; p1 offset to convert to signed values + pxor xmm3, xmm4 ; q1 offset to convert to signed values + psubsb xmm2, xmm3 ; p1 - q1 + + pxor xmm6, xmm4 ; offset to convert to signed values + pxor xmm0, xmm4 ; offset to convert to signed values + movdqa xmm3, xmm0 ; q0 + psubsb xmm0, xmm6 ; q0 - p0 + paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) + paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0) + paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) + pand xmm5, xmm2 ; mask filter values we don't care about + + movdqa xmm0, xmm5 + paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 + + movdqa xmm1, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] + +; pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add + + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm1 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm6, xmm5 ; p0+= p0 add + + pxor xmm3, xmm4 ; unoffset + movdqa [rcx], xmm3 ; write back + + pxor xmm6, xmm4 ; unoffset + movdqa [rcx+rax], xmm6 ; write back + + ; begin epilog + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_simple_vertical_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +;) +global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE +sym(vp8_loop_filter_simple_vertical_edge_sse2): + push rbp ; save old base pointer value. + mov rbp, rsp ; set new base pointer value. + SHADOW_ARGS_TO_STACK 3 + SAVE_XMM 7 + GET_GOT rbx ; save callee-saved reg + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 32 ; reserve 32 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + lea rsi, [rsi - 2 ] + lea rdi, [rsi + rax] + lea rdx, [rsi + rax*4] + lea rcx, [rdx + rax] + + movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00 + movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40 + movd xmm2, [rdi] ; 13 12 11 10 + movd xmm3, [rcx] ; 53 52 51 50 + punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00 + punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10 + + movd xmm4, [rsi + rax*2] ; 23 22 21 20 + movd xmm5, [rdx + rax*2] ; 63 62 61 60 + movd xmm6, [rdi + rax*2] ; 33 32 31 30 + movd xmm7, [rcx + rax*2] ; 73 72 71 70 + punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20 + punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30 + + punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 + punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 + + movdqa xmm1, xmm0 + punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 + punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 + + movdqa xmm2, xmm0 + punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 + punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + + lea rsi, [rsi + rax*8] + lea rdi, [rsi + rax] + lea rdx, [rsi + rax*4] + lea rcx, [rdx + rax] + + movd xmm4, [rsi] ; 83 82 81 80 + movd xmm1, [rdx] ; c3 c2 c1 c0 + movd xmm6, [rdi] ; 93 92 91 90 + movd xmm3, [rcx] ; d3 d2 d1 d0 + punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 + punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 + + movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 + movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 + movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 + movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 + punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 + punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 + + punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 + punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 + + movdqa xmm7, xmm4 + punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 + punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 + + movdqa xmm6, xmm4 + punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 + punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 + + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + + punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 + punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + + mov rdx, arg(2) ;blimit + + ; calculate mask + movdqa xmm6, xmm0 ; p1 + movdqa xmm7, xmm3 ; q1 + psubusb xmm7, xmm0 ; q1-=p1 + psubusb xmm6, xmm3 ; p1-=q1 + por xmm6, xmm7 ; abs(p1-q1) + pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw xmm6, 1 ; abs(p1-q1)/2 + + movdqa xmm7, [rdx] + + movdqa xmm5, xmm1 ; p0 + movdqa xmm4, xmm2 ; q0 + psubusb xmm5, xmm2 ; p0-=q0 + psubusb xmm4, xmm1 ; q0-=p0 + por xmm5, xmm4 ; abs(p0 - q0) + paddusb xmm5, xmm5 ; abs(p0-q0)*2 + paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + movdqa xmm4, [GLOBAL(t80)] + + psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit + pxor xmm7, xmm7 + pcmpeqb xmm5, xmm7 ; mm5 = mask + + ; start work on filters + movdqa t0, xmm0 + movdqa t1, xmm3 + + pxor xmm0, xmm4 ; p1 offset to convert to signed values + pxor xmm3, xmm4 ; q1 offset to convert to signed values + psubsb xmm0, xmm3 ; p1 - q1 + + pxor xmm1, xmm4 ; offset to convert to signed values + pxor xmm2, xmm4 ; offset to convert to signed values + + movdqa xmm3, xmm2 ; offseted ; q0 + psubsb xmm2, xmm1 ; q0 - p0 + paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) + paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0) + pand xmm5, xmm0 ; mask filter values we don't care about + + movdqa xmm0, xmm5 + paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 + + movdqa xmm6, [GLOBAL(te0)] + movdqa xmm2, [GLOBAL(t1f)] + +; pxor xmm7, xmm7 + pcmpgtb xmm7, xmm0 ;save sign + pand xmm7, xmm6 ;preserve the upper 3 bits + psrlw xmm0, 3 + pand xmm0, xmm2 ;clear out upper 3 bits + por xmm0, xmm7 ;add sign + psubsb xmm3, xmm0 ; q0-= q0sz add + + pxor xmm7, xmm7 + pcmpgtb xmm7, xmm5 ;save sign + pand xmm7, xmm6 ;preserve the upper 3 bits + psrlw xmm5, 3 + pand xmm5, xmm2 ;clear out upper 3 bits + por xmm5, xmm7 ;add sign + paddsb xmm1, xmm5 ; p0+= p0 add + + pxor xmm3, xmm4 ; unoffset q0 + pxor xmm1, xmm4 ; unoffset p0 + + movdqa xmm0, t0 ; p1 + movdqa xmm4, t1 ; q1 + + ; write out order: xmm0 xmm2 xmm1 xmm3 + lea rdx, [rsi + rax*4] + + ; transpose back to write out + ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 + ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + movdqa xmm6, xmm0 + punpcklbw xmm0, xmm1 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 + punpckhbw xmm6, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + + movdqa xmm5, xmm3 + punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + movdqa xmm2, xmm0 + punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 + punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 + + movdqa xmm3, xmm6 + punpcklwd xmm6, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 + punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 + + movd [rsi], xmm6 ; write the second 8-line result + movd [rdx], xmm3 + psrldq xmm6, 4 + psrldq xmm3, 4 + movd [rdi], xmm6 + movd [rcx], xmm3 + psrldq xmm6, 4 + psrldq xmm3, 4 + movd [rsi + rax*2], xmm6 + movd [rdx + rax*2], xmm3 + psrldq xmm6, 4 + psrldq xmm3, 4 + movd [rdi + rax*2], xmm6 + movd [rcx + rax*2], xmm3 + + neg rax + lea rsi, [rsi + rax*8] + neg rax + lea rdi, [rsi + rax] + lea rdx, [rsi + rax*4] + lea rcx, [rdx + rax] + + movd [rsi], xmm0 ; write the first 8-line result + movd [rdx], xmm2 + psrldq xmm0, 4 + psrldq xmm2, 4 + movd [rdi], xmm0 + movd [rcx], xmm2 + psrldq xmm0, 4 + psrldq xmm2, 4 + movd [rsi + rax*2], xmm0 + movd [rdx + rax*2], xmm2 + psrldq xmm0, 4 + psrldq xmm2, 4 + movd [rdi + rax*2], xmm0 + movd [rcx + rax*2], xmm2 + + add rsp, 32 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +tfe: + times 16 db 0xfe +align 16 +t80: + times 16 db 0x80 +align 16 +t1s: + times 16 db 0x01 +align 16 +t3: + times 16 db 0x03 +align 16 +t4: + times 16 db 0x04 +align 16 +ones: + times 8 dw 0x0001 +align 16 +s9: + times 8 dw 0x0900 +align 16 +s63: + times 8 dw 0x003f +align 16 +te0: + times 16 db 0xe0 +align 16 +t1f: + times 16 db 0x1f diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c b/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c new file mode 100644 index 00000000000..65860046003 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8/common/loopfilter.h" + +#define prototype_loopfilter(sym) \ + void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ + const unsigned char *limit, const unsigned char *thresh, int count) + +#define prototype_loopfilter_nc(sym) \ + void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ + const unsigned char *limit, const unsigned char *thresh) + +#define prototype_simple_loopfilter(sym) \ + void sym(unsigned char *y, int ystride, const unsigned char *blimit) + +prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx); +prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx); +prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx); +prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx); +prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx); +prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); + +#if HAVE_SSE2 && ARCH_X86_64 +prototype_loopfilter(vp8_loop_filter_bv_y_sse2); +prototype_loopfilter(vp8_loop_filter_bh_y_sse2); +#else +prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2); +prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2); +#endif +prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2); +prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2); + +extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; +extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; +extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; +extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; + +#if HAVE_MMX +/* Horizontal MB filtering */ +void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + + +void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) +{ + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit); +} + + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); +} + + +void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) +{ + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit); +} +#endif + + +/* Horizontal MB filtering */ +#if HAVE_SSE2 +void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (u_ptr) + vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); +} + + +/* Vertical MB Filtering */ +void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ + vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (u_ptr) + vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); +} + + +/* Horizontal B Filtering */ +void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ +#if ARCH_X86_64 + vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); +#else + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); +#endif + + if (u_ptr) + vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride); +} + + +void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) +{ + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit); +} + + +/* Vertical B Filtering */ +void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, + int y_stride, int uv_stride, loop_filter_info *lfi) +{ +#if ARCH_X86_64 + vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); +#else + vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); +#endif + + if (u_ptr) + vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4); +} + + +void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) +{ + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); +} + +#endif diff --git a/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm b/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm new file mode 100644 index 00000000000..15e98713c7f --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm @@ -0,0 +1,274 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;void copy_mem8x8_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp8_copy_mem8x8_mmx) PRIVATE +sym(vp8_copy_mem8x8_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movq mm0, [rsi] + + movsxd rax, dword ptr arg(1) ;src_stride; + mov rdi, arg(2) ;dst; + + movq mm1, [rsi+rax] + movq mm2, [rsi+rax*2] + + movsxd rcx, dword ptr arg(3) ;dst_stride + lea rsi, [rsi+rax*2] + + movq [rdi], mm0 + add rsi, rax + + movq [rdi+rcx], mm1 + movq [rdi+rcx*2], mm2 + + + lea rdi, [rdi+rcx*2] + movq mm3, [rsi] + + add rdi, rcx + movq mm4, [rsi+rax] + + movq mm5, [rsi+rax*2] + movq [rdi], mm3 + + lea rsi, [rsi+rax*2] + movq [rdi+rcx], mm4 + + movq [rdi+rcx*2], mm5 + lea rdi, [rdi+rcx*2] + + movq mm0, [rsi+rax] + movq mm1, [rsi+rax*2] + + movq [rdi+rcx], mm0 + movq [rdi+rcx*2],mm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void copy_mem8x4_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp8_copy_mem8x4_mmx) PRIVATE +sym(vp8_copy_mem8x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movq mm0, [rsi] + + movsxd rax, dword ptr arg(1) ;src_stride; + mov rdi, arg(2) ;dst; + + movq mm1, [rsi+rax] + movq mm2, [rsi+rax*2] + + movsxd rcx, dword ptr arg(3) ;dst_stride + lea rsi, [rsi+rax*2] + + movq [rdi], mm0 + movq [rdi+rcx], mm1 + + movq [rdi+rcx*2], mm2 + lea rdi, [rdi+rcx*2] + + movq mm3, [rsi+rax] + movq [rdi+rcx], mm3 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void copy_mem16x16_mmx( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp8_copy_mem16x16_mmx) PRIVATE +sym(vp8_copy_mem16x16_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movsxd rax, dword ptr arg(1) ;src_stride; + + mov rdi, arg(2) ;dst; + movsxd rcx, dword ptr arg(3) ;dst_stride + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq mm1, [rsi+rax] + movq mm4, [rsi+rax+8] + + movq mm2, [rsi+rax*2] + movq mm5, [rsi+rax*2+8] + + lea rsi, [rsi+rax*2] + add rsi, rax + + movq [rdi], mm0 + movq [rdi+8], mm3 + + movq [rdi+rcx], mm1 + movq [rdi+rcx+8], mm4 + + movq [rdi+rcx*2], mm2 + movq [rdi+rcx*2+8], mm5 + + lea rdi, [rdi+rcx*2] + add rdi, rcx + + movq mm0, [rsi] + movq mm3, [rsi+8]; + + movq [rdi], mm0 + movq [rdi+8], mm3 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm b/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm new file mode 100644 index 00000000000..cb89537f761 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm @@ -0,0 +1,116 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void copy_mem16x16_sse2( +; unsigned char *src, +; int src_stride, +; unsigned char *dst, +; int dst_stride +; ) +global sym(vp8_copy_mem16x16_sse2) PRIVATE +sym(vp8_copy_mem16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src; + movdqu xmm0, [rsi] + + movsxd rax, dword ptr arg(1) ;src_stride; + mov rdi, arg(2) ;dst; + + movdqu xmm1, [rsi+rax] + movdqu xmm2, [rsi+rax*2] + + movsxd rcx, dword ptr arg(3) ;dst_stride + lea rsi, [rsi+rax*2] + + movdqa [rdi], xmm0 + add rsi, rax + + movdqa [rdi+rcx], xmm1 + movdqa [rdi+rcx*2],xmm2 + + lea rdi, [rdi+rcx*2] + movdqu xmm3, [rsi] + + add rdi, rcx + movdqu xmm4, [rsi+rax] + + movdqu xmm5, [rsi+rax*2] + lea rsi, [rsi+rax*2] + + movdqa [rdi], xmm3 + add rsi, rax + + movdqa [rdi+rcx], xmm4 + movdqa [rdi+rcx*2],xmm5 + + lea rdi, [rdi+rcx*2] + movdqu xmm0, [rsi] + + add rdi, rcx + movdqu xmm1, [rsi+rax] + + movdqu xmm2, [rsi+rax*2] + lea rsi, [rsi+rax*2] + + movdqa [rdi], xmm0 + add rsi, rax + + movdqa [rdi+rcx], xmm1 + + movdqa [rdi+rcx*2], xmm2 + movdqu xmm3, [rsi] + + movdqu xmm4, [rsi+rax] + lea rdi, [rdi+rcx*2] + + add rdi, rcx + movdqu xmm5, [rsi+rax*2] + + lea rsi, [rsi+rax*2] + movdqa [rdi], xmm3 + + add rsi, rax + movdqa [rdi+rcx], xmm4 + + movdqa [rdi+rcx*2],xmm5 + movdqu xmm0, [rsi] + + lea rdi, [rdi+rcx*2] + movdqu xmm1, [rsi+rax] + + add rdi, rcx + movdqu xmm2, [rsi+rax*2] + + lea rsi, [rsi+rax*2] + movdqa [rdi], xmm0 + + movdqa [rdi+rcx], xmm1 + movdqa [rdi+rcx*2],xmm2 + + movdqu xmm3, [rsi+rax] + lea rdi, [rdi+rcx*2] + + movdqa [rdi+rcx], xmm3 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm new file mode 100644 index 00000000000..47dd452297f --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm @@ -0,0 +1,702 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" +extern sym(vp8_bilinear_filters_x86_8) + + +%define BLOCK_HEIGHT_WIDTH 4 +%define vp8_filter_weight 128 +%define VP8_FILTER_SHIFT 7 + + +;void vp8_filter_block1d_h6_mmx +;( +; unsigned char *src_ptr, +; unsigned short *output_ptr, +; unsigned int src_pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; short * vp8_filter +;) +global sym(vp8_filter_block1d_h6_mmx) PRIVATE +sym(vp8_filter_block1d_h6_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(6) ;vp8_filter + + movq mm1, [rdx + 16] ; do both the negative taps first!!! + movq mm2, [rdx + 32] ; + movq mm6, [rdx + 48] ; + movq mm7, [rdx + 64] ; + + mov rdi, arg(1) ;output_ptr + mov rsi, arg(0) ;src_ptr + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(5) ;output_width ; destination pitch? + pxor mm0, mm0 ; mm0 = 00000000 + +.nextrow: + movq mm3, [rsi-2] ; mm3 = p-2..p5 + movq mm4, mm3 ; mm4 = p-2..p5 + psrlq mm3, 8 ; mm3 = p-1..p5 + punpcklbw mm3, mm0 ; mm3 = p-1..p2 + pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. + + movq mm5, mm4 ; mm5 = p-2..p5 + punpckhbw mm4, mm0 ; mm5 = p2..p5 + pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers + paddsw mm3, mm4 ; mm3 += mm5 + + movq mm4, mm5 ; mm4 = p-2..p5; + psrlq mm5, 16 ; mm5 = p0..p5; + punpcklbw mm5, mm0 ; mm5 = p0..p3 + pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers + paddsw mm3, mm5 ; mm3 += mm5 + + movq mm5, mm4 ; mm5 = p-2..p5 + psrlq mm4, 24 ; mm4 = p1..p5 + punpcklbw mm4, mm0 ; mm4 = p1..p4 + pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers + paddsw mm3, mm4 ; mm3 += mm5 + + ; do outer positive taps + movd mm4, [rsi+3] + punpcklbw mm4, mm0 ; mm5 = p3..p6 + pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers + paddsw mm3, mm4 ; mm3 += mm5 + + punpcklbw mm5, mm0 ; mm5 = p-2..p1 + pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers + paddsw mm3, mm5 ; mm3 += mm5 + + paddsw mm3, [GLOBAL(rd)] ; mm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 + packuswb mm3, mm0 ; pack and unpack to saturate + punpcklbw mm3, mm0 ; + + movq [rdi], mm3 ; store the results in the destination + +%if ABI_IS_32BIT + add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line + add rdi, rax; +%else + movsxd r8, dword ptr arg(2) ;src_pixels_per_line + add rdi, rax; + + add rsi, r8 ; next line +%endif + + dec rcx ; decrement count + jnz .nextrow ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1dc_v6_mmx +;( +; short *src_ptr, +; unsigned char *output_ptr, +; int output_pitch, +; unsigned int pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; short * vp8_filter +;) +global sym(vp8_filter_block1dc_v6_mmx) PRIVATE +sym(vp8_filter_block1dc_v6_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movq mm5, [GLOBAL(rd)] + push rbx + mov rbx, arg(7) ;vp8_filter + movq mm1, [rbx + 16] ; do both the negative taps first!!! + movq mm2, [rbx + 32] ; + movq mm6, [rbx + 48] ; + movq mm7, [rbx + 64] ; + + movsxd rdx, dword ptr arg(3) ;pixels_per_line + mov rdi, arg(1) ;output_ptr + mov rsi, arg(0) ;src_ptr + sub rsi, rdx + sub rsi, rdx + movsxd rcx, DWORD PTR arg(5) ;output_height + movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch? + pxor mm0, mm0 ; mm0 = 00000000 + + +.nextrow_cv: + movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1 + pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. + + + movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2 + pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers. + paddsw mm3, mm4 ; mm3 += mm4 + + movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0 + pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers. + paddsw mm3, mm4 ; mm3 += mm4 + + movq mm4, [rsi] ; mm4 = p0..p3 = row -2 + pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers. + paddsw mm3, mm4 ; mm3 += mm4 + + + add rsi, rdx ; move source forward 1 line to avoid 3 * pitch + movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1 + pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers. + paddsw mm3, mm4 ; mm3 += mm4 + + movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3 + pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers. + paddsw mm3, mm4 ; mm3 += mm4 + + + paddsw mm3, mm5 ; mm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 + packuswb mm3, mm0 ; pack and saturate + + movd [rdi],mm3 ; store the results in the destination + ; the subsequent iterations repeat 3 out of 4 of these reads. Since the + ; recon block should be in cache this shouldn't cost much. Its obviously + ; avoidable!!!. + lea rdi, [rdi+rax] ; + dec rcx ; decrement count + jnz .nextrow_cv ; next row + + pop rbx + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void bilinear_predict8x8_mmx +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict8x8_mmx) PRIVATE +sym(vp8_bilinear_predict8x8_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; + + movsxd rax, dword ptr arg(2) ;xoffset + mov rdi, arg(4) ;dst_ptr ; + + shl rax, 5 ; offset * 32 + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] + + add rax, rcx ; HFilter + mov rsi, arg(0) ;src_ptr ; + + movsxd rdx, dword ptr arg(5) ;dst_pitch + movq mm1, [rax] ; + + movq mm2, [rax+16] ; + movsxd rax, dword ptr arg(3) ;yoffset + + pxor mm0, mm0 ; + + shl rax, 5 ; offset*32 + add rax, rcx ; VFilter + + lea rcx, [rdi+rdx*8] ; + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; + + + + ; get the first horizontal line done ; + movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movq mm4, mm3 ; make a copy of current line + + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + punpckhbw mm4, mm0 ; + + pmullw mm3, mm1 ; + pmullw mm4, mm1 ; + + movq mm5, [rsi+1] ; + movq mm6, mm5 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 ; + + pmullw mm5, mm2 ; + pmullw mm6, mm2 ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + movq mm7, mm3 ; + packuswb mm7, mm4 ; + + add rsi, rdx ; next line +.next_row_8x8: + movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movq mm4, mm3 ; make a copy of current line + + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + punpckhbw mm4, mm0 ; + + pmullw mm3, mm1 ; + pmullw mm4, mm1 ; + + movq mm5, [rsi+1] ; + movq mm6, mm5 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 ; + + pmullw mm5, mm2 ; + pmullw mm6, mm2 ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + movq mm5, mm7 ; + movq mm6, mm7 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 + + pmullw mm5, [rax] ; + pmullw mm6, [rax] ; + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + movq mm7, mm3 ; + packuswb mm7, mm4 ; + + + pmullw mm3, [rax+16] ; + pmullw mm4, [rax+16] ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + packuswb mm3, mm4 + + movq [rdi], mm3 ; store the results in the destination + +%if ABI_IS_32BIT + add rsi, rdx ; next line + add rdi, dword ptr arg(5) ;dst_pitch ; +%else + movsxd r8, dword ptr arg(5) ;dst_pitch + add rsi, rdx ; next line + add rdi, r8 ;dst_pitch +%endif + cmp rdi, rcx ; + jne .next_row_8x8 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void bilinear_predict8x4_mmx +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict8x4_mmx) PRIVATE +sym(vp8_bilinear_predict8x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; + + movsxd rax, dword ptr arg(2) ;xoffset + mov rdi, arg(4) ;dst_ptr ; + + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] + shl rax, 5 + + mov rsi, arg(0) ;src_ptr ; + add rax, rcx + + movsxd rdx, dword ptr arg(5) ;dst_pitch + movq mm1, [rax] ; + + movq mm2, [rax+16] ; + movsxd rax, dword ptr arg(3) ;yoffset + + pxor mm0, mm0 ; + shl rax, 5 + + add rax, rcx + lea rcx, [rdi+rdx*4] ; + + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; + + ; get the first horizontal line done ; + movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movq mm4, mm3 ; make a copy of current line + + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + punpckhbw mm4, mm0 ; + + pmullw mm3, mm1 ; + pmullw mm4, mm1 ; + + movq mm5, [rsi+1] ; + movq mm6, mm5 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 ; + + pmullw mm5, mm2 ; + pmullw mm6, mm2 ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + movq mm7, mm3 ; + packuswb mm7, mm4 ; + + add rsi, rdx ; next line +.next_row_8x4: + movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movq mm4, mm3 ; make a copy of current line + + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + punpckhbw mm4, mm0 ; + + pmullw mm3, mm1 ; + pmullw mm4, mm1 ; + + movq mm5, [rsi+1] ; + movq mm6, mm5 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 ; + + pmullw mm5, mm2 ; + pmullw mm6, mm2 ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + movq mm5, mm7 ; + movq mm6, mm7 ; + + punpcklbw mm5, mm0 ; + punpckhbw mm6, mm0 + + pmullw mm5, [rax] ; + pmullw mm6, [rax] ; + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + movq mm7, mm3 ; + packuswb mm7, mm4 ; + + + pmullw mm3, [rax+16] ; + pmullw mm4, [rax+16] ; + + paddw mm3, mm5 ; + paddw mm4, mm6 ; + + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw mm4, [GLOBAL(rd)] ; + psraw mm4, VP8_FILTER_SHIFT ; + + packuswb mm3, mm4 + + movq [rdi], mm3 ; store the results in the destination + +%if ABI_IS_32BIT + add rsi, rdx ; next line + add rdi, dword ptr arg(5) ;dst_pitch ; +%else + movsxd r8, dword ptr arg(5) ;dst_pitch + add rsi, rdx ; next line + add rdi, r8 +%endif + cmp rdi, rcx ; + jne .next_row_8x4 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void bilinear_predict4x4_mmx +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict4x4_mmx) PRIVATE +sym(vp8_bilinear_predict4x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; + + movsxd rax, dword ptr arg(2) ;xoffset + mov rdi, arg(4) ;dst_ptr ; + + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] + shl rax, 5 + + add rax, rcx ; HFilter + mov rsi, arg(0) ;src_ptr ; + + movsxd rdx, dword ptr arg(5) ;ldst_pitch + movq mm1, [rax] ; + + movq mm2, [rax+16] ; + movsxd rax, dword ptr arg(3) ;yoffset + + pxor mm0, mm0 ; + shl rax, 5 + + add rax, rcx + lea rcx, [rdi+rdx*4] ; + + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; + + ; get the first horizontal line done ; + movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + + pmullw mm3, mm1 ; + movd mm5, [rsi+1] ; + + punpcklbw mm5, mm0 ; + pmullw mm5, mm2 ; + + paddw mm3, mm5 ; + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + movq mm7, mm3 ; + packuswb mm7, mm0 ; + + add rsi, rdx ; next line +.next_row_4x4: + movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 + + pmullw mm3, mm1 ; + movd mm5, [rsi+1] ; + + punpcklbw mm5, mm0 ; + pmullw mm5, mm2 ; + + paddw mm3, mm5 ; + + movq mm5, mm7 ; + punpcklbw mm5, mm0 ; + + pmullw mm5, [rax] ; + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + movq mm7, mm3 ; + + packuswb mm7, mm0 ; + + pmullw mm3, [rax+16] ; + paddw mm3, mm5 ; + + + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value + psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + packuswb mm3, mm0 + movd [rdi], mm3 ; store the results in the destination + +%if ABI_IS_32BIT + add rsi, rdx ; next line + add rdi, dword ptr arg(5) ;dst_pitch ; +%else + movsxd r8, dword ptr arg(5) ;dst_pitch ; + add rsi, rdx ; next line + add rdi, r8 +%endif + + cmp rdi, rcx ; + jne .next_row_4x4 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + + +SECTION_RODATA +align 16 +rd: + times 4 dw 0x40 + +align 16 +global HIDDEN_DATA(sym(vp8_six_tap_mmx)) +sym(vp8_six_tap_mmx): + times 8 dw 0 + times 8 dw 0 + times 8 dw 128 + times 8 dw 0 + times 8 dw 0 + times 8 dw 0 + + times 8 dw 0 + times 8 dw -6 + times 8 dw 123 + times 8 dw 12 + times 8 dw -1 + times 8 dw 0 + + times 8 dw 2 + times 8 dw -11 + times 8 dw 108 + times 8 dw 36 + times 8 dw -8 + times 8 dw 1 + + times 8 dw 0 + times 8 dw -9 + times 8 dw 93 + times 8 dw 50 + times 8 dw -6 + times 8 dw 0 + + times 8 dw 3 + times 8 dw -16 + times 8 dw 77 + times 8 dw 77 + times 8 dw -16 + times 8 dw 3 + + times 8 dw 0 + times 8 dw -6 + times 8 dw 50 + times 8 dw 93 + times 8 dw -9 + times 8 dw 0 + + times 8 dw 1 + times 8 dw -8 + times 8 dw 36 + times 8 dw 108 + times 8 dw -11 + times 8 dw 2 + + times 8 dw 0 + times 8 dw -1 + times 8 dw 12 + times 8 dw 123 + times 8 dw -6 + times 8 dw 0 + + diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm new file mode 100644 index 00000000000..69f8d103c14 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm @@ -0,0 +1,1372 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" +extern sym(vp8_bilinear_filters_x86_8) + +%define BLOCK_HEIGHT_WIDTH 4 +%define VP8_FILTER_WEIGHT 128 +%define VP8_FILTER_SHIFT 7 + + +;/************************************************************************************ +; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The +; input pixel array has output_height rows. This routine assumes that output_height is an +; even number. This function handles 8 pixels in horizontal direction, calculating ONE +; rows each iteration to take advantage of the 128 bits operations. +;*************************************************************************************/ +;void vp8_filter_block1d8_h6_sse2 +;( +; unsigned char *src_ptr, +; unsigned short *output_ptr, +; unsigned int src_pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; short *vp8_filter +;) +global sym(vp8_filter_block1d8_h6_sse2) PRIVATE +sym(vp8_filter_block1d8_h6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(6) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(1) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(5) ;output_width +%endif + pxor xmm0, xmm0 ; clear xmm0 for unpack + +.filter_block1d8_h6_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 + punpcklbw xmm4, xmm0 + + movdqa XMMWORD Ptr [rdi], xmm4 + lea rsi, [rsi + rax] + +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(5) ;[output_width] +%else + add rdi, r8 +%endif + dec rcx + + jnz .filter_block1d8_h6_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d16_h6_sse2 +;( +; unsigned char *src_ptr, +; unsigned short *output_ptr, +; unsigned int src_pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; short *vp8_filter +;) +;/************************************************************************************ +; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The +; input pixel array has output_height rows. This routine assumes that output_height is an +; even number. This function handles 8 pixels in horizontal direction, calculating ONE +; rows each iteration to take advantage of the 128 bits operations. +;*************************************************************************************/ +global sym(vp8_filter_block1d16_h6_sse2) PRIVATE +sym(vp8_filter_block1d16_h6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(6) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(1) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(5) ;output_width +%endif + + pxor xmm0, xmm0 ; clear xmm0 for unpack + +.filter_block1d16_h6_sse2_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + movq xmm2, MMWORD PTR [rsi +14] + pslldq xmm2, 8 + + por xmm2, xmm1 + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 + punpcklbw xmm4, xmm0 + + movdqa XMMWORD Ptr [rdi], xmm4 + + movdqa xmm3, xmm2 + movdqa xmm4, xmm2 + + movdqa xmm5, xmm2 + movdqa xmm6, xmm2 + + movdqa xmm7, xmm2 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm2 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 + punpcklbw xmm4, xmm0 + + movdqa XMMWORD Ptr [rdi+16], xmm4 + + lea rsi, [rsi + rax] +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(5) ;[output_width] +%else + add rdi, r8 +%endif + + dec rcx + jnz .filter_block1d16_h6_sse2_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d8_v6_sse2 +;( +; short *src_ptr, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; short * vp8_filter +;) +;/************************************************************************************ +; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The +; input pixel array has output_height rows. +;*************************************************************************************/ +global sym(vp8_filter_block1d8_v6_sse2) PRIVATE +sym(vp8_filter_block1d8_v6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(7) ;vp8_filter + movsxd rdx, dword ptr arg(3) ;pixels_per_line + + mov rdi, arg(1) ;output_ptr + mov rsi, arg(0) ;src_ptr + + sub rsi, rdx + sub rsi, rdx + + movsxd rcx, DWORD PTR arg(5) ;[output_height] + pxor xmm0, xmm0 ; clear xmm0 + + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(2) ; dst_ptich +%endif + +.vp8_filter_block1d8_v6_sse2_loop: + movdqa xmm1, XMMWORD PTR [rsi] + pmullw xmm1, [rax] + + movdqa xmm2, XMMWORD PTR [rsi + rdx] + pmullw xmm2, [rax + 16] + + movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] + pmullw xmm3, [rax + 32] + + movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] + pmullw xmm5, [rax + 64] + + add rsi, rdx + movdqa xmm4, XMMWORD PTR [rsi + rdx * 2] + + pmullw xmm4, [rax + 48] + movdqa xmm6, XMMWORD PTR [rsi + rdx * 4] + + pmullw xmm6, [rax + 80] + + paddsw xmm2, xmm5 + paddsw xmm2, xmm3 + + paddsw xmm2, xmm1 + paddsw xmm2, xmm4 + + paddsw xmm2, xmm6 + paddsw xmm2, xmm7 + + psraw xmm2, 7 + packuswb xmm2, xmm0 ; pack and saturate + + movq QWORD PTR [rdi], xmm2 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(2) ;[dst_ptich] +%else + add rdi, r8 +%endif + dec rcx ; decrement count + jnz .vp8_filter_block1d8_v6_sse2_loop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d16_v6_sse2 +;( +; unsigned short *src_ptr, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; const short *vp8_filter +;) +;/************************************************************************************ +; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The +; input pixel array has output_height rows. +;*************************************************************************************/ +global sym(vp8_filter_block1d16_v6_sse2) PRIVATE +sym(vp8_filter_block1d16_v6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(7) ;vp8_filter + movsxd rdx, dword ptr arg(3) ;pixels_per_line + + mov rdi, arg(1) ;output_ptr + mov rsi, arg(0) ;src_ptr + + sub rsi, rdx + sub rsi, rdx + + movsxd rcx, DWORD PTR arg(5) ;[output_height] +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(2) ; dst_ptich +%endif + +.vp8_filter_block1d16_v6_sse2_loop: +; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order. + movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2 + movdqa xmm2, XMMWORD PTR [rsi + rdx + 16] + pmullw xmm1, [rax + 16] + pmullw xmm2, [rax + 16] + + movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5 + movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16] + pmullw xmm3, [rax + 64] + pmullw xmm4, [rax + 64] + + movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3 + movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16] + pmullw xmm5, [rax + 32] + pmullw xmm6, [rax + 32] + + movdqa xmm7, XMMWORD PTR [rsi] ; line 1 + movdqa xmm0, XMMWORD PTR [rsi + 16] + pmullw xmm7, [rax] + pmullw xmm0, [rax] + + paddsw xmm1, xmm3 + paddsw xmm2, xmm4 + paddsw xmm1, xmm5 + paddsw xmm2, xmm6 + paddsw xmm1, xmm7 + paddsw xmm2, xmm0 + + add rsi, rdx + + movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4 + movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16] + pmullw xmm3, [rax + 48] + pmullw xmm4, [rax + 48] + + movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6 + movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16] + pmullw xmm5, [rax + 80] + pmullw xmm6, [rax + 80] + + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] + pxor xmm0, xmm0 ; clear xmm0 + + paddsw xmm1, xmm3 + paddsw xmm2, xmm4 + paddsw xmm1, xmm5 + paddsw xmm2, xmm6 + + paddsw xmm1, xmm7 + paddsw xmm2, xmm7 + + psraw xmm1, 7 + psraw xmm2, 7 + + packuswb xmm1, xmm2 ; pack and saturate + movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(2) ;[dst_ptich] +%else + add rdi, r8 +%endif + dec rcx ; decrement count + jnz .vp8_filter_block1d16_v6_sse2_loop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d8_h6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; First-pass filter only when yoffset==0 +global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE +sym(vp8_filter_block1d8_h6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(5) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ;dst_ptich +%endif + pxor xmm0, xmm0 ; clear xmm0 for unpack + +.filter_block1d8_h6_only_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 + + movq QWORD PTR [rdi], xmm4 ; store the results in the destination + lea rsi, [rsi + rax] + +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(3) ;dst_ptich +%else + add rdi, r8 +%endif + dec rcx + + jnz .filter_block1d8_h6_only_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d16_h6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; First-pass filter only when yoffset==0 +global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE +sym(vp8_filter_block1d16_h6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(5) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ;dst_ptich +%endif + + pxor xmm0, xmm0 ; clear xmm0 for unpack + +.filter_block1d16_h6_only_sse2_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + movq xmm2, MMWORD PTR [rsi +14] + pslldq xmm2, 8 + + por xmm2, xmm1 + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 ; lower 8 bytes + + movq QWORD Ptr [rdi], xmm4 ; store the results in the destination + + movdqa xmm3, xmm2 + movdqa xmm4, xmm2 + + movdqa xmm5, xmm2 + movdqa xmm6, xmm2 + + movdqa xmm7, xmm2 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm2 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 ; higher 8 bytes + + movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination + + lea rsi, [rsi + rax] +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(3) ;dst_ptich +%else + add rdi, r8 +%endif + + dec rcx + jnz .filter_block1d16_h6_only_sse2_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d8_v6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; Second-pass filter only when xoffset==0 +global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE +sym(vp8_filter_block1d8_v6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + mov rax, arg(5) ;vp8_filter + + pxor xmm0, xmm0 ; clear xmm0 + + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ; dst_ptich +%endif + +.vp8_filter_block1d8_v6_only_sse2_loop: + movq xmm1, MMWORD PTR [rsi] + movq xmm2, MMWORD PTR [rsi + rdx] + movq xmm3, MMWORD PTR [rsi + rdx * 2] + movq xmm5, MMWORD PTR [rsi + rdx * 4] + add rsi, rdx + movq xmm4, MMWORD PTR [rsi + rdx * 2] + movq xmm6, MMWORD PTR [rsi + rdx * 4] + + punpcklbw xmm1, xmm0 + pmullw xmm1, [rax] + + punpcklbw xmm2, xmm0 + pmullw xmm2, [rax + 16] + + punpcklbw xmm3, xmm0 + pmullw xmm3, [rax + 32] + + punpcklbw xmm5, xmm0 + pmullw xmm5, [rax + 64] + + punpcklbw xmm4, xmm0 + pmullw xmm4, [rax + 48] + + punpcklbw xmm6, xmm0 + pmullw xmm6, [rax + 80] + + paddsw xmm2, xmm5 + paddsw xmm2, xmm3 + + paddsw xmm2, xmm1 + paddsw xmm2, xmm4 + + paddsw xmm2, xmm6 + paddsw xmm2, xmm7 + + psraw xmm2, 7 + packuswb xmm2, xmm0 ; pack and saturate + + movq QWORD PTR [rdi], xmm2 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[dst_ptich] +%else + add rdi, r8 +%endif + dec rcx ; decrement count + jnz .vp8_filter_block1d8_v6_only_sse2_loop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_unpack_block1d16_h6_sse2 +;( +; unsigned char *src_ptr, +; unsigned short *output_ptr, +; unsigned int src_pixels_per_line, +; unsigned int output_height, +; unsigned int output_width +;) +global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE +sym(vp8_unpack_block1d16_h6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(1) ;output_ptr + + movsxd rcx, dword ptr arg(3) ;output_height + movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source + + pxor xmm0, xmm0 ; clear xmm0 for unpack +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source +%endif + +.unpack_block1d16_h6_sse2_rowloop: + movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2 + movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + punpcklbw xmm1, xmm0 + + movdqa XMMWORD Ptr [rdi], xmm1 + movdqa XMMWORD Ptr [rdi + 16], xmm3 + + lea rsi, [rsi + rax] +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(4) ;[output_width] +%else + add rdi, r8 +%endif + dec rcx + jnz .unpack_block1d16_h6_sse2_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_bilinear_predict16x16_sse2 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +extern sym(vp8_bilinear_filters_x86_8) +global sym(vp8_bilinear_predict16x16_sse2) PRIVATE +sym(vp8_bilinear_predict16x16_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] + + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] + movsxd rax, dword ptr arg(2) ;xoffset + + cmp rax, 0 ;skip first_pass filter if xoffset=0 + je .b16x16_sp_only + + shl rax, 5 + add rax, rcx ;HFilter + + mov rdi, arg(4) ;dst_ptr + mov rsi, arg(0) ;src_ptr + movsxd rdx, dword ptr arg(5) ;dst_pitch + + movdqa xmm1, [rax] + movdqa xmm2, [rax+16] + + movsxd rax, dword ptr arg(3) ;yoffset + + cmp rax, 0 ;skip second_pass filter if yoffset=0 + je .b16x16_fp_only + + shl rax, 5 + add rax, rcx ;VFilter + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + pxor xmm0, xmm0 + +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(5) ;dst_pitch +%endif + ; get the first horizontal line done + movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movdqa xmm4, xmm3 ; make a copy of current line + + punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 + punpckhbw xmm4, xmm0 + + pmullw xmm3, xmm1 + pmullw xmm4, xmm1 + + movdqu xmm5, [rsi+1] + movdqa xmm6, xmm5 + + punpcklbw xmm5, xmm0 + punpckhbw xmm6, xmm0 + + pmullw xmm5, xmm2 + pmullw xmm6, xmm2 + + paddw xmm3, xmm5 + paddw xmm4, xmm6 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + movdqa xmm7, xmm3 + packuswb xmm7, xmm4 + + add rsi, rdx ; next line +.next_row: + movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movdqa xmm4, xmm3 ; make a copy of current line + + punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 + punpckhbw xmm4, xmm0 + + pmullw xmm3, xmm1 + pmullw xmm4, xmm1 + + movdqu xmm5, [rsi+1] + movdqa xmm6, xmm5 + + punpcklbw xmm5, xmm0 + punpckhbw xmm6, xmm0 + + pmullw xmm5, xmm2 + pmullw xmm6, xmm2 + + paddw xmm3, xmm5 + paddw xmm4, xmm6 + + movdqa xmm5, xmm7 + movdqa xmm6, xmm7 + + punpcklbw xmm5, xmm0 + punpckhbw xmm6, xmm0 + + pmullw xmm5, [rax] + pmullw xmm6, [rax] + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + movdqa xmm7, xmm3 + packuswb xmm7, xmm4 + + pmullw xmm3, [rax+16] + pmullw xmm4, [rax+16] + + paddw xmm3, xmm5 + paddw xmm4, xmm6 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + packuswb xmm3, xmm4 + movdqa [rdi], xmm3 ; store the results in the destination + + add rsi, rdx ; next line +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(5) ;dst_pitch +%else + add rdi, r8 +%endif + + cmp rdi, rcx + jne .next_row + + jmp .done + +.b16x16_sp_only: + movsxd rax, dword ptr arg(3) ;yoffset + shl rax, 5 + add rax, rcx ;VFilter + + mov rdi, arg(4) ;dst_ptr + mov rsi, arg(0) ;src_ptr + movsxd rdx, dword ptr arg(5) ;dst_pitch + + movdqa xmm1, [rax] + movdqa xmm2, [rax+16] + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + + pxor xmm0, xmm0 + + ; get the first horizontal line done + movdqu xmm7, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + + add rsi, rax ; next line +.next_row_spo: + movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + + movdqa xmm5, xmm7 + movdqa xmm6, xmm7 + + movdqa xmm4, xmm3 ; make a copy of current line + movdqa xmm7, xmm3 + + punpcklbw xmm5, xmm0 + punpckhbw xmm6, xmm0 + punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 + punpckhbw xmm4, xmm0 + + pmullw xmm5, xmm1 + pmullw xmm6, xmm1 + pmullw xmm3, xmm2 + pmullw xmm4, xmm2 + + paddw xmm3, xmm5 + paddw xmm4, xmm6 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + packuswb xmm3, xmm4 + movdqa [rdi], xmm3 ; store the results in the destination + + add rsi, rax ; next line + add rdi, rdx ;dst_pitch + cmp rdi, rcx + jne .next_row_spo + + jmp .done + +.b16x16_fp_only: + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + pxor xmm0, xmm0 + +.next_row_fpo: + movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 + movdqa xmm4, xmm3 ; make a copy of current line + + punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 + punpckhbw xmm4, xmm0 + + pmullw xmm3, xmm1 + pmullw xmm4, xmm1 + + movdqu xmm5, [rsi+1] + movdqa xmm6, xmm5 + + punpcklbw xmm5, xmm0 + punpckhbw xmm6, xmm0 + + pmullw xmm5, xmm2 + pmullw xmm6, xmm2 + + paddw xmm3, xmm5 + paddw xmm4, xmm6 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + packuswb xmm3, xmm4 + movdqa [rdi], xmm3 ; store the results in the destination + + add rsi, rax ; next line + add rdi, rdx ; dst_pitch + cmp rdi, rcx + jne .next_row_fpo + +.done: + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_bilinear_predict8x8_sse2 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict8x8_sse2) PRIVATE +sym(vp8_bilinear_predict8x8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 144 ; reserve 144 bytes + + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] + + mov rsi, arg(0) ;src_ptr + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + ;Read 9-line unaligned data in and put them on stack. This gives a big + ;performance boost. + movdqu xmm0, [rsi] + lea rax, [rdx + rdx*2] + movdqu xmm1, [rsi+rdx] + movdqu xmm2, [rsi+rdx*2] + add rsi, rax + movdqu xmm3, [rsi] + movdqu xmm4, [rsi+rdx] + movdqu xmm5, [rsi+rdx*2] + add rsi, rax + movdqu xmm6, [rsi] + movdqu xmm7, [rsi+rdx] + + movdqa XMMWORD PTR [rsp], xmm0 + + movdqu xmm0, [rsi+rdx*2] + + movdqa XMMWORD PTR [rsp+16], xmm1 + movdqa XMMWORD PTR [rsp+32], xmm2 + movdqa XMMWORD PTR [rsp+48], xmm3 + movdqa XMMWORD PTR [rsp+64], xmm4 + movdqa XMMWORD PTR [rsp+80], xmm5 + movdqa XMMWORD PTR [rsp+96], xmm6 + movdqa XMMWORD PTR [rsp+112], xmm7 + movdqa XMMWORD PTR [rsp+128], xmm0 + + movsxd rax, dword ptr arg(2) ;xoffset + shl rax, 5 + add rax, rcx ;HFilter + + mov rdi, arg(4) ;dst_ptr + movsxd rdx, dword ptr arg(5) ;dst_pitch + + movdqa xmm1, [rax] + movdqa xmm2, [rax+16] + + movsxd rax, dword ptr arg(3) ;yoffset + shl rax, 5 + add rax, rcx ;VFilter + + lea rcx, [rdi+rdx*8] + + movdqa xmm5, [rax] + movdqa xmm6, [rax+16] + + pxor xmm0, xmm0 + + ; get the first horizontal line done + movdqa xmm3, XMMWORD PTR [rsp] + movdqa xmm4, xmm3 ; make a copy of current line + psrldq xmm4, 1 + + punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 + punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 + + pmullw xmm3, xmm1 + pmullw xmm4, xmm2 + + paddw xmm3, xmm4 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + movdqa xmm7, xmm3 + add rsp, 16 ; next line +.next_row8x8: + movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + movdqa xmm4, xmm3 ; make a copy of current line + psrldq xmm4, 1 + + punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 + punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 + + pmullw xmm3, xmm1 + pmullw xmm4, xmm2 + + paddw xmm3, xmm4 + pmullw xmm7, xmm5 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + movdqa xmm4, xmm3 + + pmullw xmm3, xmm6 + paddw xmm3, xmm7 + + movdqa xmm7, xmm4 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + packuswb xmm3, xmm0 + movq [rdi], xmm3 ; store the results in the destination + + add rsp, 16 ; next line + add rdi, rdx + + cmp rdi, rcx + jne .next_row8x8 + + ;add rsp, 144 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +SECTION_RODATA +align 16 +rd: + times 8 dw 0x40 diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm new file mode 100644 index 00000000000..c06f24556e6 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm @@ -0,0 +1,1508 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%define BLOCK_HEIGHT_WIDTH 4 +%define VP8_FILTER_WEIGHT 128 +%define VP8_FILTER_SHIFT 7 + + +;/************************************************************************************ +; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The +; input pixel array has output_height rows. This routine assumes that output_height is an +; even number. This function handles 8 pixels in horizontal direction, calculating ONE +; rows each iteration to take advantage of the 128 bits operations. +; +; This is an implementation of some of the SSE optimizations first seen in ffvp8 +; +;*************************************************************************************/ +;void vp8_filter_block1d8_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE +sym(vp8_filter_block1d8_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 + + movdqa xmm7, [GLOBAL(rd)] + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + mov rdi, arg(2) ;output_ptr + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d8_h4_ssse3 + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + + sub rdi, rdx +;xmm3 free +.filter_block1d8_h6_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm1, xmm0 + pmaddubsw xmm0, xmm4 + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + pmaddubsw xmm1, xmm5 + + lea rdi, [rdi + rdx] + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] + dec rcx + + paddsw xmm0, xmm1 + paddsw xmm2, xmm7 + + paddsw xmm0, xmm2 + + psraw xmm0, 7 + + packuswb xmm0, xmm0 + + movq MMWORD Ptr [rdi], xmm0 + jnz .filter_block1d8_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d8_h4_ssse3: + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] + movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] + + mov rsi, arg(0) ;src_ptr + + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + + sub rdi, rdx + +.filter_block1d8_h4_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm2, xmm0 + pshufb xmm0, xmm3 + + pshufb xmm2, xmm4 + pmaddubsw xmm0, xmm5 + + lea rdi, [rdi + rdx] + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] + dec rcx + + paddsw xmm0, xmm7 + + paddsw xmm0, xmm2 + + psraw xmm0, 7 + + packuswb xmm0, xmm0 + + movq MMWORD Ptr [rdi], xmm0 + + jnz .filter_block1d8_h4_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret +;void vp8_filter_block1d16_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE +sym(vp8_filter_block1d16_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + mov rdi, arg(2) ;output_ptr + + mov rsi, arg(0) ;src_ptr + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rdx, dword ptr arg(3) ;output_pitch + +.filter_block1d16_h6_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm1, xmm0 + pmaddubsw xmm0, xmm4 + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + movq xmm3, MMWORD PTR [rsi + 6] + + pmaddubsw xmm1, xmm5 + movq xmm7, MMWORD PTR [rsi + 11] + + pmaddubsw xmm2, xmm6 + punpcklbw xmm3, xmm7 + + paddsw xmm0, xmm1 + movdqa xmm1, xmm3 + + pmaddubsw xmm3, xmm4 + paddsw xmm0, xmm2 + + movdqa xmm2, xmm1 + paddsw xmm0, [GLOBAL(rd)] + + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + + psraw xmm0, 7 + pmaddubsw xmm1, xmm5 + + pmaddubsw xmm2, xmm6 + packuswb xmm0, xmm0 + + lea rsi, [rsi + rax] + paddsw xmm3, xmm1 + + paddsw xmm3, xmm2 + + paddsw xmm3, [GLOBAL(rd)] + + psraw xmm3, 7 + + packuswb xmm3, xmm3 + + punpcklqdq xmm0, xmm3 + + movdqa XMMWORD Ptr [rdi], xmm0 + + lea rdi, [rdi + rdx] + dec rcx + jnz .filter_block1d16_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_filter_block1d4_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE +sym(vp8_filter_block1d4_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + movdqa xmm7, [GLOBAL(rd)] + + cmp esi, DWORD PTR [rax] + je .vp8_filter_block1d4_h4_ssse3 + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + +;xmm3 free +.filter_block1d4_h6_rowloop_ssse3: + movdqu xmm0, XMMWORD PTR [rsi - 2] + + movdqa xmm1, xmm0 + pshufb xmm0, [GLOBAL(shuf1b)] + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2b)] + pmaddubsw xmm0, xmm4 + pshufb xmm2, [GLOBAL(shuf3b)] + pmaddubsw xmm1, xmm5 + +;-- + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] +;-- + paddsw xmm0, xmm1 + paddsw xmm0, xmm7 + pxor xmm1, xmm1 + paddsw xmm0, xmm2 + psraw xmm0, 7 + packuswb xmm0, xmm0 + + movd DWORD PTR [rdi], xmm0 + + add rdi, rdx + dec rcx + jnz .filter_block1d4_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +.vp8_filter_block1d4_h4_ssse3: + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)] + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + +.filter_block1d4_h4_rowloop_ssse3: + movdqu xmm1, XMMWORD PTR [rsi - 2] + + movdqa xmm2, xmm1 + pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] + pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] + pmaddubsw xmm1, xmm5 + +;-- + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] +;-- + paddsw xmm1, xmm7 + paddsw xmm1, xmm2 + psraw xmm1, 7 + packuswb xmm1, xmm1 + + movd DWORD PTR [rdi], xmm1 + + add rdi, rdx + dec rcx + jnz .filter_block1d4_h4_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + + +;void vp8_filter_block1d16_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE +sym(vp8_filter_block1d16_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + cmp esi, DWORD PTR [rax] + je .vp8_filter_block1d16_v4_ssse3 + + movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr + +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ;out_pitch +%endif + mov rax, rsi + movsxd rcx, DWORD PTR arg(4) ;output_height + add rax, rdx + + +.vp8_filter_block1d16_v6_ssse3_loop: + movq xmm1, MMWORD PTR [rsi] ;A + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR [rax + rdx * 4] ;F + + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, [GLOBAL(rd)] + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 ;store the results + + movq xmm1, MMWORD PTR [rsi + 8] ;A + movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, [GLOBAL(rd)] + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi+8], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;out_pitch +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d16_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +.vp8_filter_block1d16_v4_ssse3: + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr + +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ;out_pitch +%endif + mov rax, rsi + movsxd rcx, DWORD PTR arg(4) ;output_height + add rax, rdx + +.vp8_filter_block1d16_v4_ssse3_loop: + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + pmaddubsw xmm3, xmm6 + pmaddubsw xmm2, xmm7 + movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B + movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E + + paddsw xmm2, [GLOBAL(rd)] + paddsw xmm2, xmm3 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + punpcklbw xmm5, xmm4 ;B D + punpcklbw xmm1, xmm0 ;C E + + pmaddubsw xmm1, xmm6 + pmaddubsw xmm5, xmm7 + + movdqa xmm4, [GLOBAL(rd)] + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm5, xmm1 + paddsw xmm5, xmm4 + psraw xmm5, 7 + packuswb xmm5, xmm5 + + punpcklqdq xmm2, xmm5 + + movdqa XMMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;out_pitch +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d16_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_filter_block1d8_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE +sym(vp8_filter_block1d8_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ; out_pitch +%endif + movsxd rcx, DWORD PTR arg(4) ;[output_height] + + cmp esi, DWORD PTR [rax] + je .vp8_filter_block1d8_v4_ssse3 + + movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +.vp8_filter_block1d8_v6_ssse3_loop: + movq xmm1, MMWORD PTR [rsi] ;A + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR [rax + rdx * 4] ;F + movdqa xmm4, [GLOBAL(rd)] + + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, xmm4 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d8_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +.vp8_filter_block1d8_v4_ssse3: + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + movdqa xmm5, [GLOBAL(rd)] + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +.vp8_filter_block1d8_v4_ssse3_loop: + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + pmaddubsw xmm3, xmm6 + pmaddubsw xmm2, xmm7 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm5 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d8_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret +;void vp8_filter_block1d4_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE +sym(vp8_filter_block1d4_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ; out_pitch +%endif + movsxd rcx, DWORD PTR arg(4) ;[output_height] + + cmp esi, DWORD PTR [rax] + je .vp8_filter_block1d4_v4_ssse3 + + movq mm5, MMWORD PTR [rax] ;k0_k5 + movq mm6, MMWORD PTR [rax+256] ;k2_k4 + movq mm7, MMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +.vp8_filter_block1d4_v6_ssse3_loop: + movd mm1, DWORD PTR [rsi] ;A + movd mm2, DWORD PTR [rsi + rdx] ;B + movd mm3, DWORD PTR [rsi + rdx * 2] ;C + movd mm4, DWORD PTR [rax + rdx * 2] ;D + movd mm0, DWORD PTR [rsi + rdx * 4] ;E + + punpcklbw mm2, mm4 ;B D + punpcklbw mm3, mm0 ;C E + + movd mm0, DWORD PTR [rax + rdx * 4] ;F + + movq mm4, [GLOBAL(rd)] + + pmaddubsw mm3, mm6 + punpcklbw mm1, mm0 ;A F + pmaddubsw mm2, mm7 + pmaddubsw mm1, mm5 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw mm2, mm3 + paddsw mm2, mm1 + paddsw mm2, mm4 + psraw mm2, 7 + packuswb mm2, mm2 + + movd DWORD PTR [rdi], mm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d4_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +.vp8_filter_block1d4_v4_ssse3: + movq mm6, MMWORD PTR [rax+256] ;k2_k4 + movq mm7, MMWORD PTR [rax+128] ;k1_k3 + movq mm5, MMWORD PTR [GLOBAL(rd)] + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +.vp8_filter_block1d4_v4_ssse3_loop: + movd mm2, DWORD PTR [rsi + rdx] ;B + movd mm3, DWORD PTR [rsi + rdx * 2] ;C + movd mm4, DWORD PTR [rax + rdx * 2] ;D + movd mm0, DWORD PTR [rsi + rdx * 4] ;E + + punpcklbw mm2, mm4 ;B D + punpcklbw mm3, mm0 ;C E + + pmaddubsw mm3, mm6 + pmaddubsw mm2, mm7 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw mm2, mm3 + paddsw mm2, mm5 + psraw mm2, 7 + packuswb mm2, mm2 + + movd DWORD PTR [rdi], mm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz .vp8_filter_block1d4_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_bilinear_predict16x16_ssse3 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE +sym(vp8_bilinear_predict16x16_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] + movsxd rax, dword ptr arg(2) ; xoffset + + cmp rax, 0 ; skip first_pass filter if xoffset=0 + je .b16x16_sp_only + + shl rax, 4 + lea rax, [rax + rcx] ; HFilter + + mov rdi, arg(4) ; dst_ptr + mov rsi, arg(0) ; src_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm1, [rax] + + movsxd rax, dword ptr arg(3) ; yoffset + + cmp rax, 0 ; skip second_pass filter if yoffset=0 + je .b16x16_fp_only + + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rdx, dword ptr arg(1) ; src_pixels_per_line + + movdqa xmm2, [rax] + +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(5) ; dst_pitch +%endif + movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 + movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 + movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 + + movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 + + lea rsi, [rsi + rdx] ; next line + + pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 + + punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 + pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value + psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 + + movdqa xmm7, xmm3 + packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + +.next_row: + movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07 + movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm6, xmm5 + movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 + + movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 + lea rsi, [rsi + rdx] ; next line + + pmaddubsw xmm6, xmm1 + + punpcklbw xmm4, xmm5 + pmaddubsw xmm4, xmm1 + + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value + psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 + + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value + psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 + + packuswb xmm6, xmm4 + movdqa xmm5, xmm7 + + punpcklbw xmm5, xmm6 + pmaddubsw xmm5, xmm2 + + punpckhbw xmm7, xmm6 + pmaddubsw xmm7, xmm2 + + paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value + psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 + + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value + psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 + + packuswb xmm5, xmm7 + movdqa xmm7, xmm6 + + movdqa [rdi], xmm5 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(5) ; dst_pitch +%else + add rdi, r8 +%endif + + cmp rdi, rcx + jne .next_row + + jmp .done + +.b16x16_sp_only: + movsxd rax, dword ptr arg(3) ; yoffset + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + mov rdi, arg(4) ; dst_ptr + mov rsi, arg(0) ; src_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm1, [rax] ; VFilter + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ; src_pixels_per_line + + ; get the first horizontal line done + movq xmm4, [rsi] ; load row 0 + movq xmm2, [rsi + 8] ; load row 0 + + lea rsi, [rsi + rax] ; next line +.next_row_sp: + movq xmm3, [rsi] ; load row + 1 + movq xmm5, [rsi + 8] ; load row + 1 + + punpcklbw xmm4, xmm3 + punpcklbw xmm2, xmm5 + + pmaddubsw xmm4, xmm1 + movq xmm7, [rsi + rax] ; load row + 2 + + pmaddubsw xmm2, xmm1 + movq xmm6, [rsi + rax + 8] ; load row + 2 + + punpcklbw xmm3, xmm7 + punpcklbw xmm5, xmm6 + + pmaddubsw xmm3, xmm1 + paddw xmm4, [GLOBAL(rd)] + + pmaddubsw xmm5, xmm1 + paddw xmm2, [GLOBAL(rd)] + + psraw xmm4, VP8_FILTER_SHIFT + psraw xmm2, VP8_FILTER_SHIFT + + packuswb xmm4, xmm2 + paddw xmm3, [GLOBAL(rd)] + + movdqa [rdi], xmm4 ; store row 0 + paddw xmm5, [GLOBAL(rd)] + + psraw xmm3, VP8_FILTER_SHIFT + psraw xmm5, VP8_FILTER_SHIFT + + packuswb xmm3, xmm5 + movdqa xmm4, xmm7 + + movdqa [rdi + rdx],xmm3 ; store row 1 + lea rsi, [rsi + 2*rax] + + movdqa xmm2, xmm6 + lea rdi, [rdi + 2*rdx] + + cmp rdi, rcx + jne .next_row_sp + + jmp .done + +.b16x16_fp_only: + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ; src_pixels_per_line + +.next_row_fp: + movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 + movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm2, xmm4 + movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 + + pmaddubsw xmm2, xmm1 + movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 + + lea rsi, [rsi + rax] ; next line + punpcklbw xmm3, xmm4 + + pmaddubsw xmm3, xmm1 + movq xmm5, [rsi] + + paddw xmm2, [GLOBAL(rd)] + movq xmm7, [rsi+1] + + movq xmm6, [rsi+8] + psraw xmm2, VP8_FILTER_SHIFT + + punpcklbw xmm5, xmm7 + movq xmm7, [rsi+9] + + paddw xmm3, [GLOBAL(rd)] + pmaddubsw xmm5, xmm1 + + psraw xmm3, VP8_FILTER_SHIFT + punpcklbw xmm6, xmm7 + + packuswb xmm2, xmm3 + pmaddubsw xmm6, xmm1 + + movdqa [rdi], xmm2 ; store the results in the destination + paddw xmm5, [GLOBAL(rd)] + + lea rdi, [rdi + rdx] ; dst_pitch + psraw xmm5, VP8_FILTER_SHIFT + + paddw xmm6, [GLOBAL(rd)] + psraw xmm6, VP8_FILTER_SHIFT + + packuswb xmm5, xmm6 + lea rsi, [rsi + rax] ; next line + + movdqa [rdi], xmm5 ; store the results in the destination + lea rdi, [rdi + rdx] ; dst_pitch + + cmp rdi, rcx + + jne .next_row_fp + +.done: + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_bilinear_predict8x8_ssse3 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE +sym(vp8_bilinear_predict8x8_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 144 ; reserve 144 bytes + + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] + + mov rsi, arg(0) ;src_ptr + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + ;Read 9-line unaligned data in and put them on stack. This gives a big + ;performance boost. + movdqu xmm0, [rsi] + lea rax, [rdx + rdx*2] + movdqu xmm1, [rsi+rdx] + movdqu xmm2, [rsi+rdx*2] + add rsi, rax + movdqu xmm3, [rsi] + movdqu xmm4, [rsi+rdx] + movdqu xmm5, [rsi+rdx*2] + add rsi, rax + movdqu xmm6, [rsi] + movdqu xmm7, [rsi+rdx] + + movdqa XMMWORD PTR [rsp], xmm0 + + movdqu xmm0, [rsi+rdx*2] + + movdqa XMMWORD PTR [rsp+16], xmm1 + movdqa XMMWORD PTR [rsp+32], xmm2 + movdqa XMMWORD PTR [rsp+48], xmm3 + movdqa XMMWORD PTR [rsp+64], xmm4 + movdqa XMMWORD PTR [rsp+80], xmm5 + movdqa XMMWORD PTR [rsp+96], xmm6 + movdqa XMMWORD PTR [rsp+112], xmm7 + movdqa XMMWORD PTR [rsp+128], xmm0 + + movsxd rax, dword ptr arg(2) ; xoffset + cmp rax, 0 ; skip first_pass filter if xoffset=0 + je .b8x8_sp_only + + shl rax, 4 + add rax, rcx ; HFilter + + mov rdi, arg(4) ; dst_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm0, [rax] + + movsxd rax, dword ptr arg(3) ; yoffset + cmp rax, 0 ; skip second_pass filter if yoffset=0 + je .b8x8_fp_only + + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + lea rcx, [rdi+rdx*8] + + movdqa xmm1, [rax] + + ; get the first horizontal line done + movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx + + psrldq xmm5, 1 + lea rsp, [rsp + 16] ; next line + + punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 + pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + movdqa xmm7, xmm3 + packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + +.next_row: + movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + lea rsp, [rsp + 16] ; next line + + movdqa xmm5, xmm6 + + psrldq xmm5, 1 + + punpcklbw xmm6, xmm5 + pmaddubsw xmm6, xmm0 + + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value + psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 + + packuswb xmm6, xmm6 + + punpcklbw xmm7, xmm6 + pmaddubsw xmm7, xmm1 + + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value + psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 + + packuswb xmm7, xmm7 + + movq [rdi], xmm7 ; store the results in the destination + lea rdi, [rdi + rdx] + + movdqa xmm7, xmm6 + + cmp rdi, rcx + jne .next_row + + jmp .done8x8 + +.b8x8_sp_only: + movsxd rax, dword ptr arg(3) ; yoffset + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + mov rdi, arg(4) ;dst_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm0, [rax] ; VFilter + + movq xmm1, XMMWORD PTR [rsp] + movq xmm2, XMMWORD PTR [rsp+16] + + movq xmm3, XMMWORD PTR [rsp+32] + punpcklbw xmm1, xmm2 + + movq xmm4, XMMWORD PTR [rsp+48] + punpcklbw xmm2, xmm3 + + movq xmm5, XMMWORD PTR [rsp+64] + punpcklbw xmm3, xmm4 + + movq xmm6, XMMWORD PTR [rsp+80] + punpcklbw xmm4, xmm5 + + movq xmm7, XMMWORD PTR [rsp+96] + punpcklbw xmm5, xmm6 + + pmaddubsw xmm1, xmm0 + pmaddubsw xmm2, xmm0 + + pmaddubsw xmm3, xmm0 + pmaddubsw xmm4, xmm0 + + pmaddubsw xmm5, xmm0 + punpcklbw xmm6, xmm7 + + pmaddubsw xmm6, xmm0 + paddw xmm1, [GLOBAL(rd)] + + paddw xmm2, [GLOBAL(rd)] + psraw xmm1, VP8_FILTER_SHIFT + + paddw xmm3, [GLOBAL(rd)] + psraw xmm2, VP8_FILTER_SHIFT + + paddw xmm4, [GLOBAL(rd)] + psraw xmm3, VP8_FILTER_SHIFT + + paddw xmm5, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + paddw xmm6, [GLOBAL(rd)] + psraw xmm5, VP8_FILTER_SHIFT + + psraw xmm6, VP8_FILTER_SHIFT + packuswb xmm1, xmm1 + + packuswb xmm2, xmm2 + movq [rdi], xmm1 + + packuswb xmm3, xmm3 + movq [rdi+rdx], xmm2 + + packuswb xmm4, xmm4 + movq xmm1, XMMWORD PTR [rsp+112] + + lea rdi, [rdi + 2*rdx] + movq xmm2, XMMWORD PTR [rsp+128] + + packuswb xmm5, xmm5 + movq [rdi], xmm3 + + packuswb xmm6, xmm6 + movq [rdi+rdx], xmm4 + + lea rdi, [rdi + 2*rdx] + punpcklbw xmm7, xmm1 + + movq [rdi], xmm5 + pmaddubsw xmm7, xmm0 + + movq [rdi+rdx], xmm6 + punpcklbw xmm1, xmm2 + + pmaddubsw xmm1, xmm0 + paddw xmm7, [GLOBAL(rd)] + + psraw xmm7, VP8_FILTER_SHIFT + paddw xmm1, [GLOBAL(rd)] + + psraw xmm1, VP8_FILTER_SHIFT + packuswb xmm7, xmm7 + + packuswb xmm1, xmm1 + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm7 + + movq [rdi+rdx], xmm1 + lea rsp, [rsp + 144] + + jmp .done8x8 + +.b8x8_fp_only: + lea rcx, [rdi+rdx*8] + +.next_row_fp: + movdqa xmm1, XMMWORD PTR [rsp] + movdqa xmm3, XMMWORD PTR [rsp+16] + + movdqa xmm2, xmm1 + movdqa xmm5, XMMWORD PTR [rsp+32] + + psrldq xmm2, 1 + movdqa xmm7, XMMWORD PTR [rsp+48] + + movdqa xmm4, xmm3 + psrldq xmm4, 1 + + movdqa xmm6, xmm5 + psrldq xmm6, 1 + + punpcklbw xmm1, xmm2 + pmaddubsw xmm1, xmm0 + + punpcklbw xmm3, xmm4 + pmaddubsw xmm3, xmm0 + + punpcklbw xmm5, xmm6 + pmaddubsw xmm5, xmm0 + + movdqa xmm2, xmm7 + psrldq xmm2, 1 + + punpcklbw xmm7, xmm2 + pmaddubsw xmm7, xmm0 + + paddw xmm1, [GLOBAL(rd)] + psraw xmm1, VP8_FILTER_SHIFT + + paddw xmm3, [GLOBAL(rd)] + psraw xmm3, VP8_FILTER_SHIFT + + paddw xmm5, [GLOBAL(rd)] + psraw xmm5, VP8_FILTER_SHIFT + + paddw xmm7, [GLOBAL(rd)] + psraw xmm7, VP8_FILTER_SHIFT + + packuswb xmm1, xmm1 + packuswb xmm3, xmm3 + + packuswb xmm5, xmm5 + movq [rdi], xmm1 + + packuswb xmm7, xmm7 + movq [rdi+rdx], xmm3 + + lea rdi, [rdi + 2*rdx] + movq [rdi], xmm5 + + lea rsp, [rsp + 4*16] + movq [rdi+rdx], xmm7 + + lea rdi, [rdi + 2*rdx] + cmp rdi, rcx + + jne .next_row_fp + + lea rsp, [rsp + 16] + +.done8x8: + ;add rsp, 144 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +shuf1b: + db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 +shuf2b: + db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11 +shuf3b: + db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10 + +align 16 +shuf2bfrom1: + db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13 +align 16 +shuf3bfrom1: + db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11 + +align 16 +rd: + times 8 dw 0x40 + +align 16 +k0_k5: + times 8 db 0, 0 ;placeholder + times 8 db 0, 0 + times 8 db 2, 1 + times 8 db 0, 0 + times 8 db 3, 3 + times 8 db 0, 0 + times 8 db 1, 2 + times 8 db 0, 0 +k1_k3: + times 8 db 0, 0 ;placeholder + times 8 db -6, 12 + times 8 db -11, 36 + times 8 db -9, 50 + times 8 db -16, 77 + times 8 db -6, 93 + times 8 db -8, 108 + times 8 db -1, 123 +k2_k4: + times 8 db 128, 0 ;placeholder + times 8 db 123, -1 + times 8 db 108, -8 + times 8 db 93, -6 + times 8 db 77, -16 + times 8 db 50, -9 + times 8 db 36, -11 + times 8 db 12, -6 +align 16 +vp8_bilinear_filters_ssse3: + times 8 db 128, 0 + times 8 db 112, 16 + times 8 db 96, 32 + times 8 db 80, 48 + times 8 db 64, 64 + times 8 db 48, 80 + times 8 db 32, 96 + times 8 db 16, 112 + diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c b/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c new file mode 100644 index 00000000000..fb0b57eb1c1 --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c @@ -0,0 +1,625 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vpx_ports/mem.h" +#include "filter_x86.h" + +extern const short vp8_six_tap_mmx[8][6*8]; + +extern void vp8_filter_block1d_h6_mmx +( + unsigned char *src_ptr, + unsigned short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_filter_block1dc_v6_mmx +( + unsigned short *src_ptr, + unsigned char *output_ptr, + int output_pitch, + unsigned int pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_filter_block1d8_h6_sse2 +( + unsigned char *src_ptr, + unsigned short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_filter_block1d16_h6_sse2 +( + unsigned char *src_ptr, + unsigned short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_filter_block1d8_v6_sse2 +( + unsigned short *src_ptr, + unsigned char *output_ptr, + int dst_ptich, + unsigned int pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_filter_block1d16_v6_sse2 +( + unsigned short *src_ptr, + unsigned char *output_ptr, + int dst_ptich, + unsigned int pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); +extern void vp8_unpack_block1d16_h6_sse2 +( + unsigned char *src_ptr, + unsigned short *output_ptr, + unsigned int src_pixels_per_line, + unsigned int output_height, + unsigned int output_width +); +extern void vp8_filter_block1d8_h6_only_sse2 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + int dst_ptich, + unsigned int output_height, + const short *vp8_filter +); +extern void vp8_filter_block1d16_h6_only_sse2 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + int dst_ptich, + unsigned int output_height, + const short *vp8_filter +); +extern void vp8_filter_block1d8_v6_only_sse2 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + int dst_ptich, + unsigned int output_height, + const short *vp8_filter +); + + +#if HAVE_MMX +void vp8_sixtap_predict4x4_mmx +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */ + const short *HFilter, *VFilter; + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter); + +} + + +void vp8_sixtap_predict16x16_mmx +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + + DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ + + const short *HFilter, *VFilter; + + + HFilter = vp8_six_tap_mmx[xoffset]; + + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter); + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter); + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter); + + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter); + vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter); + vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter); + vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter); + +} + + +void vp8_sixtap_predict8x8_mmx +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ + + const short *HFilter, *VFilter; + + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter); + + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter); + vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter); + +} + + +void vp8_sixtap_predict8x4_mmx +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ + + const short *HFilter, *VFilter; + + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); + vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter); + + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter); + vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter); + +} + + + +void vp8_bilinear_predict16x16_mmx +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + vp8_bilinear_predict8x8_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pitch); + vp8_bilinear_predict8x8_mmx(src_ptr + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + 8, dst_pitch); + vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8, dst_pitch); + vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8 + 8, dst_pitch); +} +#endif + + +#if HAVE_SSE2 +void vp8_sixtap_predict16x16_sse2 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch + +) +{ + DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ + + const short *HFilter, *VFilter; + + if (xoffset) + { + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter); + } + } + else + { + /* Second-pass only */ + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); + vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); + } +} + + +void vp8_sixtap_predict8x8_sse2 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ + const short *HFilter, *VFilter; + + if (xoffset) + { + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter); + } + } + else + { + /* Second-pass only */ + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter); + } +} + + +void vp8_sixtap_predict8x4_sse2 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ + const short *HFilter, *VFilter; + + if (xoffset) + { + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter); + } + } + else + { + /* Second-pass only */ + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter); + } +} + +#endif + +#if HAVE_SSSE3 + +extern void vp8_filter_block1d8_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d16_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d16_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d8_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d4_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d4_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +void vp8_sixtap_predict16x16_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch + +) +{ + DECLARE_ALIGNED(16, unsigned char, FData2[24*24]); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 16, 21, xoffset); + vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, + 16, yoffset); + } + else + { + /* First-pass only */ + vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 16, xoffset); + } + } + else + { + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 16, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } + } +} + +void vp8_sixtap_predict8x8_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned char, FData2[256]); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 13, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 8, yoffset); + } + else + { + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 8, xoffset); + } + } + else + { + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 8, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } + } +} + + +void vp8_sixtap_predict8x4_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned char, FData2[256]); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, FData2, + 8, 9, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, + 4, yoffset); + } + else + { + /* First-pass only */ + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { + if (yoffset) + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); + } + } +} + +void vp8_sixtap_predict4x4_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED(16, unsigned char, FData2[4*9]); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + FData2, 4, 9, xoffset); + vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, + 4, yoffset); + } + else + { + vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, + dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { + if (yoffset) + { + vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), + src_pixels_per_line, + dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* ssse3 second-pass only function couldn't handle (xoffset==0 && + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ + int r; + + for (r = 0; r < 4; r++) + { + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + dst_ptr[2] = src_ptr[2]; + dst_ptr[3] = src_ptr[3]; + dst_ptr += dst_pitch; + src_ptr += src_pixels_per_line; + } + } + } +} + +#endif diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm b/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm new file mode 100644 index 00000000000..88a07b9f3fa --- /dev/null +++ b/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm @@ -0,0 +1,1753 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;void vp8_loop_filter_horizontal_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE +sym(vp8_loop_filter_horizontal_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 32 ; reserve 32 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + movsxd rcx, dword ptr arg(5) ;count +.next8_h: + mov rdx, arg(3) ;limit + movq mm7, [rdx] + mov rdi, rsi ; rdi points to row +1 for indirect addressing + add rdi, rax + + ; calculate breakout conditions + movq mm2, [rdi+2*rax] ; q3 + movq mm1, [rsi+2*rax] ; q2 + movq mm6, mm1 ; q2 + psubusb mm1, mm2 ; q2-=q3 + psubusb mm2, mm6 ; q3-=q2 + por mm1, mm2 ; abs(q3-q2) + psubusb mm1, mm7 ; + + + movq mm4, [rsi+rax] ; q1 + movq mm3, mm4 ; q1 + psubusb mm4, mm6 ; q1-=q2 + psubusb mm6, mm3 ; q2-=q1 + por mm4, mm6 ; abs(q2-q1) + + psubusb mm4, mm7 + por mm1, mm4 + + movq mm4, [rsi] ; q0 + movq mm0, mm4 ; q0 + psubusb mm4, mm3 ; q0-=q1 + psubusb mm3, mm0 ; q1-=q0 + por mm4, mm3 ; abs(q0-q1) + movq t0, mm4 ; save to t0 + psubusb mm4, mm7 + por mm1, mm4 + + + neg rax ; negate pitch to deal with above border + + movq mm2, [rsi+4*rax] ; p3 + movq mm4, [rdi+4*rax] ; p2 + movq mm5, mm4 ; p2 + psubusb mm4, mm2 ; p2-=p3 + psubusb mm2, mm5 ; p3-=p2 + por mm4, mm2 ; abs(p3 - p2) + psubusb mm4, mm7 + por mm1, mm4 + + + movq mm4, [rsi+2*rax] ; p1 + movq mm3, mm4 ; p1 + psubusb mm4, mm5 ; p1-=p2 + psubusb mm5, mm3 ; p2-=p1 + por mm4, mm5 ; abs(p2 - p1) + psubusb mm4, mm7 + por mm1, mm4 + + movq mm2, mm3 ; p1 + + movq mm4, [rsi+rax] ; p0 + movq mm5, mm4 ; p0 + psubusb mm4, mm3 ; p0-=p1 + psubusb mm3, mm5 ; p1-=p0 + por mm4, mm3 ; abs(p1 - p0) + movq t1, mm4 ; save to t1 + psubusb mm4, mm7 + por mm1, mm4 + + movq mm3, [rdi] ; q1 + movq mm4, mm3 ; q1 + psubusb mm3, mm2 ; q1-=p1 + psubusb mm2, mm4 ; p1-=q1 + por mm2, mm3 ; abs(p1-q1) + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm2, 1 ; abs(p1-q1)/2 + + movq mm6, mm5 ; p0 + movq mm3, [rsi] ; q0 + psubusb mm5, mm3 ; p0-=q0 + psubusb mm3, mm6 ; q0-=p0 + por mm5, mm3 ; abs(p0 - q0) + paddusb mm5, mm5 ; abs(p0-q0)*2 + paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + mov rdx, arg(2) ;blimit ; get blimit + movq mm7, [rdx] ; blimit + + psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + por mm1, mm5 + pxor mm5, mm5 + pcmpeqb mm1, mm5 ; mask mm1 + + ; calculate high edge variance + mov rdx, arg(4) ;thresh ; get thresh + movq mm7, [rdx] ; + movq mm4, t0 ; get abs (q1 - q0) + psubusb mm4, mm7 + movq mm3, t1 ; get abs (p1 - p0) + psubusb mm3, mm7 + paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + pcmpeqb mm4, mm5 + + pcmpeqb mm5, mm5 + pxor mm4, mm5 + + + ; start work on filters + movq mm2, [rsi+2*rax] ; p1 + movq mm7, [rdi] ; q1 + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + psubsb mm2, mm7 ; p1 - q1 + pand mm2, mm4 ; high var mask (hvm)(p1 - q1) + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values + movq mm3, mm0 ; q0 + psubsb mm0, mm6 ; q0 - p0 + paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) + paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) + paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) + pand mm1, mm2 ; mask filter values we don't care about + movq mm2, mm1 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + + pxor mm0, mm0 ; + pxor mm5, mm5 + punpcklbw mm0, mm2 ; + punpckhbw mm5, mm2 ; + psraw mm0, 11 ; + psraw mm5, 11 + packsswb mm0, mm5 + movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; + + pxor mm0, mm0 ; 0 + movq mm5, mm1 ; abcdefgh + punpcklbw mm0, mm1 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + pxor mm1, mm1 ; 0 + punpckhbw mm1, mm5 ; a0b0c0d0 + psraw mm1, 11 ; sign extended shift right by 3 + movq mm5, mm0 ; save results + + packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 + paddsw mm5, [GLOBAL(ones)] + paddsw mm1, [GLOBAL(ones)] + psraw mm5, 1 ; partial shifted one more time for 2nd tap + psraw mm1, 1 ; partial shifted one more time for 2nd tap + packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 + pandn mm4, mm5 ; high edge variance additive + + paddsb mm6, mm2 ; p0+= p0 add + pxor mm6, [GLOBAL(t80)] ; unoffset + movq [rsi+rax], mm6 ; write back + + movq mm6, [rsi+2*rax] ; p1 + pxor mm6, [GLOBAL(t80)] ; reoffset + paddsb mm6, mm4 ; p1+= p1 add + pxor mm6, [GLOBAL(t80)] ; unoffset + movq [rsi+2*rax], mm6 ; write back + + psubsb mm3, mm0 ; q0-= q0 add + pxor mm3, [GLOBAL(t80)] ; unoffset + movq [rsi], mm3 ; write back + + psubsb mm7, mm4 ; q1-= q1 add + pxor mm7, [GLOBAL(t80)] ; unoffset + movq [rdi], mm7 ; write back + + add rsi,8 + neg rax + dec rcx + jnz .next8_h + + add rsp, 32 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_vertical_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE +sym(vp8_loop_filter_vertical_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 64 ; reserve 64 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + %define srct [rsp + 32] ;__declspec(align(16)) char srct[32]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + lea rsi, [rsi + rax*4 - 4] + + movsxd rcx, dword ptr arg(5) ;count +.next8_v: + mov rdi, rsi ; rdi points to row +1 for indirect addressing + add rdi, rax + + + ;transpose + movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 + movq mm7, mm6 ; 77 76 75 74 73 72 71 70 + + punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64 + punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60 + + movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 + movq mm5, mm4 ; 47 46 45 44 43 42 41 40 + + punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44 + punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40 + + movq mm3, mm5 ; 57 47 56 46 55 45 54 44 + punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 + + punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 + movq mm2, mm4 ; 53 43 52 42 51 41 50 40 + + punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 + punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 + + neg rax + movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 + + movq mm1, mm6 ; 27 26 25 24 23 22 21 20 + punpckhbw mm6, [rsi+rax] ; 37 27 36 36 35 25 34 24 + + punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20 + movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 + + punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 + movq mm0, mm7 ; 17 07 16 06 15 05 14 04 + + punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 + punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 + + movq mm6, mm7 ; 37 27 17 07 36 26 16 06 + punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 + + punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 + + movq mm5, mm6 ; 76 66 56 46 36 26 16 06 + psubusb mm5, mm7 ; q2-q3 + + psubusb mm7, mm6 ; q3-q2 + por mm7, mm5; ; mm7=abs (q3-q2) + + movq mm5, mm0 ; 35 25 15 05 34 24 14 04 + punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 + + punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 + movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 + + psubusb mm3, mm6 ; q1-q2 + psubusb mm6, mm5 ; q2-q1 + + por mm6, mm3 ; mm6=abs(q2-q1) + lea rdx, srct + + movq [rdx+24], mm5 ; save q1 + movq [rdx+16], mm0 ; save q0 + + movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 + punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 + + movq mm0, mm3 ; 13 03 12 02 11 01 10 00 + punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 + + punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 + movq mm1, mm0 ; 31 21 11 01 30 20 10 00 + + punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 + punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 + + movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 + psubusb mm2, mm0 ; p2-p3 + + psubusb mm0, mm1 ; p3-p2 + por mm0, mm2 ; mm0=abs(p3-p2) + + movq mm2, mm3 ; 33 23 13 03 32 22 12 02 + punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 + + punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 + movq [rdx+8], mm3 ; save p0 + + movq [rdx], mm2 ; save p1 + movq mm5, mm2 ; mm5 = p1 + + psubusb mm2, mm1 ; p1-p2 + psubusb mm1, mm5 ; p2-p1 + + por mm1, mm2 ; mm1=abs(p2-p1) + mov rdx, arg(3) ;limit + + movq mm4, [rdx] ; mm4 = limit + psubusb mm7, mm4 + + psubusb mm0, mm4 + psubusb mm1, mm4 + + psubusb mm6, mm4 + por mm7, mm6 + + por mm0, mm1 + por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit + + movq mm1, mm5 ; p1 + + movq mm7, mm3 ; mm3=mm7=p0 + psubusb mm7, mm5 ; p0 - p1 + + psubusb mm5, mm3 ; p1 - p0 + por mm5, mm7 ; abs(p1-p0) + + movq t0, mm5 ; save abs(p1-p0) + lea rdx, srct + + psubusb mm5, mm4 + por mm0, mm5 ; mm0=mask + + movq mm5, [rdx+16] ; mm5=q0 + movq mm7, [rdx+24] ; mm7=q1 + + movq mm6, mm5 ; mm6=q0 + movq mm2, mm7 ; q1 + psubusb mm5, mm7 ; q0-q1 + + psubusb mm7, mm6 ; q1-q0 + por mm7, mm5 ; abs(q1-q0) + + movq t1, mm7 ; save abs(q1-q0) + psubusb mm7, mm4 + + por mm0, mm7 ; mask + + movq mm5, mm2 ; q1 + psubusb mm5, mm1 ; q1-=p1 + psubusb mm1, mm2 ; p1-=q1 + por mm5, mm1 ; abs(p1-q1) + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm5, 1 ; abs(p1-q1)/2 + + mov rdx, arg(2) ;blimit ; + + movq mm4, [rdx] ;blimit + movq mm1, mm3 ; mm1=mm3=p0 + + movq mm7, mm6 ; mm7=mm6=q0 + psubusb mm1, mm7 ; p0-q0 + + psubusb mm7, mm3 ; q0-p0 + por mm1, mm7 ; abs(q0-p0) + paddusb mm1, mm1 ; abs(q0-p0)*2 + paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + por mm1, mm0; ; mask + + pxor mm0, mm0 + pcmpeqb mm1, mm0 + + ; calculate high edge variance + mov rdx, arg(4) ;thresh ; get thresh + movq mm7, [rdx] + ; + movq mm4, t0 ; get abs (q1 - q0) + psubusb mm4, mm7 + + movq mm3, t1 ; get abs (p1 - p0) + psubusb mm3, mm7 + + por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + pcmpeqb mm4, mm0 + + pcmpeqb mm0, mm0 + pxor mm4, mm0 + + + + ; start work on filters + lea rdx, srct + + movq mm2, [rdx] ; p1 + movq mm7, [rdx+24] ; q1 + + movq mm6, [rdx+8] ; p0 + movq mm0, [rdx+16] ; q0 + + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + + psubsb mm2, mm7 ; p1 - q1 + pand mm2, mm4 ; high var mask (hvm)(p1 - q1) + + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values + + movq mm3, mm0 ; q0 + psubsb mm0, mm6 ; q0 - p0 + + paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) + paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) + + paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) + pand mm1, mm2 ; mask filter values we don't care about + + movq mm2, mm1 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + pxor mm0, mm0 ; + + pxor mm5, mm5 + punpcklbw mm0, mm2 ; + + punpckhbw mm5, mm2 ; + psraw mm0, 11 ; + + psraw mm5, 11 + packsswb mm0, mm5 + + movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; + + pxor mm0, mm0 ; 0 + movq mm5, mm1 ; abcdefgh + + punpcklbw mm0, mm1 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + + pxor mm1, mm1 ; 0 + punpckhbw mm1, mm5 ; a0b0c0d0 + + psraw mm1, 11 ; sign extended shift right by 3 + movq mm5, mm0 ; save results + + packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 + paddsw mm5, [GLOBAL(ones)] + + paddsw mm1, [GLOBAL(ones)] + psraw mm5, 1 ; partial shifted one more time for 2nd tap + + psraw mm1, 1 ; partial shifted one more time for 2nd tap + packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 + + pandn mm4, mm5 ; high edge variance additive + + paddsb mm6, mm2 ; p0+= p0 add + pxor mm6, [GLOBAL(t80)] ; unoffset + + ; mm6=p0 ; + movq mm1, [rdx] ; p1 + pxor mm1, [GLOBAL(t80)] ; reoffset + + paddsb mm1, mm4 ; p1+= p1 add + pxor mm1, [GLOBAL(t80)] ; unoffset + ; mm6 = p0 mm1 = p1 + + psubsb mm3, mm0 ; q0-= q0 add + pxor mm3, [GLOBAL(t80)] ; unoffset + + ; mm3 = q0 + psubsb mm7, mm4 ; q1-= q1 add + pxor mm7, [GLOBAL(t80)] ; unoffset + ; mm7 = q1 + + ; transpose and write back + ; mm1 = 72 62 52 42 32 22 12 02 + ; mm6 = 73 63 53 43 33 23 13 03 + ; mm3 = 74 64 54 44 34 24 14 04 + ; mm7 = 75 65 55 45 35 25 15 05 + + movq mm2, mm1 ; 72 62 52 42 32 22 12 02 + punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02 + + movq mm4, mm3 ; 74 64 54 44 34 24 14 04 + punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42 + + punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04 + punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44 + + movq mm6, mm2 ; 33 32 23 22 13 12 03 02 + punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02 + + punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22 + movq mm5, mm1 ; 73 72 63 62 53 52 43 42 + + punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42 + punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62 + + + ; mm2 = 15 14 13 12 05 04 03 02 + ; mm6 = 35 34 33 32 25 24 23 22 + ; mm5 = 55 54 53 52 45 44 43 42 + ; mm1 = 75 74 73 72 65 64 63 62 + + + + movd [rsi+rax*4+2], mm2 + psrlq mm2, 32 + + movd [rdi+rax*4+2], mm2 + movd [rsi+rax*2+2], mm6 + + psrlq mm6, 32 + movd [rsi+rax+2],mm6 + + movd [rsi+2], mm1 + psrlq mm1, 32 + + movd [rdi+2], mm1 + neg rax + + movd [rdi+rax+2],mm5 + psrlq mm5, 32 + + movd [rdi+rax*2+2], mm5 + + lea rsi, [rsi+rax*8] + dec rcx + jnz .next8_v + + add rsp, 64 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_mbloop_filter_horizontal_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE +sym(vp8_mbloop_filter_horizontal_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 32 ; reserve 32 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + movsxd rcx, dword ptr arg(5) ;count +.next8_mbh: + mov rdx, arg(3) ;limit + movq mm7, [rdx] + mov rdi, rsi ; rdi points to row +1 for indirect addressing + add rdi, rax + + ; calculate breakout conditions + movq mm2, [rdi+2*rax] ; q3 + + movq mm1, [rsi+2*rax] ; q2 + movq mm6, mm1 ; q2 + psubusb mm1, mm2 ; q2-=q3 + psubusb mm2, mm6 ; q3-=q2 + por mm1, mm2 ; abs(q3-q2) + psubusb mm1, mm7 + + + ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit + movq mm4, [rsi+rax] ; q1 + movq mm3, mm4 ; q1 + psubusb mm4, mm6 ; q1-=q2 + psubusb mm6, mm3 ; q2-=q1 + por mm4, mm6 ; abs(q2-q1) + psubusb mm4, mm7 + por mm1, mm4 + + + ; mm1 = mask, mm3=q1, mm7 = limit + + movq mm4, [rsi] ; q0 + movq mm0, mm4 ; q0 + psubusb mm4, mm3 ; q0-=q1 + psubusb mm3, mm0 ; q1-=q0 + por mm4, mm3 ; abs(q0-q1) + movq t0, mm4 ; save to t0 + psubusb mm4, mm7 + por mm1, mm4 + + + ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) + + neg rax ; negate pitch to deal with above border + + movq mm2, [rsi+4*rax] ; p3 + movq mm4, [rdi+4*rax] ; p2 + movq mm5, mm4 ; p2 + psubusb mm4, mm2 ; p2-=p3 + psubusb mm2, mm5 ; p3-=p2 + por mm4, mm2 ; abs(p3 - p2) + psubusb mm4, mm7 + por mm1, mm4 + ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) + + movq mm4, [rsi+2*rax] ; p1 + movq mm3, mm4 ; p1 + psubusb mm4, mm5 ; p1-=p2 + psubusb mm5, mm3 ; p2-=p1 + por mm4, mm5 ; abs(p2 - p1) + psubusb mm4, mm7 + por mm1, mm4 + + movq mm2, mm3 ; p1 + + + ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) + + movq mm4, [rsi+rax] ; p0 + movq mm5, mm4 ; p0 + psubusb mm4, mm3 ; p0-=p1 + psubusb mm3, mm5 ; p1-=p0 + por mm4, mm3 ; abs(p1 - p0) + movq t1, mm4 ; save to t1 + psubusb mm4, mm7 + por mm1, mm4 + ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0) + ; mm5 = p0 + movq mm3, [rdi] ; q1 + movq mm4, mm3 ; q1 + psubusb mm3, mm2 ; q1-=p1 + psubusb mm2, mm4 ; p1-=q1 + por mm2, mm3 ; abs(p1-q1) + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm2, 1 ; abs(p1-q1)/2 + + movq mm6, mm5 ; p0 + movq mm3, mm0 ; q0 + psubusb mm5, mm3 ; p0-=q0 + psubusb mm3, mm6 ; q0-=p0 + por mm5, mm3 ; abs(p0 - q0) + paddusb mm5, mm5 ; abs(p0-q0)*2 + paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + mov rdx, arg(2) ;blimit ; get blimit + movq mm7, [rdx] ; blimit + + psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + por mm1, mm5 + pxor mm5, mm5 + pcmpeqb mm1, mm5 ; mask mm1 + + ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) + ; mm6 = p0, + + ; calculate high edge variance + mov rdx, arg(4) ;thresh ; get thresh + movq mm7, [rdx] ; + movq mm4, t0 ; get abs (q1 - q0) + psubusb mm4, mm7 + movq mm3, t1 ; get abs (p1 - p0) + psubusb mm3, mm7 + paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + pcmpeqb mm4, mm5 + + pcmpeqb mm5, mm5 + pxor mm4, mm5 + + + + ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0) + ; mm6 = p0, mm4=hev + ; start work on filters + movq mm2, [rsi+2*rax] ; p1 + movq mm7, [rdi] ; q1 + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + psubsb mm2, mm7 ; p1 - q1 + + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values + movq mm3, mm0 ; q0 + psubsb mm0, mm6 ; q0 - p0 + paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) + paddsb mm2, mm0 ; 2 * (q0 - p0) + paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) + pand mm1, mm2 ; mask filter values we don't care about + + + ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 + movq mm2, mm1 ; vp8_filter + pand mm2, mm4; ; Filter2 = vp8_filter & hev + + movq mm5, mm2 ; + paddsb mm5, [GLOBAL(t3)]; + + pxor mm0, mm0 ; 0 + pxor mm7, mm7 ; 0 + + punpcklbw mm0, mm5 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + punpckhbw mm7, mm5 ; a0b0c0d0 + psraw mm7, 11 ; sign extended shift right by 3 + packsswb mm0, mm7 ; Filter2 >>=3; + + movq mm5, mm0 ; Filter2 + + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) + pxor mm0, mm0 ; 0 + pxor mm7, mm7 ; 0 + + punpcklbw mm0, mm2 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + punpckhbw mm7, mm2 ; a0b0c0d0 + psraw mm7, 11 ; sign extended shift right by 3 + packsswb mm0, mm7 ; Filter2 >>=3; + + ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 + psubsb mm3, mm0 ; qs0 =qs0 - filter1 + paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 + + ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 + ; vp8_filter &= ~hev; + ; Filter2 = vp8_filter; + pandn mm4, mm1 ; vp8_filter&=~hev + + + ; mm3=qs0, mm4=filter2, mm6=ps0 + + ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); + ; s = vp8_signed_char_clamp(qs0 - u); + ; *oq0 = s^0x80; + ; s = vp8_signed_char_clamp(ps0 + u); + ; *op0 = s^0x80; + pxor mm0, mm0 + + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + psubsb mm3, mm1 + paddsb mm6, mm1 + + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] + movq [rsi+rax], mm6 + movq [rsi], mm3 + + ; roughly 2/7th difference across boundary + ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); + ; s = vp8_signed_char_clamp(qs1 - u); + ; *oq1 = s^0x80; + ; s = vp8_signed_char_clamp(ps1 + u); + ; *op1 = s^0x80; + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + movq mm3, [rdi] + movq mm6, [rsi+rax*2] ; p1 + + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] + + paddsb mm6, mm1 + psubsb mm3, mm1 + + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] + movq [rdi], mm3 + movq [rsi+rax*2], mm6 + + ; roughly 1/7th difference across boundary + ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); + ; s = vp8_signed_char_clamp(qs2 - u); + ; *oq2 = s^0x80; + ; s = vp8_signed_char_clamp(ps2 + u); + ; *op2 = s^0x80; + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + + movq mm6, [rdi+rax*4] + neg rax + movq mm3, [rdi+rax ] + + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] + + paddsb mm6, mm1 + psubsb mm3, mm1 + + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] + movq [rdi+rax ], mm3 + neg rax + movq [rdi+rax*4], mm6 + +;EARLY_BREAK_OUT: + neg rax + add rsi,8 + dec rcx + jnz .next8_mbh + + add rsp, 32 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_mbloop_filter_vertical_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE +sym(vp8_mbloop_filter_vertical_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 96 ; reserve 96 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + lea rsi, [rsi + rax*4 - 4] + + movsxd rcx, dword ptr arg(5) ;count +.next8_mbv: + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + + ;transpose + movq mm0, [rdi+2*rax] ; 77 76 75 74 73 72 71 70 + movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 + + movq mm7, mm6 ; 77 76 75 74 73 72 71 70 + punpckhbw mm7, mm0 ; 77 67 76 66 75 65 74 64 + + punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 + movq mm0, [rsi+rax] ; 57 56 55 54 53 52 51 50 + + movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 + movq mm5, mm4 ; 47 46 45 44 43 42 41 40 + + punpckhbw mm5, mm0 ; 57 47 56 46 55 45 54 44 + punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 + + movq mm3, mm5 ; 57 47 56 46 55 45 54 44 + punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 + + punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 + movq mm2, mm4 ; 53 43 52 42 51 41 50 40 + + punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 + punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 + + neg rax + + movq mm7, [rsi+rax] ; 37 36 35 34 33 32 31 30 + movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 + + movq mm1, mm6 ; 27 26 25 24 23 22 21 20 + punpckhbw mm6, mm7 ; 37 27 36 36 35 25 34 24 + + punpcklbw mm1, mm7 ; 33 23 32 22 31 21 30 20 + + movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 + punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 + + movq mm0, mm7 ; 17 07 16 06 15 05 14 04 + punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 + + punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 + movq mm6, mm7 ; 37 27 17 07 36 26 16 06 + + punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 + punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 + + lea rdx, srct + movq mm5, mm6 ; 76 66 56 46 36 26 16 06 + + movq [rdx+56], mm7 + psubusb mm5, mm7 ; q2-q3 + + + movq [rdx+48], mm6 + psubusb mm7, mm6 ; q3-q2 + + por mm7, mm5; ; mm7=abs (q3-q2) + movq mm5, mm0 ; 35 25 15 05 34 24 14 04 + + punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 + punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 + + movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 + psubusb mm3, mm6 ; q1-q2 + + psubusb mm6, mm5 ; q2-q1 + por mm6, mm3 ; mm6=abs(q2-q1) + + movq [rdx+40], mm5 ; save q1 + movq [rdx+32], mm0 ; save q0 + + movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 + punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 + + movq mm0, mm3 ; 13 03 12 02 11 01 10 00 + punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 + + punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 + movq mm1, mm0 ; 31 21 11 01 30 20 10 00 + + punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 + punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 + + movq [rdx], mm0 ; save p3 + movq [rdx+8], mm1 ; save p2 + + movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 + psubusb mm2, mm0 ; p2-p3 + + psubusb mm0, mm1 ; p3-p2 + por mm0, mm2 ; mm0=abs(p3-p2) + + movq mm2, mm3 ; 33 23 13 03 32 22 12 02 + punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 + + punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 + movq [rdx+24], mm3 ; save p0 + + movq [rdx+16], mm2 ; save p1 + movq mm5, mm2 ; mm5 = p1 + + psubusb mm2, mm1 ; p1-p2 + psubusb mm1, mm5 ; p2-p1 + + por mm1, mm2 ; mm1=abs(p2-p1) + mov rdx, arg(3) ;limit + + movq mm4, [rdx] ; mm4 = limit + psubusb mm7, mm4 ; abs(q3-q2) > limit + + psubusb mm0, mm4 ; abs(p3-p2) > limit + psubusb mm1, mm4 ; abs(p2-p1) > limit + + psubusb mm6, mm4 ; abs(q2-q1) > limit + por mm7, mm6 ; or + + por mm0, mm1 ; + por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit + + movq mm1, mm5 ; p1 + + movq mm7, mm3 ; mm3=mm7=p0 + psubusb mm7, mm5 ; p0 - p1 + + psubusb mm5, mm3 ; p1 - p0 + por mm5, mm7 ; abs(p1-p0) + + movq t0, mm5 ; save abs(p1-p0) + lea rdx, srct + + psubusb mm5, mm4 ; mm5 = abs(p1-p0) > limit + por mm0, mm5 ; mm0=mask + + movq mm5, [rdx+32] ; mm5=q0 + movq mm7, [rdx+40] ; mm7=q1 + + movq mm6, mm5 ; mm6=q0 + movq mm2, mm7 ; q1 + psubusb mm5, mm7 ; q0-q1 + + psubusb mm7, mm6 ; q1-q0 + por mm7, mm5 ; abs(q1-q0) + + movq t1, mm7 ; save abs(q1-q0) + psubusb mm7, mm4 ; mm7=abs(q1-q0)> limit + + por mm0, mm7 ; mask + + movq mm5, mm2 ; q1 + psubusb mm5, mm1 ; q1-=p1 + psubusb mm1, mm2 ; p1-=q1 + por mm5, mm1 ; abs(p1-q1) + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm5, 1 ; abs(p1-q1)/2 + + mov rdx, arg(2) ;blimit ; + + movq mm4, [rdx] ;blimit + movq mm1, mm3 ; mm1=mm3=p0 + + movq mm7, mm6 ; mm7=mm6=q0 + psubusb mm1, mm7 ; p0-q0 + + psubusb mm7, mm3 ; q0-p0 + por mm1, mm7 ; abs(q0-p0) + paddusb mm1, mm1 ; abs(q0-p0)*2 + paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit + por mm1, mm0; ; mask + + pxor mm0, mm0 + pcmpeqb mm1, mm0 + + ; calculate high edge variance + mov rdx, arg(4) ;thresh ; get thresh + movq mm7, [rdx] + ; + movq mm4, t0 ; get abs (q1 - q0) + psubusb mm4, mm7 ; abs(q1 - q0) > thresh + + movq mm3, t1 ; get abs (p1 - p0) + psubusb mm3, mm7 ; abs(p1 - p0)> thresh + + por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + pcmpeqb mm4, mm0 + + pcmpeqb mm0, mm0 + pxor mm4, mm0 + + + + + ; start work on filters + lea rdx, srct + + ; start work on filters + movq mm2, [rdx+16] ; p1 + movq mm7, [rdx+40] ; q1 + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + psubsb mm2, mm7 ; p1 - q1 + + movq mm6, [rdx+24] ; p0 + movq mm0, [rdx+32] ; q0 + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values + + movq mm3, mm0 ; q0 + psubsb mm0, mm6 ; q0 - p0 + paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) + paddsb mm2, mm0 ; 2 * (q0 - p0) + paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) + pand mm1, mm2 ; mask filter values we don't care about + + ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 + movq mm2, mm1 ; vp8_filter + pand mm2, mm4; ; Filter2 = vp8_filter & hev + + movq mm5, mm2 ; + paddsb mm5, [GLOBAL(t3)]; + + pxor mm0, mm0 ; 0 + pxor mm7, mm7 ; 0 + + punpcklbw mm0, mm5 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + punpckhbw mm7, mm5 ; a0b0c0d0 + psraw mm7, 11 ; sign extended shift right by 3 + packsswb mm0, mm7 ; Filter2 >>=3; + + movq mm5, mm0 ; Filter2 + + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) + pxor mm0, mm0 ; 0 + pxor mm7, mm7 ; 0 + + punpcklbw mm0, mm2 ; e0f0g0h0 + psraw mm0, 11 ; sign extended shift right by 3 + punpckhbw mm7, mm2 ; a0b0c0d0 + psraw mm7, 11 ; sign extended shift right by 3 + packsswb mm0, mm7 ; Filter2 >>=3; + + ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 + psubsb mm3, mm0 ; qs0 =qs0 - filter1 + paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 + + ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 + ; vp8_filter &= ~hev; + ; Filter2 = vp8_filter; + pandn mm4, mm1 ; vp8_filter&=~hev + + + ; mm3=qs0, mm4=filter2, mm6=ps0 + + ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); + ; s = vp8_signed_char_clamp(qs0 - u); + ; *oq0 = s^0x80; + ; s = vp8_signed_char_clamp(ps0 + u); + ; *op0 = s^0x80; + pxor mm0, mm0 + + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + psubsb mm3, mm1 + paddsb mm6, mm1 + + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] + movq [rdx+24], mm6 + movq [rdx+32], mm3 + + ; roughly 2/7th difference across boundary + ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); + ; s = vp8_signed_char_clamp(qs1 - u); + ; *oq1 = s^0x80; + ; s = vp8_signed_char_clamp(ps1 + u); + ; *op1 = s^0x80; + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + movq mm3, [rdx + 40] + movq mm6, [rdx + 16] ; p1 + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] + + paddsb mm6, mm1 + psubsb mm3, mm1 + + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] + movq [rdx + 40], mm3 + movq [rdx + 16], mm6 + + ; roughly 1/7th difference across boundary + ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); + ; s = vp8_signed_char_clamp(qs2 - u); + ; *oq2 = s^0x80; + ; s = vp8_signed_char_clamp(ps2 + u); + ; *op2 = s^0x80; + pxor mm1, mm1 + pxor mm2, mm2 + punpcklbw mm1, mm4 + punpckhbw mm2, mm4 + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] + psraw mm1, 7 + psraw mm2, 7 + packsswb mm1, mm2 + + movq mm6, [rdx+ 8] + movq mm3, [rdx+48] + + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] + + paddsb mm6, mm1 + psubsb mm3, mm1 + + pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 + pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 + + ; transpose and write back + movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 + movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00 + + punpcklbw mm0, mm6 ; mm0 = 31 30 21 20 11 10 01 00 + punpckhbw mm1, mm6 ; mm3 = 71 70 61 60 51 50 41 40 + + movq mm2, [rdx+16] ; mm2 = 72 62 52 42 32 22 12 02 + movq mm6, mm2 ; mm3 = 72 62 52 42 32 22 12 02 + + punpcklbw mm2, [rdx+24] ; mm2 = 33 32 23 22 13 12 03 02 + punpckhbw mm6, [rdx+24] ; mm3 = 73 72 63 62 53 52 43 42 + + movq mm5, mm0 ; mm5 = 31 30 21 20 11 10 01 00 + punpcklwd mm0, mm2 ; mm0 = 13 12 11 10 03 02 01 00 + + punpckhwd mm5, mm2 ; mm5 = 33 32 31 30 23 22 21 20 + movq mm4, mm1 ; mm4 = 71 70 61 60 51 50 41 40 + + punpcklwd mm1, mm6 ; mm1 = 53 52 51 50 43 42 41 40 + punpckhwd mm4, mm6 ; mm4 = 73 72 71 70 63 62 61 60 + + movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 + punpcklbw mm2, [rdx+40] ; mm2 = 35 34 25 24 15 14 05 04 + + movq mm6, mm3 ; mm6 = 76 66 56 46 36 26 15 06 + punpcklbw mm6, [rdx+56] ; mm6 = 37 36 27 26 17 16 07 06 + + movq mm7, mm2 ; mm7 = 35 34 25 24 15 14 05 04 + punpcklwd mm2, mm6 ; mm2 = 17 16 15 14 07 06 05 04 + + punpckhwd mm7, mm6 ; mm7 = 37 36 35 34 27 26 25 24 + movq mm6, mm0 ; mm6 = 13 12 11 10 03 02 01 00 + + punpckldq mm0, mm2 ; mm0 = 07 06 05 04 03 02 01 00 + punpckhdq mm6, mm2 ; mm6 = 17 16 15 14 13 12 11 10 + + movq [rsi+rax*4], mm0 ; write out + movq [rdi+rax*4], mm6 ; write out + + movq mm0, mm5 ; mm0 = 33 32 31 30 23 22 21 20 + punpckldq mm0, mm7 ; mm0 = 27 26 25 24 23 22 20 20 + + punpckhdq mm5, mm7 ; mm5 = 37 36 35 34 33 32 31 30 + movq [rsi+rax*2], mm0 ; write out + + movq [rdi+rax*2], mm5 ; write out + movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 + + punpckhbw mm2, [rdx+40] ; mm2 = 75 74 65 64 54 54 45 44 + punpckhbw mm3, [rdx+56] ; mm3 = 77 76 67 66 57 56 47 46 + + movq mm5, mm2 ; mm5 = 75 74 65 64 54 54 45 44 + punpcklwd mm2, mm3 ; mm2 = 57 56 55 54 47 46 45 44 + + punpckhwd mm5, mm3 ; mm5 = 77 76 75 74 67 66 65 64 + movq mm0, mm1 ; mm0= 53 52 51 50 43 42 41 40 + + movq mm3, mm4 ; mm4 = 73 72 71 70 63 62 61 60 + punpckldq mm0, mm2 ; mm0 = 47 46 45 44 43 42 41 40 + + punpckhdq mm1, mm2 ; mm1 = 57 56 55 54 53 52 51 50 + movq [rsi], mm0 ; write out + + movq [rdi], mm1 ; write out + neg rax + + punpckldq mm3, mm5 ; mm3 = 67 66 65 64 63 62 61 60 + punpckhdq mm4, mm5 ; mm4 = 77 76 75 74 73 72 71 60 + + movq [rsi+rax*2], mm3 + movq [rdi+rax*2], mm4 + + lea rsi, [rsi+rax*8] + dec rcx + + jnz .next8_mbv + + add rsp, 96 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_simple_horizontal_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit +;) +global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE +sym(vp8_loop_filter_simple_horizontal_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 3 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + mov rcx, 2 ; count +.nexts8_h: + mov rdx, arg(2) ;blimit ; get blimit + movq mm3, [rdx] ; + + mov rdi, rsi ; rdi points to row +1 for indirect addressing + add rdi, rax + neg rax + + ; calculate mask + movq mm1, [rsi+2*rax] ; p1 + movq mm0, [rdi] ; q1 + movq mm2, mm1 + movq mm7, mm0 + movq mm4, mm0 + psubusb mm0, mm1 ; q1-=p1 + psubusb mm1, mm4 ; p1-=q1 + por mm1, mm0 ; abs(p1-q1) + pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm1, 1 ; abs(p1-q1)/2 + + movq mm5, [rsi+rax] ; p0 + movq mm4, [rsi] ; q0 + movq mm0, mm4 ; q0 + movq mm6, mm5 ; p0 + psubusb mm5, mm4 ; p0-=q0 + psubusb mm4, mm6 ; q0-=p0 + por mm5, mm4 ; abs(p0 - q0) + paddusb mm5, mm5 ; abs(p0-q0)*2 + paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit + pxor mm3, mm3 + pcmpeqb mm5, mm3 + + ; start work on filters + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + psubsb mm2, mm7 ; p1 - q1 + + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values + movq mm3, mm0 ; q0 + psubsb mm0, mm6 ; q0 - p0 + paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) + paddsb mm2, mm0 ; p1 - q1 + 2 * (q0 - p0) + paddsb mm2, mm0 ; p1 - q1 + 3 * (q0 - p0) + pand mm5, mm2 ; mask filter values we don't care about + + ; do + 4 side + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 + + movq mm0, mm5 ; get a copy of filters + psllw mm0, 8 ; shift left 8 + psraw mm0, 3 ; arithmetic shift right 11 + psrlw mm0, 8 + movq mm1, mm5 ; get a copy of filters + psraw mm1, 11 ; arithmetic shift right 11 + psllw mm1, 8 ; shift left 8 to put it back + + por mm0, mm1 ; put the two together to get result + + psubsb mm3, mm0 ; q0-= q0 add + pxor mm3, [GLOBAL(t80)] ; unoffset + movq [rsi], mm3 ; write back + + + ; now do +3 side + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 + + movq mm0, mm5 ; get a copy of filters + psllw mm0, 8 ; shift left 8 + psraw mm0, 3 ; arithmetic shift right 11 + psrlw mm0, 8 + psraw mm5, 11 ; arithmetic shift right 11 + psllw mm5, 8 ; shift left 8 to put it back + por mm0, mm5 ; put the two together to get result + + + paddsb mm6, mm0 ; p0+= p0 add + pxor mm6, [GLOBAL(t80)] ; unoffset + movq [rsi+rax], mm6 ; write back + + add rsi,8 + neg rax + dec rcx + jnz .nexts8_h + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_simple_vertical_edge_mmx +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *blimit +;) +global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE +sym(vp8_loop_filter_simple_vertical_edge_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 3 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 32 ; reserve 32 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + + lea rsi, [rsi + rax*4- 2]; ; + mov rcx, 2 ; count +.nexts8_v: + + lea rdi, [rsi + rax]; + movd mm0, [rdi + rax * 2] ; xx xx xx xx 73 72 71 70 + + movd mm6, [rsi + rax * 2] ; xx xx xx xx 63 62 61 60 + punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 + + movd mm0, [rsi + rax] ; xx xx xx xx 53 52 51 50 + movd mm4, [rsi] ; xx xx xx xx 43 42 41 40 + + punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 + movq mm5, mm4 ; 53 43 52 42 51 41 50 40 + + punpcklwd mm4, mm6 ; 71 61 51 41 70 60 50 40 + punpckhwd mm5, mm6 ; 73 63 53 43 72 62 52 42 + + neg rax + + movd mm7, [rsi + rax] ; xx xx xx xx 33 32 31 30 + movd mm6, [rsi + rax * 2] ; xx xx xx xx 23 22 21 20 + + punpcklbw mm6, mm7 ; 33 23 32 22 31 21 30 20 + movd mm1, [rdi + rax * 4] ; xx xx xx xx 13 12 11 10 + + movd mm0, [rsi + rax * 4] ; xx xx xx xx 03 02 01 00 + punpcklbw mm0, mm1 ; 13 03 12 02 11 01 10 00 + + movq mm2, mm0 ; 13 03 12 02 11 01 10 00 + punpcklwd mm0, mm6 ; 31 21 11 01 30 20 10 00 + + punpckhwd mm2, mm6 ; 33 23 13 03 32 22 12 02 + movq mm1, mm0 ; 13 03 12 02 11 01 10 00 + + punpckldq mm0, mm4 ; 70 60 50 40 30 20 10 00 = p1 + movq mm3, mm2 ; 33 23 13 03 32 22 12 02 + + punpckhdq mm1, mm4 ; 71 61 51 41 31 21 11 01 = p0 + punpckldq mm2, mm5 ; 72 62 52 42 32 22 12 02 = q0 + + punpckhdq mm3, mm5 ; 73 63 53 43 33 23 13 03 = q1 + + + ; calculate mask + movq mm6, mm0 ; p1 + movq mm7, mm3 ; q1 + psubusb mm7, mm6 ; q1-=p1 + psubusb mm6, mm3 ; p1-=q1 + por mm6, mm7 ; abs(p1-q1) + pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero + psrlw mm6, 1 ; abs(p1-q1)/2 + + movq mm5, mm1 ; p0 + movq mm4, mm2 ; q0 + + psubusb mm5, mm2 ; p0-=q0 + psubusb mm4, mm1 ; q0-=p0 + + por mm5, mm4 ; abs(p0 - q0) + paddusb mm5, mm5 ; abs(p0-q0)*2 + paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + mov rdx, arg(2) ;blimit ; get blimit + movq mm7, [rdx] + + psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit + pxor mm7, mm7 + pcmpeqb mm5, mm7 ; mm5 = mask + + ; start work on filters + movq t0, mm0 + movq t1, mm3 + + pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values + + psubsb mm0, mm3 ; p1 - q1 + movq mm6, mm1 ; p0 + + movq mm7, mm2 ; q0 + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + + pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values + movq mm3, mm7 ; offseted ; q0 + + psubsb mm7, mm6 ; q0 - p0 + paddsb mm0, mm7 ; p1 - q1 + 1 * (q0 - p0) + + paddsb mm0, mm7 ; p1 - q1 + 2 * (q0 - p0) + paddsb mm0, mm7 ; p1 - q1 + 3 * (q0 - p0) + + pand mm5, mm0 ; mask filter values we don't care about + + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 + + movq mm0, mm5 ; get a copy of filters + psllw mm0, 8 ; shift left 8 + psraw mm0, 3 ; arithmetic shift right 11 + psrlw mm0, 8 + + movq mm7, mm5 ; get a copy of filters + psraw mm7, 11 ; arithmetic shift right 11 + psllw mm7, 8 ; shift left 8 to put it back + + por mm0, mm7 ; put the two together to get result + + psubsb mm3, mm0 ; q0-= q0sz add + pxor mm3, [GLOBAL(t80)] ; unoffset + + ; now do +3 side + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 + + movq mm0, mm5 ; get a copy of filters + psllw mm0, 8 ; shift left 8 + psraw mm0, 3 ; arithmetic shift right 11 + psrlw mm0, 8 + + psraw mm5, 11 ; arithmetic shift right 11 + psllw mm5, 8 ; shift left 8 to put it back + por mm0, mm5 ; put the two together to get result + + paddsb mm6, mm0 ; p0+= p0 add + pxor mm6, [GLOBAL(t80)] ; unoffset + + + movq mm0, t0 + movq mm4, t1 + + ; mm0 = 70 60 50 40 30 20 10 00 + ; mm6 = 71 61 51 41 31 21 11 01 + ; mm3 = 72 62 52 42 32 22 12 02 + ; mm4 = 73 63 53 43 33 23 13 03 + ; transpose back to write out + + movq mm1, mm0 ; + punpcklbw mm0, mm6 ; 31 30 21 20 11 10 01 00 + + punpckhbw mm1, mm6 ; 71 70 61 60 51 50 41 40 + movq mm2, mm3 ; + + punpcklbw mm2, mm4 ; 33 32 23 22 13 12 03 02 + movq mm5, mm1 ; 71 70 61 60 51 50 41 40 + + punpckhbw mm3, mm4 ; 73 72 63 62 53 52 43 42 + movq mm6, mm0 ; 31 30 21 20 11 10 01 00 + + punpcklwd mm0, mm2 ; 13 12 11 10 03 02 01 00 + punpckhwd mm6, mm2 ; 33 32 31 30 23 22 21 20 + + movd [rsi+rax*4], mm0 ; write 03 02 01 00 + punpcklwd mm1, mm3 ; 53 52 51 50 43 42 41 40 + + psrlq mm0, 32 ; xx xx xx xx 13 12 11 10 + punpckhwd mm5, mm3 ; 73 72 71 70 63 62 61 60 + + movd [rdi+rax*4], mm0 ; write 13 12 11 10 + movd [rsi+rax*2], mm6 ; write 23 22 21 20 + + psrlq mm6, 32 ; 33 32 31 30 + movd [rsi], mm1 ; write 43 42 41 40 + + movd [rsi + rax], mm6 ; write 33 32 31 30 + neg rax + + movd [rsi + rax*2], mm5 ; write 63 62 61 60 + psrlq mm1, 32 ; 53 52 51 50 + + movd [rdi], mm1 ; write out 53 52 51 50 + psrlq mm5, 32 ; 73 72 71 70 + + movd [rdi + rax*2], mm5 ; write 73 72 71 70 + + lea rsi, [rsi+rax*8] ; next 8 + + dec rcx + jnz .nexts8_v + + add rsp, 32 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + + +;void fast_loop_filter_vertical_edges_mmx(unsigned char *y_ptr, +; int y_stride, +; loop_filter_info *lfi) +;{ +; +; +; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+4, y_stride, lfi->flim,lfi->lim,lfi->thr,2); +; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+8, y_stride, lfi->flim,lfi->lim,lfi->thr,2); +; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+12, y_stride, lfi->flim,lfi->lim,lfi->thr,2); +;} + +SECTION_RODATA +align 16 +tfe: + times 8 db 0xfe +align 16 +t80: + times 8 db 0x80 +align 16 +t1s: + times 8 db 0x01 +align 16 +t3: + times 8 db 0x03 +align 16 +t4: + times 8 db 0x04 +align 16 +ones: + times 4 dw 0x0001 +align 16 +s27: + times 4 dw 0x1b00 +align 16 +s18: + times 4 dw 0x1200 +align 16 +s9: + times 4 dw 0x0900 +align 16 +s63: + times 4 dw 0x003f diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.c b/thirdparty/libvpx/vp8/decoder/dboolhuff.c new file mode 100644 index 00000000000..5cdd2a24915 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/dboolhuff.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "dboolhuff.h" +#include "vp8/common/common.h" +#include "vpx_dsp/vpx_dsp_common.h" + +int vp8dx_start_decode(BOOL_DECODER *br, + const unsigned char *source, + unsigned int source_sz, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) +{ + br->user_buffer_end = source+source_sz; + br->user_buffer = source; + br->value = 0; + br->count = -8; + br->range = 255; + br->decrypt_cb = decrypt_cb; + br->decrypt_state = decrypt_state; + + if (source_sz && !source) + return 1; + + /* Populate the buffer */ + vp8dx_bool_decoder_fill(br); + + return 0; +} + +void vp8dx_bool_decoder_fill(BOOL_DECODER *br) +{ + const unsigned char *bufptr = br->user_buffer; + VP8_BD_VALUE value = br->value; + int count = br->count; + int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); + size_t bytes_left = br->user_buffer_end - bufptr; + size_t bits_left = bytes_left * CHAR_BIT; + int x = shift + CHAR_BIT - (int)bits_left; + int loop_end = 0; + unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; + + if (br->decrypt_cb) { + size_t n = VPXMIN(sizeof(decrypted), bytes_left); + br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n); + bufptr = decrypted; + } + + if(x >= 0) + { + count += VP8_LOTS_OF_BITS; + loop_end = x; + } + + if (x < 0 || bits_left) + { + while(shift >= loop_end) + { + count += CHAR_BIT; + value |= (VP8_BD_VALUE)*bufptr << shift; + ++bufptr; + ++br->user_buffer; + shift -= CHAR_BIT; + } + } + + br->value = value; + br->count = count; +} diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.h b/thirdparty/libvpx/vp8/decoder/dboolhuff.h new file mode 100644 index 00000000000..1b1bbf868ec --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/dboolhuff.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_DECODER_DBOOLHUFF_H_ +#define VP8_DECODER_DBOOLHUFF_H_ + +#include +#include + +#include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef size_t VP8_BD_VALUE; + +#define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT) + +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). + Even relatively modest values like 100 would work fine.*/ +#define VP8_LOTS_OF_BITS (0x40000000) + +typedef struct +{ + const unsigned char *user_buffer_end; + const unsigned char *user_buffer; + VP8_BD_VALUE value; + int count; + unsigned int range; + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; +} BOOL_DECODER; + +DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); + +int vp8dx_start_decode(BOOL_DECODER *br, + const unsigned char *source, + unsigned int source_sz, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state); + +void vp8dx_bool_decoder_fill(BOOL_DECODER *br); + + +static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { + unsigned int bit = 0; + VP8_BD_VALUE value; + unsigned int split; + VP8_BD_VALUE bigsplit; + int count; + unsigned int range; + + split = 1 + (((br->range - 1) * probability) >> 8); + + if(br->count < 0) + vp8dx_bool_decoder_fill(br); + + value = br->value; + count = br->count; + + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); + + range = split; + + if (value >= bigsplit) + { + range = br->range - split; + value = value - bigsplit; + bit = 1; + } + + { + register int shift = vp8_norm[range]; + range <<= shift; + value <<= shift; + count -= shift; + } + br->value = value; + br->count = count; + br->range = range; + + return bit; +} + +static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits) +{ + int z = 0; + int bit; + + for (bit = bits - 1; bit >= 0; bit--) + { + z |= (vp8dx_decode_bool(br, 0x80) << bit); + } + + return z; +} + +static INLINE int vp8dx_bool_error(BOOL_DECODER *br) +{ + /* Check if we have reached the end of the buffer. + * + * Variable 'count' stores the number of bits in the 'value' buffer, minus + * 8. The top byte is part of the algorithm, and the remainder is buffered + * to be shifted into it. So if count == 8, the top 16 bits of 'value' are + * occupied, 8 for the algorithm and 8 in the buffer. + * + * When reading a byte from the user's buffer, count is filled with 8 and + * one byte is filled into the value buffer. When we reach the end of the + * data, count is additionally filled with VP8_LOTS_OF_BITS. So when + * count == VP8_LOTS_OF_BITS - 1, the user's data has been exhausted. + */ + if ((br->count > VP8_BD_VALUE_SIZE) && (br->count < VP8_LOTS_OF_BITS)) + { + /* We have tried to decode bits after the end of + * stream was encountered. + */ + return 1; + } + + /* No error. */ + return 0; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_DBOOLHUFF_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decodeframe.c b/thirdparty/libvpx/vp8/decoder/decodeframe.c new file mode 100644 index 00000000000..51acdbb9c8a --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/decodeframe.c @@ -0,0 +1,1397 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "./vpx_scale_rtcd.h" +#include "onyxd_int.h" +#include "vp8/common/header.h" +#include "vp8/common/reconintra4x4.h" +#include "vp8/common/reconinter.h" +#include "detokenize.h" +#include "vp8/common/common.h" +#include "vp8/common/invtrans.h" +#include "vp8/common/alloccommon.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/quant_common.h" +#include "vpx_scale/vpx_scale.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/setupintrarecon.h" + +#include "decodemv.h" +#include "vp8/common/extend.h" +#if CONFIG_ERROR_CONCEALMENT +#include "error_concealment.h" +#endif +#include "vpx_mem/vpx_mem.h" +#include "vp8/common/threading.h" +#include "decoderthreading.h" +#include "dboolhuff.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#include +#include + +void vp8cx_init_de_quantizer(VP8D_COMP *pbi) +{ + int Q; + VP8_COMMON *const pc = & pbi->common; + + for (Q = 0; Q < QINDEX_RANGE; Q++) + { + pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); + pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); + pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); + + pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); + pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); + pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); + } +} + +void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) +{ + int i; + int QIndex; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + VP8_COMMON *const pc = & pbi->common; + + /* Decide whether to use the default or alternate baseline Q value. */ + if (xd->segmentation_enabled) + { + /* Abs Value */ + if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) + QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + + /* Delta Value */ + else + QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ + } + else + QIndex = pc->base_qindex; + + /* Set up the macroblock dequant constants */ + xd->dequant_y1_dc[0] = 1; + xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; + xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; + xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; + + for (i = 1; i < 16; i++) + { + xd->dequant_y1_dc[i] = + xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; + xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; + xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; + } +} + +static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, + unsigned int mb_idx) +{ + MB_PREDICTION_MODE mode; + int i; +#if CONFIG_ERROR_CONCEALMENT + int corruption_detected = 0; +#else + (void)mb_idx; +#endif + + if (xd->mode_info_context->mbmi.mb_skip_coeff) + { + vp8_reset_mb_tokens_context(xd); + } + else if (!vp8dx_bool_error(xd->current_bc)) + { + int eobtotal; + eobtotal = vp8_decode_mb_tokens(pbi, xd); + + /* Special case: Force the loopfilter to skip when eobtotal is zero */ + xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); + } + + mode = xd->mode_info_context->mbmi.mode; + + if (xd->segmentation_enabled) + vp8_mb_init_dequantizer(pbi, xd); + + +#if CONFIG_ERROR_CONCEALMENT + + if(pbi->ec_active) + { + int throw_residual; + /* When we have independent partitions we can apply residual even + * though other partitions within the frame are corrupt. + */ + throw_residual = (!pbi->independent_partitions && + pbi->frame_corrupt_residual); + throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); + + if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) + { + /* MB with corrupt residuals or corrupt mode/motion vectors. + * Better to use the predictor as reconstruction. + */ + pbi->frame_corrupt_residual = 1; + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + + corruption_detected = 1; + + /* force idct to be skipped for B_PRED and use the + * prediction only for reconstruction + * */ + memset(xd->eobs, 0, 25); + } + } +#endif + + /* do prediction */ + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + { + vp8_build_intra_predictors_mbuv_s(xd, + xd->recon_above[1], + xd->recon_above[2], + xd->recon_left[1], + xd->recon_left[2], + xd->recon_left_stride[1], + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride); + + if (mode != B_PRED) + { + vp8_build_intra_predictors_mby_s(xd, + xd->recon_above[0], + xd->recon_left[0], + xd->recon_left_stride[0], + xd->dst.y_buffer, + xd->dst.y_stride); + } + else + { + short *DQC = xd->dequant_y1; + int dst_stride = xd->dst.y_stride; + + /* clear out residual eob info */ + if(xd->mode_info_context->mbmi.mb_skip_coeff) + memset(xd->eobs, 0, 25); + + intra_prediction_down_copy(xd, xd->recon_above[0] + 16); + + for (i = 0; i < 16; i++) + { + BLOCKD *b = &xd->block[i]; + unsigned char *dst = xd->dst.y_buffer + b->offset; + B_PREDICTION_MODE b_mode = + xd->mode_info_context->bmi[i].as_mode; + unsigned char *Above = dst - dst_stride; + unsigned char *yleft = dst - 1; + int left_stride = dst_stride; + unsigned char top_left = Above[-1]; + + vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, + dst, dst_stride, top_left); + + if (xd->eobs[i]) + { + if (xd->eobs[i] > 1) + { + vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); + } + else + { + vp8_dc_only_idct_add + (b->qcoeff[0] * DQC[0], + dst, dst_stride, + dst, dst_stride); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + } + } + } + } + } + else + { + vp8_build_inter_predictors_mb(xd); + } + + +#if CONFIG_ERROR_CONCEALMENT + if (corruption_detected) + { + return; + } +#endif + + if(!xd->mode_info_context->mbmi.mb_skip_coeff) + { + /* dequantization and idct */ + if (mode != B_PRED) + { + short *DQC = xd->dequant_y1; + + if (mode != SPLITMV) + { + BLOCKD *b = &xd->block[24]; + + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + vp8_dequantize_b(b, xd->dequant_y2); + + vp8_short_inv_walsh4x4(&b->dqcoeff[0], + xd->qcoeff); + memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); + } + else + { + b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; + vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], + xd->qcoeff); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + } + + /* override the dc dequant constant in order to preserve the + * dc components + */ + DQC = xd->dequant_y1_dc; + } + + vp8_dequant_idct_add_y_block + (xd->qcoeff, DQC, + xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } + + vp8_dequant_idct_add_uv_block + (xd->qcoeff+16*16, xd->dequant_uv, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); + } +} + +static int get_delta_q(vp8_reader *bc, int prev, int *q_update) +{ + int ret_val = 0; + + if (vp8_read_bit(bc)) + { + ret_val = vp8_read_literal(bc, 4); + + if (vp8_read_bit(bc)) + ret_val = -ret_val; + } + + /* Trigger a quantizer update if the delta-q value has changed */ + if (ret_val != prev) + *q_update = 1; + + return ret_val; +} + +#ifdef PACKET_TESTING +#include +FILE *vpxlog = 0; +#endif + +static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) +{ + int i; + unsigned char *src_ptr1; + unsigned char *dest_ptr1; + + unsigned int Border; + int plane_stride; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + src_ptr1 = ybf->y_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)Border; i++) + { + memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + Border /= 2; + src_ptr1 = ybf->u_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)(Border); i++) + { + memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + src_ptr1 = ybf->v_buffer - Border; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + + for (i = 0; i < (int)(Border); i++) + { + memcpy(dest_ptr1, src_ptr1, plane_stride); + dest_ptr1 += plane_stride; + } +} + +static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) +{ + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) + { + memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = ybf->uv_height; + Border /= 2; + + src_ptr1 = ybf->u_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) + { + memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + src_ptr1 = ybf->v_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) + { + memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr2 += plane_stride; + } +} + +static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, + unsigned char *y_src, + unsigned char *u_src, + unsigned char *v_src) +{ + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = 16; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = y_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + memset(dest_ptr1, src_ptr1[0], Border); + memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = 8; + plane_width = ybf->uv_width; + Border /= 2; + + /* copy the left and right most columns out */ + src_ptr1 = u_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + memset(dest_ptr1, src_ptr1[0], Border); + memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + /* copy the left and right most columns out */ + src_ptr1 = v_src; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) + { + memset(dest_ptr1, src_ptr1[0], Border); + memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } +} + +static void decode_mb_rows(VP8D_COMP *pbi) +{ + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; + + MODE_INFO *lf_mic = xd->mode_info_context; + + int ibc = 0; + int num_part = 1 << pc->multi_token_partition; + + int recon_yoffset, recon_uvoffset; + int mb_row, mb_col; + int mb_idx = 0; + + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + + int recon_y_stride = yv12_fb_new->y_stride; + int recon_uv_stride = yv12_fb_new->uv_stride; + + unsigned char *ref_buffer[MAX_REF_FRAMES][3]; + unsigned char *dst_buffer[3]; + unsigned char *lf_dst[3]; + unsigned char *eb_dst[3]; + int i; + int ref_fb_corrupted[MAX_REF_FRAMES]; + + ref_fb_corrupted[INTRA_FRAME] = 0; + + for(i = 1; i < MAX_REF_FRAMES; i++) + { + YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; + + ref_buffer[i][0] = this_fb->y_buffer; + ref_buffer[i][1] = this_fb->u_buffer; + ref_buffer[i][2] = this_fb->v_buffer; + + ref_fb_corrupted[i] = this_fb->corrupted; + } + + /* Set up the buffer pointers */ + eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; + eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; + eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; + + xd->up_available = 0; + + /* Initialize the loop filter for this frame. */ + if(pc->filter_level) + vp8_loop_filter_frame_init(pc, xd, pc->filter_level); + + vp8_setup_intra_recon_top_line(yv12_fb_new); + + /* Decode the individual macro block */ + for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) + { + if (num_part > 1) + { + xd->current_bc = & pbi->mbc[ibc]; + ibc++; + + if (ibc == num_part) + ibc = 0; + } + + recon_yoffset = mb_row * recon_y_stride * 16; + recon_uvoffset = mb_row * recon_uv_stride * 8; + + /* reset contexts */ + xd->above_context = pc->above_context; + memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + + xd->left_available = 0; + + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + xd->recon_above[0] = dst_buffer[0] + recon_yoffset; + xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; + xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; + + xd->recon_left[0] = xd->recon_above[0] - 1; + xd->recon_left[1] = xd->recon_above[1] - 1; + xd->recon_left[2] = xd->recon_above[2] - 1; + + xd->recon_above[0] -= xd->dst.y_stride; + xd->recon_above[1] -= xd->dst.uv_stride; + xd->recon_above[2] -= xd->dst.uv_stride; + + /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = xd->dst.y_stride; + xd->recon_left_stride[1] = xd->dst.uv_stride; + + setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], + xd->recon_left[2], xd->dst.y_stride, + xd->dst.uv_stride); + + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) + { + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values + * that are in 1/8th pel units + */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + +#if CONFIG_ERROR_CONCEALMENT + { + int corrupt_residual = (!pbi->independent_partitions && + pbi->frame_corrupt_residual) || + vp8dx_bool_error(xd->current_bc); + if (pbi->ec_active && + xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && + corrupt_residual) + { + /* We have an intra block with corrupt coefficients, better to + * conceal with an inter block. Interpolate MVs from neighboring + * MBs. + * + * Note that for the first mb with corrupt residual in a frame, + * we might not discover that before decoding the residual. That + * happens after this check, and therefore no inter concealment + * will be done. + */ + vp8_interpolate_motion(xd, + mb_row, mb_col, + pc->mb_rows, pc->mb_cols); + } + } +#endif + + xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; + xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; + xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; + + if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { + const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; + xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; + xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; + xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; + } else { + // ref_frame is INTRA_FRAME, pre buffer should not be used. + xd->pre.y_buffer = 0; + xd->pre.u_buffer = 0; + xd->pre.v_buffer = 0; + } + + /* propagate errors from reference frames */ + xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; + + decode_macroblock(pbi, xd, mb_idx); + + mb_idx++; + xd->left_available = 1; + + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= vp8dx_bool_error(xd->current_bc); + + xd->recon_above[0] += 16; + xd->recon_above[1] += 8; + xd->recon_above[2] += 8; + xd->recon_left[0] += 16; + xd->recon_left[1] += 8; + xd->recon_left[2] += 8; + + recon_yoffset += 16; + recon_uvoffset += 8; + + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; + } + + /* adjust to the next row of mbs */ + vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + + ++xd->mode_info_context; /* skip prediction column */ + xd->up_available = 1; + + if(pc->filter_level) + { + if(mb_row > 0) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + if(mb_row > 1) + { + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; + } + + lf_dst[0] += recon_y_stride * 16; + lf_dst[1] += recon_uv_stride * 8; + lf_dst[2] += recon_uv_stride * 8; + lf_mic += pc->mb_cols; + lf_mic++; /* Skip border mb */ + } + } + else + { + if(mb_row > 0) + { + /**/ + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; + } + } + } + + if(pc->filter_level) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); + + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + eb_dst[0] += recon_y_stride * 16; + eb_dst[1] += recon_uv_stride * 8; + eb_dst[2] += recon_uv_stride * 8; + } + yv12_extend_frame_left_right_c(yv12_fb_new, + eb_dst[0], + eb_dst[1], + eb_dst[2]); + yv12_extend_frame_top_c(yv12_fb_new); + yv12_extend_frame_bottom_c(yv12_fb_new); + +} + +static unsigned int read_partition_size(VP8D_COMP *pbi, + const unsigned char *cx_size) +{ + unsigned char temp[3]; + if (pbi->decrypt_cb) + { + pbi->decrypt_cb(pbi->decrypt_state, cx_size, temp, 3); + cx_size = temp; + } + return cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16); +} + +static int read_is_valid(const unsigned char *start, + size_t len, + const unsigned char *end) +{ + return (start + len > start && start + len <= end); +} + +static unsigned int read_available_partition_size( + VP8D_COMP *pbi, + const unsigned char *token_part_sizes, + const unsigned char *fragment_start, + const unsigned char *first_fragment_end, + const unsigned char *fragment_end, + int i, + int num_part) +{ + VP8_COMMON* pc = &pbi->common; + const unsigned char *partition_size_ptr = token_part_sizes + i * 3; + unsigned int partition_size = 0; + ptrdiff_t bytes_left = fragment_end - fragment_start; + /* Calculate the length of this partition. The last partition + * size is implicit. If the partition size can't be read, then + * either use the remaining data in the buffer (for EC mode) + * or throw an error. + */ + if (i < num_part - 1) + { + if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) + partition_size = read_partition_size(pbi, partition_size_ptr); + else if (pbi->ec_active) + partition_size = (unsigned int)bytes_left; + else + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated partition size data"); + } + else + partition_size = (unsigned int)bytes_left; + + /* Validate the calculated partition length. If the buffer + * described by the partition can't be fully read, then restrict + * it to the portion that can be (for EC mode) or throw an error. + */ + if (!read_is_valid(fragment_start, partition_size, fragment_end)) + { + if (pbi->ec_active) + partition_size = (unsigned int)bytes_left; + else + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt partition " + "%d length", i + 1); + } + return partition_size; +} + + +static void setup_token_decoder(VP8D_COMP *pbi, + const unsigned char* token_part_sizes) +{ + vp8_reader *bool_decoder = &pbi->mbc[0]; + unsigned int partition_idx; + unsigned int fragment_idx; + unsigned int num_token_partitions; + const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] + + pbi->fragments.sizes[0]; + + TOKEN_PARTITION multi_token_partition = + (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); + if (!vp8dx_bool_error(&pbi->mbc[8])) + pbi->common.multi_token_partition = multi_token_partition; + num_token_partitions = 1 << pbi->common.multi_token_partition; + + /* Check for partitions within the fragments and unpack the fragments + * so that each fragment pointer points to its corresponding partition. */ + for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx) + { + unsigned int fragment_size = pbi->fragments.sizes[fragment_idx]; + const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] + + fragment_size; + /* Special case for handling the first partition since we have already + * read its size. */ + if (fragment_idx == 0) + { + /* Size of first partition + token partition sizes element */ + ptrdiff_t ext_first_part_size = token_part_sizes - + pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); + fragment_size -= (unsigned int)ext_first_part_size; + if (fragment_size > 0) + { + pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; + /* The fragment contains an additional partition. Move to + * next. */ + fragment_idx++; + pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] + + pbi->fragments.sizes[0]; + } + } + /* Split the chunk into partitions read from the bitstream */ + while (fragment_size > 0) + { + ptrdiff_t partition_size = read_available_partition_size( + pbi, + token_part_sizes, + pbi->fragments.ptrs[fragment_idx], + first_fragment_end, + fragment_end, + fragment_idx - 1, + num_token_partitions); + pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; + fragment_size -= (unsigned int)partition_size; + assert(fragment_idx <= num_token_partitions); + if (fragment_size > 0) + { + /* The fragment contains an additional partition. + * Move to next. */ + fragment_idx++; + pbi->fragments.ptrs[fragment_idx] = + pbi->fragments.ptrs[fragment_idx - 1] + partition_size; + } + } + } + + pbi->fragments.count = num_token_partitions + 1; + + for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx) + { + if (vp8dx_start_decode(bool_decoder, + pbi->fragments.ptrs[partition_idx], + pbi->fragments.sizes[partition_idx], + pbi->decrypt_cb, pbi->decrypt_state)) + vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder %d", + partition_idx); + + bool_decoder++; + } + +#if CONFIG_MULTITHREAD + /* Clamp number of decoder threads */ + if (pbi->decoding_thread_count > num_token_partitions - 1) + pbi->decoding_thread_count = num_token_partitions - 1; +#endif +} + + +static void init_frame(VP8D_COMP *pbi) +{ + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; + + if (pc->frame_type == KEY_FRAME) + { + /* Various keyframe initializations */ + memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); + + vp8_init_mbmode_probs(pc); + + vp8_default_coef_probs(pc); + + /* reset the segment feature data to 0 with delta coding (Default state). */ + memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + xd->mb_segement_abs_delta = SEGMENT_DELTADATA; + + /* reset the mode ref deltasa for loop filter */ + memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); + memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); + + /* All buffers are implicitly updated on key frames. */ + pc->refresh_golden_frame = 1; + pc->refresh_alt_ref_frame = 1; + pc->copy_buffer_to_gf = 0; + pc->copy_buffer_to_arf = 0; + + /* Note that Golden and Altref modes cannot be used on a key frame so + * ref_frame_sign_bias[] is undefined and meaningless + */ + pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; + pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; + } + else + { + /* To enable choice of different interploation filters */ + if (!pc->use_bilinear_mc_filter) + { + xd->subpixel_predict = vp8_sixtap_predict4x4; + xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; + xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; + xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; + } + else + { + xd->subpixel_predict = vp8_bilinear_predict4x4; + xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; + xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; + xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; + } + + if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) + pbi->ec_active = 1; + } + + xd->left_context = &pc->left_context; + xd->mode_info_context = pc->mi; + xd->frame_type = pc->frame_type; + xd->mode_info_context->mbmi.mode = DC_PRED; + xd->mode_info_stride = pc->mode_info_stride; + xd->corrupted = 0; /* init without corruption */ + + xd->fullpixel_mask = 0xffffffff; + if(pc->full_pixel) + xd->fullpixel_mask = 0xfffffff8; + +} + +int vp8_decode_frame(VP8D_COMP *pbi) +{ + vp8_reader *const bc = &pbi->mbc[8]; + VP8_COMMON *const pc = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + const unsigned char *data = pbi->fragments.ptrs[0]; + const unsigned int data_sz = pbi->fragments.sizes[0]; + const unsigned char *data_end = data + data_sz; + ptrdiff_t first_partition_length_in_bytes; + + int i, j, k, l; + const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; + int corrupt_tokens = 0; + int prev_independent_partitions = pbi->independent_partitions; + + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + + /* start with no corruption of current frame */ + xd->corrupted = 0; + yv12_fb_new->corrupted = 0; + + if (data_end - data < 3) + { + if (!pbi->ec_active) + { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet"); + } + + /* Declare the missing frame as an inter frame since it will + be handled as an inter frame when we have estimated its + motion vectors. */ + pc->frame_type = INTER_FRAME; + pc->version = 0; + pc->show_frame = 1; + first_partition_length_in_bytes = 0; + } + else + { + unsigned char clear_buffer[10]; + const unsigned char *clear = data; + if (pbi->decrypt_cb) + { + int n = (int)VPXMIN(sizeof(clear_buffer), data_sz); + pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); + clear = clear_buffer; + } + + pc->frame_type = (FRAME_TYPE)(clear[0] & 1); + pc->version = (clear[0] >> 1) & 7; + pc->show_frame = (clear[0] >> 4) & 1; + first_partition_length_in_bytes = + (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5; + + if (!pbi->ec_active && + (data + first_partition_length_in_bytes > data_end + || data + first_partition_length_in_bytes < data)) + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt partition 0 length"); + + data += 3; + clear += 3; + + vp8_setup_version(pc); + + + if (pc->frame_type == KEY_FRAME) + { + /* vet via sync code */ + /* When error concealment is enabled we should only check the sync + * code if we have enough bits available + */ + if (!pbi->ec_active || data + 3 < data_end) + { + if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) + vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + } + + /* If error concealment is enabled we should only parse the new size + * if we have enough data. Otherwise we will end up with the wrong + * size. + */ + if (!pbi->ec_active || data + 6 < data_end) + { + pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff; + pc->horiz_scale = clear[4] >> 6; + pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff; + pc->vert_scale = clear[6] >> 6; + } + data += 7; + } + else + { + memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); + } + } + if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) + { + return -1; + } + + init_frame(pbi); + + if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data), + pbi->decrypt_cb, pbi->decrypt_state)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder 0"); + if (pc->frame_type == KEY_FRAME) { + (void)vp8_read_bit(bc); // colorspace + pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); + } + + /* Is segmentation enabled */ + xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); + + if (xd->segmentation_enabled) + { + /* Signal whether or not the segmentation map is being explicitly updated this frame. */ + xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); + xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); + + if (xd->update_mb_segmentation_data) + { + xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); + + memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); + + /* For each segmentation feature (Quant and loop filter level) */ + for (i = 0; i < MB_LVL_MAX; i++) + { + for (j = 0; j < MAX_MB_SEGMENTS; j++) + { + /* Frame level data */ + if (vp8_read_bit(bc)) + { + xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); + + if (vp8_read_bit(bc)) + xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; + } + else + xd->segment_feature_data[i][j] = 0; + } + } + } + + if (xd->update_mb_segmentation_map) + { + /* Which macro block level features are enabled */ + memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + + /* Read the probs used to decode the segment id for each macro block. */ + for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) + { + /* If not explicitly set value is defaulted to 255 by memset above */ + if (vp8_read_bit(bc)) + xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); + } + } + } + else + { + /* No segmentation updates on this frame */ + xd->update_mb_segmentation_map = 0; + xd->update_mb_segmentation_data = 0; + } + + /* Read the loop filter level and type */ + pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc); + pc->filter_level = vp8_read_literal(bc, 6); + pc->sharpness_level = vp8_read_literal(bc, 3); + + /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */ + xd->mode_ref_lf_delta_update = 0; + xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); + + if (xd->mode_ref_lf_delta_enabled) + { + /* Do the deltas need to be updated */ + xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); + + if (xd->mode_ref_lf_delta_update) + { + /* Send update */ + for (i = 0; i < MAX_REF_LF_DELTAS; i++) + { + if (vp8_read_bit(bc)) + { + /*sign = vp8_read_bit( bc );*/ + xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); + + if (vp8_read_bit(bc)) /* Apply sign */ + xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; + } + } + + /* Send update */ + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) + { + if (vp8_read_bit(bc)) + { + /*sign = vp8_read_bit( bc );*/ + xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); + + if (vp8_read_bit(bc)) /* Apply sign */ + xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; + } + } + } + } + + setup_token_decoder(pbi, data + first_partition_length_in_bytes); + + xd->current_bc = &pbi->mbc[0]; + + /* Read the default quantizers. */ + { + int Q, q_update; + + Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ + pc->base_qindex = Q; + q_update = 0; + pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); + pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update); + pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update); + pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update); + pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update); + + if (q_update) + vp8cx_init_de_quantizer(pbi); + + /* MB level dequantizer setup */ + vp8_mb_init_dequantizer(pbi, &pbi->mb); + } + + /* Determine if the golden frame or ARF buffer should be updated and how. + * For all non key frames the GF and ARF refresh flags and sign bias + * flags must be set explicitly. + */ + if (pc->frame_type != KEY_FRAME) + { + /* Should the GF or ARF be updated from the current frame */ + pc->refresh_golden_frame = vp8_read_bit(bc); +#if CONFIG_ERROR_CONCEALMENT + /* Assume we shouldn't refresh golden if the bit is missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->refresh_golden_frame = 0; +#endif + + pc->refresh_alt_ref_frame = vp8_read_bit(bc); +#if CONFIG_ERROR_CONCEALMENT + /* Assume we shouldn't refresh altref if the bit is missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->refresh_alt_ref_frame = 0; +#endif + + /* Buffer to buffer copy flags. */ + pc->copy_buffer_to_gf = 0; + + if (!pc->refresh_golden_frame) + pc->copy_buffer_to_gf = vp8_read_literal(bc, 2); + +#if CONFIG_ERROR_CONCEALMENT + /* Assume we shouldn't copy to the golden if the bit is missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->copy_buffer_to_gf = 0; +#endif + + pc->copy_buffer_to_arf = 0; + + if (!pc->refresh_alt_ref_frame) + pc->copy_buffer_to_arf = vp8_read_literal(bc, 2); + +#if CONFIG_ERROR_CONCEALMENT + /* Assume we shouldn't copy to the alt-ref if the bit is missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->copy_buffer_to_arf = 0; +#endif + + + pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc); + pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc); + } + + pc->refresh_entropy_probs = vp8_read_bit(bc); +#if CONFIG_ERROR_CONCEALMENT + /* Assume we shouldn't refresh the probabilities if the bit is + * missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->refresh_entropy_probs = 0; +#endif + if (pc->refresh_entropy_probs == 0) + { + memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); + } + + pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); + +#if CONFIG_ERROR_CONCEALMENT + /* Assume we should refresh the last frame if the bit is missing */ + xd->corrupted |= vp8dx_bool_error(bc); + if (pbi->ec_active && xd->corrupted) + pc->refresh_last_frame = 1; +#endif + + if (0) + { + FILE *z = fopen("decodestats.stt", "a"); + fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", + pc->current_video_frame, + pc->frame_type, + pc->refresh_golden_frame, + pc->refresh_alt_ref_frame, + pc->refresh_last_frame, + pc->base_qindex); + fclose(z); + } + + { + pbi->independent_partitions = 1; + + /* read coef probability tree */ + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < COEF_BANDS; j++) + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + for (l = 0; l < ENTROPY_NODES; l++) + { + + vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l; + + if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l])) + { + *p = (vp8_prob)vp8_read_literal(bc, 8); + + } + if (k > 0 && *p != pc->fc.coef_probs[i][j][k-1][l]) + pbi->independent_partitions = 0; + + } + } + + /* clear out the coeff buffer */ + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + + vp8_decode_mode_mvs(pbi); + +#if CONFIG_ERROR_CONCEALMENT + if (pbi->ec_active && + pbi->mvs_corrupt_from_mb < (unsigned int)pc->mb_cols * pc->mb_rows) + { + /* Motion vectors are missing in this frame. We will try to estimate + * them and then continue decoding the frame as usual */ + vp8_estimate_missing_mvs(pbi); + } +#endif + + memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); + pbi->frame_corrupt_residual = 0; + +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) + { + unsigned int thread; + vp8mt_decode_mb_rows(pbi, xd); + vp8_yv12_extend_frame_borders(yv12_fb_new); + for (thread = 0; thread < pbi->decoding_thread_count; ++thread) + corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; + } + else +#endif + { + decode_mb_rows(pbi); + corrupt_tokens |= xd->corrupted; + } + + /* Collect information about decoder corruption. */ + /* 1. Check first boolean decoder for errors. */ + yv12_fb_new->corrupted = vp8dx_bool_error(bc); + /* 2. Check the macroblock information */ + yv12_fb_new->corrupted |= corrupt_tokens; + + if (!pbi->decoded_key_frame) + { + if (pc->frame_type == KEY_FRAME && + !yv12_fb_new->corrupted) + pbi->decoded_key_frame = 1; + else + vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, + "A stream must start with a complete key frame"); + } + + /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ + + if (pc->refresh_entropy_probs == 0) + { + memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); + pbi->independent_partitions = prev_independent_partitions; + } + +#ifdef PACKET_TESTING + { + FILE *f = fopen("decompressor.VP8", "ab"); + unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8; + fwrite((void *) &size, 4, 1, f); + fwrite((void *) pbi->Source, size, 1, f); + fclose(f); + } +#endif + + return 0; +} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.c b/thirdparty/libvpx/vp8/decoder/decodemv.c new file mode 100644 index 00000000000..1d155e7e16d --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/decodemv.c @@ -0,0 +1,670 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "treereader.h" +#include "vp8/common/entropymv.h" +#include "vp8/common/entropymode.h" +#include "onyxd_int.h" +#include "vp8/common/findnearmv.h" + +#if CONFIG_DEBUG +#include +#endif +static B_PREDICTION_MODE read_bmode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_bmode_tree, p); + + return (B_PREDICTION_MODE)i; +} + +static MB_PREDICTION_MODE read_ymode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_ymode_tree, p); + + return (MB_PREDICTION_MODE)i; +} + +static MB_PREDICTION_MODE read_kf_ymode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); + + return (MB_PREDICTION_MODE)i; +} + +static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); + + return (MB_PREDICTION_MODE)i; +} + +static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) +{ + vp8_reader *const bc = & pbi->mbc[8]; + const int mis = pbi->common.mode_info_stride; + + mi->mbmi.ref_frame = INTRA_FRAME; + mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); + + if (mi->mbmi.mode == B_PRED) + { + int i = 0; + mi->mbmi.is_4x4 = 1; + + do + { + const B_PREDICTION_MODE A = above_block_mode(mi, i, mis); + const B_PREDICTION_MODE L = left_block_mode(mi, i); + + mi->bmi[i].as_mode = + read_bmode(bc, vp8_kf_bmode_prob [A] [L]); + } + while (++i < 16); + } + + mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); +} + +static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) +{ + const vp8_prob *const p = (const vp8_prob *) mvc; + int x = 0; + + if (vp8_read(r, p [mvpis_short])) /* Large */ + { + int i = 0; + + do + { + x += vp8_read(r, p [MVPbits + i]) << i; + } + while (++i < 3); + + i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ + + do + { + x += vp8_read(r, p [MVPbits + i]) << i; + } + while (--i > 3); + + if (!(x & 0xFFF0) || vp8_read(r, p [MVPbits + 3])) + x += 8; + } + else /* small */ + x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort); + + if (x && vp8_read(r, p [MVPsign])) + x = -x; + + return x; +} + +static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) +{ + mv->row = (short)(read_mvcomponent(r, mvc) * 2); + mv->col = (short)(read_mvcomponent(r, ++mvc) * 2); +} + + +static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) +{ + int i = 0; + + do + { + const vp8_prob *up = vp8_mv_update_probs[i].prob; + vp8_prob *p = (vp8_prob *)(mvc + i); + vp8_prob *const pstop = p + MVPcount; + + do + { + if (vp8_read(bc, *up++)) + { + const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); + + *p = x ? x << 1 : 1; + } + } + while (++p < pstop); + } + while (++i < 2); +} + +static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1}; +static const unsigned char mbsplit_fill_offset[4][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}, + { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; + + +static void mb_mode_mv_init(VP8D_COMP *pbi) +{ + vp8_reader *const bc = & pbi->mbc[8]; + MV_CONTEXT *const mvc = pbi->common.fc.mvc; + +#if CONFIG_ERROR_CONCEALMENT + /* Default is that no macroblock is corrupt, therefore we initialize + * mvs_corrupt_from_mb to something very big, which we can be sure is + * outside the frame. */ + pbi->mvs_corrupt_from_mb = UINT_MAX; +#endif + /* Read the mb_no_coeff_skip flag */ + pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); + + pbi->prob_skip_false = 0; + if (pbi->common.mb_no_coeff_skip) + pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); + + if(pbi->common.frame_type != KEY_FRAME) + { + pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); + + if (vp8_read_bit(bc)) + { + int i = 0; + + do + { + pbi->common.fc.ymode_prob[i] = + (vp8_prob) vp8_read_literal(bc, 8); + } + while (++i < 4); + } + + if (vp8_read_bit(bc)) + { + int i = 0; + + do + { + pbi->common.fc.uv_mode_prob[i] = + (vp8_prob) vp8_read_literal(bc, 8); + } + while (++i < 3); + } + + read_mvcontexts(bc, mvc); + } +} + +const vp8_prob vp8_sub_mv_ref_prob3 [8][VP8_SUBMVREFS-1] = +{ + { 147, 136, 18 }, /* SUBMVREF_NORMAL */ + { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ + { 106, 145, 1 }, /* SUBMVREF_LEFT_ZED */ + { 208, 1 , 1 }, /* SUBMVREF_LEFT_ABOVE_ZED */ + { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ + { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ + { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ + { 208, 1 , 1 } /* SUBMVREF_LEFT_ABOVE_ZED */ +}; + +static +const vp8_prob * get_sub_mv_ref_prob(const int left, const int above) +{ + int lez = (left == 0); + int aez = (above == 0); + int lea = (left == above); + const vp8_prob * prob; + + prob = vp8_sub_mv_ref_prob3[(aez << 2) | + (lez << 1) | + (lea)]; + + return prob; +} + +static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, + const MODE_INFO *left_mb, const MODE_INFO *above_mb, + MB_MODE_INFO *mbmi, int_mv best_mv, + MV_CONTEXT *const mvc, int mb_to_left_edge, + int mb_to_right_edge, int mb_to_top_edge, + int mb_to_bottom_edge) +{ + int s; /* split configuration (16x8, 8x16, 8x8, 4x4) */ + int num_p; /* number of partitions in the split configuration + (see vp8_mbsplit_count) */ + int j = 0; + + s = 3; + num_p = 16; + if( vp8_read(bc, 110) ) + { + s = 2; + num_p = 4; + if( vp8_read(bc, 111) ) + { + s = vp8_read(bc, 150); + num_p = 2; + } + } + + do /* for each subset j */ + { + int_mv leftmv, abovemv; + int_mv blockmv; + int k; /* first block in subset j */ + + const vp8_prob *prob; + k = vp8_mbsplit_offset[s][j]; + + if (!(k & 3)) + { + /* On L edge, get from MB to left of us */ + if(left_mb->mbmi.mode != SPLITMV) + leftmv.as_int = left_mb->mbmi.mv.as_int; + else + leftmv.as_int = (left_mb->bmi + k + 4 - 1)->mv.as_int; + } + else + leftmv.as_int = (mi->bmi + k - 1)->mv.as_int; + + if (!(k >> 2)) + { + /* On top edge, get from MB above us */ + if(above_mb->mbmi.mode != SPLITMV) + abovemv.as_int = above_mb->mbmi.mv.as_int; + else + abovemv.as_int = (above_mb->bmi + k + 16 - 4)->mv.as_int; + } + else + abovemv.as_int = (mi->bmi + k - 4)->mv.as_int; + + prob = get_sub_mv_ref_prob(leftmv.as_int, abovemv.as_int); + + if( vp8_read(bc, prob[0]) ) + { + if( vp8_read(bc, prob[1]) ) + { + blockmv.as_int = 0; + if( vp8_read(bc, prob[2]) ) + { + blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2; + blockmv.as_mv.row += best_mv.as_mv.row; + blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2; + blockmv.as_mv.col += best_mv.as_mv.col; + } + } + else + { + blockmv.as_int = abovemv.as_int; + } + } + else + { + blockmv.as_int = leftmv.as_int; + } + + mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, + mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + + { + /* Fill (uniform) modes, mvs of jth subset. + Must do it here because ensuing subsets can + refer back to us via "left" or "above". */ + const unsigned char *fill_offset; + unsigned int fill_count = mbsplit_fill_count[s]; + + fill_offset = &mbsplit_fill_offset[s] + [(unsigned char)j * mbsplit_fill_count[s]]; + + do { + mi->bmi[ *fill_offset].mv.as_int = blockmv.as_int; + fill_offset++; + }while (--fill_count); + } + + } + while (++j < num_p); + + mbmi->partitioning = s; +} + +static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) +{ + vp8_reader *const bc = & pbi->mbc[8]; + mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); + if (mbmi->ref_frame) /* inter MB */ + { + enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; + int cnt[4]; + int *cntx = cnt; + int_mv near_mvs[4]; + int_mv *nmv = near_mvs; + const int mis = pbi->mb.mode_info_stride; + const MODE_INFO *above = mi - mis; + const MODE_INFO *left = mi - 1; + const MODE_INFO *aboveleft = above - 1; + int *ref_frame_sign_bias = pbi->common.ref_frame_sign_bias; + + mbmi->need_to_clamp_mvs = 0; + + if (vp8_read(bc, pbi->prob_last)) + { + mbmi->ref_frame = + (MV_REFERENCE_FRAME)((int)(2 + vp8_read(bc, pbi->prob_gf))); + } + + /* Zero accumulators */ + nmv[0].as_int = nmv[1].as_int = nmv[2].as_int = 0; + cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; + + /* Process above */ + if (above->mbmi.ref_frame != INTRA_FRAME) + { + if (above->mbmi.mv.as_int) + { + (++nmv)->as_int = above->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], + mbmi->ref_frame, nmv, ref_frame_sign_bias); + ++cntx; + } + + *cntx += 2; + } + + /* Process left */ + if (left->mbmi.ref_frame != INTRA_FRAME) + { + if (left->mbmi.mv.as_int) + { + int_mv this_mv; + + this_mv.as_int = left->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], + mbmi->ref_frame, &this_mv, ref_frame_sign_bias); + + if (this_mv.as_int != nmv->as_int) + { + (++nmv)->as_int = this_mv.as_int; + ++cntx; + } + + *cntx += 2; + } + else + cnt[CNT_INTRA] += 2; + } + + /* Process above left */ + if (aboveleft->mbmi.ref_frame != INTRA_FRAME) + { + if (aboveleft->mbmi.mv.as_int) + { + int_mv this_mv; + + this_mv.as_int = aboveleft->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], + mbmi->ref_frame, &this_mv, ref_frame_sign_bias); + + if (this_mv.as_int != nmv->as_int) + { + (++nmv)->as_int = this_mv.as_int; + ++cntx; + } + + *cntx += 1; + } + else + cnt[CNT_INTRA] += 1; + } + + if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_INTRA]] [0]) ) + { + + /* If we have three distinct MV's ... */ + /* See if above-left MV can be merged with NEAREST */ + cnt[CNT_NEAREST] += ( (cnt[CNT_SPLITMV] > 0) & + (nmv->as_int == near_mvs[CNT_NEAREST].as_int)); + + /* Swap near and nearest if necessary */ + if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) + { + int tmp; + tmp = cnt[CNT_NEAREST]; + cnt[CNT_NEAREST] = cnt[CNT_NEAR]; + cnt[CNT_NEAR] = tmp; + tmp = near_mvs[CNT_NEAREST].as_int; + near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; + near_mvs[CNT_NEAR].as_int = tmp; + } + + if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAREST]] [1]) ) + { + + if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAR]] [2]) ) + { + int mb_to_top_edge; + int mb_to_bottom_edge; + int mb_to_left_edge; + int mb_to_right_edge; + MV_CONTEXT *const mvc = pbi->common.fc.mvc; + int near_index; + + mb_to_top_edge = pbi->mb.mb_to_top_edge; + mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; + mb_to_top_edge -= LEFT_TOP_MARGIN; + mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; + mb_to_right_edge = pbi->mb.mb_to_right_edge; + mb_to_right_edge += RIGHT_BOTTOM_MARGIN; + mb_to_left_edge = pbi->mb.mb_to_left_edge; + mb_to_left_edge -= LEFT_TOP_MARGIN; + + /* Use near_mvs[0] to store the "best" MV */ + near_index = CNT_INTRA + + (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]); + + vp8_clamp_mv2(&near_mvs[near_index], &pbi->mb); + + cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) + + (left->mbmi.mode == SPLITMV)) * 2 + + (aboveleft->mbmi.mode == SPLITMV); + + if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_SPLITMV]] [3]) ) + { + decode_split_mv(bc, mi, left, above, + mbmi, + near_mvs[near_index], + mvc, mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + mbmi->mv.as_int = mi->bmi[15].mv.as_int; + mbmi->mode = SPLITMV; + mbmi->is_4x4 = 1; + } + else + { + int_mv *const mbmi_mv = & mbmi->mv; + read_mv(bc, &mbmi_mv->as_mv, (const MV_CONTEXT *) mvc); + mbmi_mv->as_mv.row += near_mvs[near_index].as_mv.row; + mbmi_mv->as_mv.col += near_mvs[near_index].as_mv.col; + + /* Don't need to check this on NEARMV and NEARESTMV + * modes since those modes clamp the MV. The NEWMV mode + * does not, so signal to the prediction stage whether + * special handling may be required. + */ + mbmi->need_to_clamp_mvs = + vp8_check_mv_bounds(mbmi_mv, mb_to_left_edge, + mb_to_right_edge, + mb_to_top_edge, + mb_to_bottom_edge); + mbmi->mode = NEWMV; + } + } + else + { + mbmi->mode = NEARMV; + mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int; + vp8_clamp_mv2(&mbmi->mv, &pbi->mb); + } + } + else + { + mbmi->mode = NEARESTMV; + mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int; + vp8_clamp_mv2(&mbmi->mv, &pbi->mb); + } + } + else + { + mbmi->mode = ZEROMV; + mbmi->mv.as_int = 0; + } + +#if CONFIG_ERROR_CONCEALMENT + if(pbi->ec_enabled && (mbmi->mode != SPLITMV)) + { + mi->bmi[ 0].mv.as_int = + mi->bmi[ 1].mv.as_int = + mi->bmi[ 2].mv.as_int = + mi->bmi[ 3].mv.as_int = + mi->bmi[ 4].mv.as_int = + mi->bmi[ 5].mv.as_int = + mi->bmi[ 6].mv.as_int = + mi->bmi[ 7].mv.as_int = + mi->bmi[ 8].mv.as_int = + mi->bmi[ 9].mv.as_int = + mi->bmi[10].mv.as_int = + mi->bmi[11].mv.as_int = + mi->bmi[12].mv.as_int = + mi->bmi[13].mv.as_int = + mi->bmi[14].mv.as_int = + mi->bmi[15].mv.as_int = mbmi->mv.as_int; + } +#endif + } + else + { + /* required for left and above block mv */ + mbmi->mv.as_int = 0; + + /* MB is intra coded */ + if ((mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED) + { + int j = 0; + mbmi->is_4x4 = 1; + do + { + mi->bmi[j].as_mode = read_bmode(bc, pbi->common.fc.bmode_prob); + } + while (++j < 16); + } + + mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob); + } + +} + +static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) +{ + /* Is segmentation enabled */ + if (x->segmentation_enabled && x->update_mb_segmentation_map) + { + /* If so then read the segment id. */ + if (vp8_read(r, x->mb_segment_tree_probs[0])) + mi->segment_id = + (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); + else + mi->segment_id = + (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); + } +} + +static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, + MB_MODE_INFO *mbmi) +{ + (void)mbmi; + + /* Read the Macroblock segmentation map if it is being updated explicitly + * this frame (reset to 0 above by default) + * By default on a key frame reset all MBs to segment 0 + */ + if (pbi->mb.update_mb_segmentation_map) + read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); + else if(pbi->common.frame_type == KEY_FRAME) + mi->mbmi.segment_id = 0; + + /* Read the macroblock coeff skip flag if this feature is in use, + * else default to 0 */ + if (pbi->common.mb_no_coeff_skip) + mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); + else + mi->mbmi.mb_skip_coeff = 0; + + mi->mbmi.is_4x4 = 0; + if(pbi->common.frame_type == KEY_FRAME) + read_kf_modes(pbi, mi); + else + read_mb_modes_mv(pbi, mi, &mi->mbmi); + +} + +void vp8_decode_mode_mvs(VP8D_COMP *pbi) +{ + MODE_INFO *mi = pbi->common.mi; + int mb_row = -1; + int mb_to_right_edge_start; + + mb_mode_mv_init(pbi); + + pbi->mb.mb_to_top_edge = 0; + pbi->mb.mb_to_bottom_edge = ((pbi->common.mb_rows - 1) * 16) << 3; + mb_to_right_edge_start = ((pbi->common.mb_cols - 1) * 16) << 3; + + while (++mb_row < pbi->common.mb_rows) + { + int mb_col = -1; + + pbi->mb.mb_to_left_edge = 0; + pbi->mb.mb_to_right_edge = mb_to_right_edge_start; + + while (++mb_col < pbi->common.mb_cols) + { +#if CONFIG_ERROR_CONCEALMENT + int mb_num = mb_row * pbi->common.mb_cols + mb_col; +#endif + + decode_mb_mode_mvs(pbi, mi, &mi->mbmi); + +#if CONFIG_ERROR_CONCEALMENT + /* look for corruption. set mvs_corrupt_from_mb to the current + * mb_num if the frame is corrupt from this macroblock. */ + if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < + (int)pbi->mvs_corrupt_from_mb) + { + pbi->mvs_corrupt_from_mb = mb_num; + /* no need to continue since the partition is corrupt from + * here on. + */ + return; + } +#endif + + pbi->mb.mb_to_left_edge -= (16 << 3); + pbi->mb.mb_to_right_edge -= (16 << 3); + mi++; /* next macroblock */ + } + pbi->mb.mb_to_top_edge -= (16 << 3); + pbi->mb.mb_to_bottom_edge -= (16 << 3); + + mi++; /* skip left predictor each row */ + } +} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.h b/thirdparty/libvpx/vp8/decoder/decodemv.h new file mode 100644 index 00000000000..f33b07351d3 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/decodemv.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_DECODER_DECODEMV_H_ +#define VP8_DECODER_DECODEMV_H_ + +#include "onyxd_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_decode_mode_mvs(VP8D_COMP *); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decoderthreading.h b/thirdparty/libvpx/vp8/decoder/decoderthreading.h new file mode 100644 index 00000000000..c563cf6e93a --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/decoderthreading.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_DECODER_DECODERTHREADING_H_ +#define VP8_DECODER_DECODERTHREADING_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#if CONFIG_MULTITHREAD +void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); +void vp8_decoder_remove_threads(VP8D_COMP *pbi); +void vp8_decoder_create_threads(VP8D_COMP *pbi); +void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); +void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_DECODERTHREADING_H_ diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.c b/thirdparty/libvpx/vp8/decoder/detokenize.c new file mode 100644 index 00000000000..fcc7533c50f --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/detokenize.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vp8/common/blockd.h" +#include "onyxd_int.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "detokenize.h" + +void vp8_reset_mb_tokens_context(MACROBLOCKD *x) +{ + ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); + ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); + + memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + + /* Clear entropy contexts for Y2 blocks */ + if (!x->mode_info_context->mbmi.is_4x4) + { + a_ctx[8] = l_ctx[8] = 0; + } +} + +/* + ------------------------------------------------------------------------------ + Residual decoding (Paragraph 13.2 / 13.3) +*/ +static const uint8_t kBands[16 + 1] = { + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, + 0 /* extra entry as sentinel */ +}; + +static const uint8_t kCat3[] = { 173, 148, 140, 0 }; +static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; +static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; +static const uint8_t kCat6[] = + { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; +static const uint8_t kZigzag[16] = { + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +#define VP8GetBit vp8dx_decode_bool +#define NUM_PROBAS 11 +#define NUM_CTX 3 + +/* for const-casting */ +typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; + +static int GetSigned(BOOL_DECODER *br, int value_to_sign) +{ + int split = (br->range + 1) >> 1; + VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); + int v; + + if(br->count < 0) + vp8dx_bool_decoder_fill(br); + + if ( br->value < bigsplit ) + { + br->range = split; + v= value_to_sign; + } + else + { + br->range = br->range-split; + br->value = br->value-bigsplit; + v = -value_to_sign; + } + br->range +=br->range; + br->value +=br->value; + br->count--; + + return v; +} +/* + Returns the position of the last non-zero coeff plus one + (and 0 if there's no coeff at all) +*/ +static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob, + int ctx, int n, int16_t* out) +{ + const uint8_t* p = prob[n][ctx]; + if (!VP8GetBit(br, p[0])) + { /* first EOB is more a 'CBP' bit. */ + return 0; + } + while (1) + { + ++n; + if (!VP8GetBit(br, p[1])) + { + p = prob[kBands[n]][0]; + } + else + { /* non zero coeff */ + int v, j; + if (!VP8GetBit(br, p[2])) + { + p = prob[kBands[n]][1]; + v = 1; + } + else + { + if (!VP8GetBit(br, p[3])) + { + if (!VP8GetBit(br, p[4])) + { + v = 2; + } + else + { + v = 3 + VP8GetBit(br, p[5]); + } + } + else + { + if (!VP8GetBit(br, p[6])) + { + if (!VP8GetBit(br, p[7])) + { + v = 5 + VP8GetBit(br, 159); + } else + { + v = 7 + 2 * VP8GetBit(br, 165); + v += VP8GetBit(br, 145); + } + } + else + { + const uint8_t* tab; + const int bit1 = VP8GetBit(br, p[8]); + const int bit0 = VP8GetBit(br, p[9 + bit1]); + const int cat = 2 * bit1 + bit0; + v = 0; + for (tab = kCat3456[cat]; *tab; ++tab) + { + v += v + VP8GetBit(br, *tab); + } + v += 3 + (8 << cat); + } + } + p = prob[kBands[n]][2]; + } + j = kZigzag[n - 1]; + + out[j] = GetSigned(br, v); + + if (n == 16 || !VP8GetBit(br, p[0])) + { /* EOB */ + return n; + } + } + if (n == 16) + { + return 16; + } + } +} + +int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) +{ + BOOL_DECODER *bc = x->current_bc; + const FRAME_CONTEXT * const fc = &dx->common.fc; + char *eobs = x->eobs; + + int i; + int nonzeros; + int eobtotal = 0; + + short *qcoeff_ptr; + ProbaArray coef_probs; + ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); + ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); + ENTROPY_CONTEXT *a; + ENTROPY_CONTEXT *l; + int skip_dc = 0; + + qcoeff_ptr = &x->qcoeff[0]; + + if (!x->mode_info_context->mbmi.is_4x4) + { + a = a_ctx + 8; + l = l_ctx + 8; + + coef_probs = fc->coef_probs [1]; + + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16); + *a = *l = (nonzeros > 0); + + eobs[24] = nonzeros; + eobtotal += nonzeros - 16; + + coef_probs = fc->coef_probs [0]; + skip_dc = 1; + } + else + { + coef_probs = fc->coef_probs [3]; + skip_dc = 0; + } + + for (i = 0; i < 16; ++i) + { + a = a_ctx + (i&3); + l = l_ctx + ((i&0xc)>>2); + + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr); + *a = *l = (nonzeros > 0); + + nonzeros += skip_dc; + eobs[i] = nonzeros; + eobtotal += nonzeros; + qcoeff_ptr += 16; + } + + coef_probs = fc->coef_probs [2]; + + a_ctx += 4; + l_ctx += 4; + for (i = 16; i < 24; ++i) + { + a = a_ctx + ((i > 19)<<1) + (i&1); + l = l_ctx + ((i > 19)<<1) + ((i&3)>1); + + nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr); + *a = *l = (nonzeros > 0); + + eobs[i] = nonzeros; + eobtotal += nonzeros; + qcoeff_ptr += 16; + } + + return eobtotal; +} + diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.h b/thirdparty/libvpx/vp8/decoder/detokenize.h new file mode 100644 index 00000000000..f0b125444f0 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/detokenize.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP8_DECODER_DETOKENIZE_H_ +#define VP8_DECODER_DETOKENIZE_H_ + +#include "onyxd_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_reset_mb_tokens_context(MACROBLOCKD *x); +int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_if.c b/thirdparty/libvpx/vp8/decoder/onyxd_if.c new file mode 100644 index 00000000000..3468268a2a1 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/onyxd_if.c @@ -0,0 +1,521 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vp8/common/onyxc_int.h" +#if CONFIG_POSTPROC +#include "vp8/common/postproc.h" +#endif +#include "vp8/common/onyxd.h" +#include "onyxd_int.h" +#include "vpx_mem/vpx_mem.h" +#include "vp8/common/alloccommon.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/swapyv12buffer.h" +#include "vp8/common/threading.h" +#include "decoderthreading.h" +#include +#include + +#include "vp8/common/quant_common.h" +#include "vp8/common/reconintra.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" +#include "vpx_scale/vpx_scale.h" +#include "vp8/common/systemdependent.h" +#include "vpx_ports/vpx_once.h" +#include "vpx_ports/vpx_timer.h" +#include "detokenize.h" +#if CONFIG_ERROR_CONCEALMENT +#include "error_concealment.h" +#endif +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif + +extern void vp8_init_loop_filter(VP8_COMMON *cm); +extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); +static int get_free_fb (VP8_COMMON *cm); +static void ref_cnt_fb (int *buf, int *idx, int new_idx); + +static void initialize_dec(void) { + static volatile int init_done = 0; + + if (!init_done) + { + vpx_dsp_rtcd(); + vp8_init_intra_predictors(); + init_done = 1; + } +} + +static void remove_decompressor(VP8D_COMP *pbi) +{ +#if CONFIG_ERROR_CONCEALMENT + vp8_de_alloc_overlap_lists(pbi); +#endif + vp8_remove_common(&pbi->common); + vpx_free(pbi); +} + +static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) +{ + VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); + + if (!pbi) + return NULL; + + memset(pbi, 0, sizeof(VP8D_COMP)); + + if (setjmp(pbi->common.error.jmp)) + { + pbi->common.error.setjmp = 0; + remove_decompressor(pbi); + return 0; + } + + pbi->common.error.setjmp = 1; + + vp8_create_common(&pbi->common); + + pbi->common.current_video_frame = 0; + pbi->ready_for_new_data = 1; + + /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid + * unnecessary calling of vp8cx_init_de_quantizer() for every frame. + */ + vp8cx_init_de_quantizer(pbi); + + vp8_loop_filter_init(&pbi->common); + + pbi->common.error.setjmp = 0; + +#if CONFIG_ERROR_CONCEALMENT + pbi->ec_enabled = oxcf->error_concealment; + pbi->overlaps = NULL; +#else + (void)oxcf; + pbi->ec_enabled = 0; +#endif + /* Error concealment is activated after a key frame has been + * decoded without errors when error concealment is enabled. + */ + pbi->ec_active = 0; + + pbi->decoded_key_frame = 0; + + /* Independent partitions is activated when a frame updates the + * token probability table to have equal probabilities over the + * PREV_COEF context. + */ + pbi->independent_partitions = 0; + + vp8_setup_block_dptrs(&pbi->mb); + + once(initialize_dec); + + return pbi; +} + +vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) +{ + VP8_COMMON *cm = &pbi->common; + int ref_fb_idx; + + if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_idx = cm->lst_fb_idx; + else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_idx = cm->gld_fb_idx; + else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_idx = cm->alt_fb_idx; + else{ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + "Invalid reference frame"); + return pbi->common.error.error_code; + } + + if(cm->yv12_fb[ref_fb_idx].y_height != sd->y_height || + cm->yv12_fb[ref_fb_idx].y_width != sd->y_width || + cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height || + cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width){ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + } + else + vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); + + return pbi->common.error.error_code; +} + + +vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) +{ + VP8_COMMON *cm = &pbi->common; + int *ref_fb_ptr = NULL; + int free_fb; + + if (ref_frame_flag == VP8_LAST_FRAME) + ref_fb_ptr = &cm->lst_fb_idx; + else if (ref_frame_flag == VP8_GOLD_FRAME) + ref_fb_ptr = &cm->gld_fb_idx; + else if (ref_frame_flag == VP8_ALTR_FRAME) + ref_fb_ptr = &cm->alt_fb_idx; + else{ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + "Invalid reference frame"); + return pbi->common.error.error_code; + } + + if(cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height || + cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width || + cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height || + cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width){ + vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + } + else{ + /* Find an empty frame buffer. */ + free_fb = get_free_fb(cm); + /* Decrease fb_idx_ref_cnt since it will be increased again in + * ref_cnt_fb() below. */ + cm->fb_idx_ref_cnt[free_fb]--; + + /* Manage the reference counters and copy image. */ + ref_cnt_fb (cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb); + vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]); + } + + return pbi->common.error.error_code; +} + +static int get_free_fb (VP8_COMMON *cm) +{ + int i; + for (i = 0; i < NUM_YV12_BUFFERS; i++) + if (cm->fb_idx_ref_cnt[i] == 0) + break; + + assert(i < NUM_YV12_BUFFERS); + cm->fb_idx_ref_cnt[i] = 1; + return i; +} + +static void ref_cnt_fb (int *buf, int *idx, int new_idx) +{ + if (buf[*idx] > 0) + buf[*idx]--; + + *idx = new_idx; + + buf[new_idx]++; +} + +/* If any buffer copy / swapping is signalled it should be done here. */ +static int swap_frame_buffers (VP8_COMMON *cm) +{ + int err = 0; + + /* The alternate reference frame or golden frame can be updated + * using the new, last, or golden/alt ref frame. If it + * is updated using the newly decoded frame it is a refresh. + * An update using the last or golden/alt ref frame is a copy. + */ + if (cm->copy_buffer_to_arf) + { + int new_fb = 0; + + if (cm->copy_buffer_to_arf == 1) + new_fb = cm->lst_fb_idx; + else if (cm->copy_buffer_to_arf == 2) + new_fb = cm->gld_fb_idx; + else + err = -1; + + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); + } + + if (cm->copy_buffer_to_gf) + { + int new_fb = 0; + + if (cm->copy_buffer_to_gf == 1) + new_fb = cm->lst_fb_idx; + else if (cm->copy_buffer_to_gf == 2) + new_fb = cm->alt_fb_idx; + else + err = -1; + + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); + } + + if (cm->refresh_golden_frame) + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); + + if (cm->refresh_alt_ref_frame) + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); + + if (cm->refresh_last_frame) + { + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); + + cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; + } + else + cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; + + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + + return err; +} + +static int check_fragments_for_errors(VP8D_COMP *pbi) +{ + if (!pbi->ec_active && + pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) + { + VP8_COMMON *cm = &pbi->common; + + /* If error concealment is disabled we won't signal missing frames + * to the decoder. + */ + if (cm->fb_idx_ref_cnt[cm->lst_fb_idx] > 1) + { + /* The last reference shares buffer with another reference + * buffer. Move it to its own buffer before setting it as + * corrupt, otherwise we will make multiple buffers corrupt. + */ + const int prev_idx = cm->lst_fb_idx; + cm->fb_idx_ref_cnt[prev_idx]--; + cm->lst_fb_idx = get_free_fb(cm); + vp8_yv12_copy_frame(&cm->yv12_fb[prev_idx], + &cm->yv12_fb[cm->lst_fb_idx]); + } + /* This is used to signal that we are missing frames. + * We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. + */ + cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + + /* Signal that we have no frame to show. */ + cm->show_frame = 0; + + /* Nothing more to do. */ + return 0; + } + + return 1; +} + +int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, + const uint8_t *source, + int64_t time_stamp) +{ + VP8_COMMON *cm = &pbi->common; + int retcode = -1; + (void)size; + (void)source; + + pbi->common.error.error_code = VPX_CODEC_OK; + + retcode = check_fragments_for_errors(pbi); + if(retcode <= 0) + return retcode; + + cm->new_fb_idx = get_free_fb (cm); + + /* setup reference frames for vp8_decode_frame */ + pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; + pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx]; + pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; + pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; + + if (setjmp(pbi->common.error.jmp)) + { + /* We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. + */ + cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + + goto decode_exit; + } + + pbi->common.error.setjmp = 1; + + retcode = vp8_decode_frame(pbi); + + if (retcode < 0) + { + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + + pbi->common.error.error_code = VPX_CODEC_ERROR; + goto decode_exit; + } + + if (swap_frame_buffers (cm)) + { + pbi->common.error.error_code = VPX_CODEC_ERROR; + goto decode_exit; + } + + vp8_clear_system_state(); + + if (cm->show_frame) + { + cm->current_video_frame++; + cm->show_frame_mi = cm->mi; + } + + #if CONFIG_ERROR_CONCEALMENT + /* swap the mode infos to storage for future error concealment */ + if (pbi->ec_enabled && pbi->common.prev_mi) + { + MODE_INFO* tmp = pbi->common.prev_mi; + int row, col; + pbi->common.prev_mi = pbi->common.mi; + pbi->common.mi = tmp; + + /* Propagate the segment_ids to the next frame */ + for (row = 0; row < pbi->common.mb_rows; ++row) + { + for (col = 0; col < pbi->common.mb_cols; ++col) + { + const int i = row*pbi->common.mode_info_stride + col; + pbi->common.mi[i].mbmi.segment_id = + pbi->common.prev_mi[i].mbmi.segment_id; + } + } + } +#endif + + pbi->ready_for_new_data = 0; + pbi->last_time_stamp = time_stamp; + +decode_exit: + pbi->common.error.setjmp = 0; + vp8_clear_system_state(); + return retcode; +} +int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) +{ + int ret = -1; + + if (pbi->ready_for_new_data == 1) + return ret; + + /* ie no raw frame to show!!! */ + if (pbi->common.show_frame == 0) + return ret; + + pbi->ready_for_new_data = 1; + *time_stamp = pbi->last_time_stamp; + *time_end_stamp = 0; + +#if CONFIG_POSTPROC + ret = vp8_post_proc_frame(&pbi->common, sd, flags); +#else + (void)flags; + + if (pbi->common.frame_to_show) + { + *sd = *pbi->common.frame_to_show; + sd->y_width = pbi->common.Width; + sd->y_height = pbi->common.Height; + sd->uv_height = pbi->common.Height / 2; + ret = 0; + } + else + { + ret = -1; + } + +#endif /*!CONFIG_POSTPROC*/ + vp8_clear_system_state(); + return ret; +} + + +/* This function as written isn't decoder specific, but the encoder has + * much faster ways of computing this, so it's ok for it to live in a + * decode specific file. + */ +int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ) +{ + const MODE_INFO *mi = oci->mi; + int mb_row, mb_col; + + for (mb_row = 0; mb_row < oci->mb_rows; mb_row++) + { + for (mb_col = 0; mb_col < oci->mb_cols; mb_col++,mi++) + { + if( mi->mbmi.ref_frame == ref_frame) + return 1; + } + mi++; + } + return 0; + +} + +int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) +{ + if(!fb->use_frame_threads) + { + /* decoder instance for single thread mode */ + fb->pbi[0] = create_decompressor(oxcf); + if(!fb->pbi[0]) + return VPX_CODEC_ERROR; + +#if CONFIG_MULTITHREAD + /* enable row-based threading only when use_frame_threads + * is disabled */ + fb->pbi[0]->max_threads = oxcf->max_threads; + vp8_decoder_create_threads(fb->pbi[0]); +#endif + } + else + { + /* TODO : create frame threads and decoder instances for each + * thread here */ + } + + return VPX_CODEC_OK; +} + +int vp8_remove_decoder_instances(struct frame_buffers *fb) +{ + if(!fb->use_frame_threads) + { + VP8D_COMP *pbi = fb->pbi[0]; + + if (!pbi) + return VPX_CODEC_ERROR; +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd) + vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); + vp8_decoder_remove_threads(pbi); +#endif + + /* decoder instance for single thread mode */ + remove_decompressor(pbi); + } + else + { + /* TODO : remove frame threads and decoder instances for each + * thread here */ + } + + return VPX_CODEC_OK; +} diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_int.h b/thirdparty/libvpx/vp8/decoder/onyxd_int.h new file mode 100644 index 00000000000..313fe01c077 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/onyxd_int.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_DECODER_ONYXD_INT_H_ +#define VP8_DECODER_ONYXD_INT_H_ + +#include "vpx_config.h" +#include "vp8/common/onyxd.h" +#include "treereader.h" +#include "vp8/common/onyxc_int.h" +#include "vp8/common/threading.h" + +#if CONFIG_ERROR_CONCEALMENT +#include "ec_types.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct +{ + int ithread; + void *ptr1; + void *ptr2; +} DECODETHREAD_DATA; + +typedef struct +{ + MACROBLOCKD mbd; +} MB_ROW_DEC; + + +typedef struct +{ + int enabled; + unsigned int count; + const unsigned char *ptrs[MAX_PARTITIONS]; + unsigned int sizes[MAX_PARTITIONS]; +} FRAGMENT_DATA; + +#define MAX_FB_MT_DEC 32 + +struct frame_buffers +{ + /* + * this struct will be populated with frame buffer management + * info in future commits. */ + + /* enable/disable frame-based threading */ + int use_frame_threads; + + /* decoder instances */ + struct VP8D_COMP *pbi[MAX_FB_MT_DEC]; + +}; + +typedef struct VP8D_COMP +{ + DECLARE_ALIGNED(16, MACROBLOCKD, mb); + + YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; + + DECLARE_ALIGNED(16, VP8_COMMON, common); + + /* the last partition will be used for the modes/mvs */ + vp8_reader mbc[MAX_PARTITIONS]; + + VP8D_CONFIG oxcf; + + FRAGMENT_DATA fragments; + +#if CONFIG_MULTITHREAD + /* variable for threading */ + + int b_multithreaded_rd; + int max_threads; + int current_mb_col_main; + unsigned int decoding_thread_count; + int allocated_decoding_thread_count; + + int mt_baseline_filter_level[MAX_MB_SEGMENTS]; + int sync_range; + int *mt_current_mb_col; /* Each row remembers its already decoded column. */ + pthread_mutex_t *pmutex; + pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */ + + unsigned char **mt_yabove_row; /* mb_rows x width */ + unsigned char **mt_uabove_row; + unsigned char **mt_vabove_row; + unsigned char **mt_yleft_col; /* mb_rows x 16 */ + unsigned char **mt_uleft_col; /* mb_rows x 8 */ + unsigned char **mt_vleft_col; /* mb_rows x 8 */ + + MB_ROW_DEC *mb_row_di; + DECODETHREAD_DATA *de_thread_data; + + pthread_t *h_decoding_thread; + sem_t *h_event_start_decoding; + sem_t h_event_end_decoding; + /* end of threading data */ +#endif + + int64_t last_time_stamp; + int ready_for_new_data; + + vp8_prob prob_intra; + vp8_prob prob_last; + vp8_prob prob_gf; + vp8_prob prob_skip_false; + +#if CONFIG_ERROR_CONCEALMENT + MB_OVERLAP *overlaps; + /* the mb num from which modes and mvs (first partition) are corrupt */ + unsigned int mvs_corrupt_from_mb; +#endif + int ec_enabled; + int ec_active; + int decoded_key_frame; + int independent_partitions; + int frame_corrupt_residual; + + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; +} VP8D_COMP; + +int vp8_decode_frame(VP8D_COMP *cpi); + +int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf); +int vp8_remove_decoder_instances(struct frame_buffers *fb); + +#if CONFIG_DEBUG +#define CHECK_MEM_ERROR(lval,expr) do {\ + lval = (expr); \ + if(!lval) \ + vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ + "Failed to allocate "#lval" at %s:%d", \ + __FILE__,__LINE__);\ + } while(0) +#else +#define CHECK_MEM_ERROR(lval,expr) do {\ + lval = (expr); \ + if(!lval) \ + vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ + "Failed to allocate "#lval);\ + } while(0) +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_ONYXD_INT_H_ diff --git a/thirdparty/libvpx/vp8/decoder/threading.c b/thirdparty/libvpx/vp8/decoder/threading.c new file mode 100644 index 00000000000..3c1b8387ec9 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/threading.c @@ -0,0 +1,928 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 +# include +#endif +#include "onyxd_int.h" +#include "vpx_mem/vpx_mem.h" +#include "vp8/common/threading.h" + +#include "vp8/common/loopfilter.h" +#include "vp8/common/extend.h" +#include "vpx_ports/vpx_timer.h" +#include "detokenize.h" +#include "vp8/common/reconintra4x4.h" +#include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/setupintrarecon.h" +#if CONFIG_ERROR_CONCEALMENT +#include "error_concealment.h" +#endif + +#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) +#define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ + CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ + memset((p), 0, (n) * sizeof(*(p))); \ +} while (0) + + +void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); + +static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) +{ + VP8_COMMON *const pc = & pbi->common; + int i; + + for (i = 0; i < count; i++) + { + MACROBLOCKD *mbd = &mbrd[i].mbd; + mbd->subpixel_predict = xd->subpixel_predict; + mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; + mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; + mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; + + mbd->frame_type = pc->frame_type; + mbd->pre = xd->pre; + mbd->dst = xd->dst; + + mbd->segmentation_enabled = xd->segmentation_enabled; + mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; + memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + + /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ + memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); + /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ + memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); + /*unsigned char mode_ref_lf_delta_enabled; + unsigned char mode_ref_lf_delta_update;*/ + mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; + mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; + + mbd->current_bc = &pbi->mbc[0]; + + memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); + memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); + memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); + + mbd->fullpixel_mask = 0xffffffff; + + if (pc->full_pixel) + mbd->fullpixel_mask = 0xfffffff8; + + } + + for (i = 0; i < pc->mb_rows; i++) + pbi->mt_current_mb_col[i] = -1; +} + +static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, + unsigned int mb_idx) +{ + MB_PREDICTION_MODE mode; + int i; +#if CONFIG_ERROR_CONCEALMENT + int corruption_detected = 0; +#else + (void)mb_idx; +#endif + + if (xd->mode_info_context->mbmi.mb_skip_coeff) + { + vp8_reset_mb_tokens_context(xd); + } + else if (!vp8dx_bool_error(xd->current_bc)) + { + int eobtotal; + eobtotal = vp8_decode_mb_tokens(pbi, xd); + + /* Special case: Force the loopfilter to skip when eobtotal is zero */ + xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); + } + + mode = xd->mode_info_context->mbmi.mode; + + if (xd->segmentation_enabled) + vp8_mb_init_dequantizer(pbi, xd); + + +#if CONFIG_ERROR_CONCEALMENT + + if(pbi->ec_active) + { + int throw_residual; + /* When we have independent partitions we can apply residual even + * though other partitions within the frame are corrupt. + */ + throw_residual = (!pbi->independent_partitions && + pbi->frame_corrupt_residual); + throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); + + if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) + { + /* MB with corrupt residuals or corrupt mode/motion vectors. + * Better to use the predictor as reconstruction. + */ + pbi->frame_corrupt_residual = 1; + memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + + corruption_detected = 1; + + /* force idct to be skipped for B_PRED and use the + * prediction only for reconstruction + * */ + memset(xd->eobs, 0, 25); + } + } +#endif + + /* do prediction */ + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + { + vp8_build_intra_predictors_mbuv_s(xd, + xd->recon_above[1], + xd->recon_above[2], + xd->recon_left[1], + xd->recon_left[2], + xd->recon_left_stride[1], + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride); + + if (mode != B_PRED) + { + vp8_build_intra_predictors_mby_s(xd, + xd->recon_above[0], + xd->recon_left[0], + xd->recon_left_stride[0], + xd->dst.y_buffer, + xd->dst.y_stride); + } + else + { + short *DQC = xd->dequant_y1; + int dst_stride = xd->dst.y_stride; + + /* clear out residual eob info */ + if(xd->mode_info_context->mbmi.mb_skip_coeff) + memset(xd->eobs, 0, 25); + + intra_prediction_down_copy(xd, xd->recon_above[0] + 16); + + for (i = 0; i < 16; i++) + { + BLOCKD *b = &xd->block[i]; + unsigned char *dst = xd->dst.y_buffer + b->offset; + B_PREDICTION_MODE b_mode = + xd->mode_info_context->bmi[i].as_mode; + unsigned char *Above; + unsigned char *yleft; + int left_stride; + unsigned char top_left; + + /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ + if (i < 4 && pbi->common.filter_level) + Above = xd->recon_above[0] + b->offset; + else + Above = dst - dst_stride; + + if (i%4==0 && pbi->common.filter_level) + { + yleft = xd->recon_left[0] + i; + left_stride = 1; + } + else + { + yleft = dst - 1; + left_stride = dst_stride; + } + + if ((i==4 || i==8 || i==12) && pbi->common.filter_level) + top_left = *(xd->recon_left[0] + i - 1); + else + top_left = Above[-1]; + + vp8_intra4x4_predict(Above, yleft, left_stride, + b_mode, dst, dst_stride, top_left); + + if (xd->eobs[i] ) + { + if (xd->eobs[i] > 1) + { + vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); + } + else + { + vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], + dst, dst_stride, dst, dst_stride); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + } + } + } + } + } + else + { + vp8_build_inter_predictors_mb(xd); + } + + +#if CONFIG_ERROR_CONCEALMENT + if (corruption_detected) + { + return; + } +#endif + + if(!xd->mode_info_context->mbmi.mb_skip_coeff) + { + /* dequantization and idct */ + if (mode != B_PRED) + { + short *DQC = xd->dequant_y1; + + if (mode != SPLITMV) + { + BLOCKD *b = &xd->block[24]; + + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + vp8_dequantize_b(b, xd->dequant_y2); + + vp8_short_inv_walsh4x4(&b->dqcoeff[0], + xd->qcoeff); + memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); + } + else + { + b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; + vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], + xd->qcoeff); + memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); + } + + /* override the dc dequant constant in order to preserve the + * dc components + */ + DQC = xd->dequant_y1_dc; + } + + vp8_dequant_idct_add_y_block + (xd->qcoeff, DQC, + xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } + + vp8_dequant_idct_add_uv_block + (xd->qcoeff+16*16, xd->dequant_uv, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); + } +} + +static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) +{ + const int *last_row_current_mb_col; + int *current_mb_col; + int mb_row; + VP8_COMMON *pc = &pbi->common; + const int nsync = pbi->sync_range; + const int first_row_no_sync_above = pc->mb_cols + nsync; + int num_part = 1 << pbi->common.multi_token_partition; + int last_mb_row = start_mb_row; + + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; + + int recon_y_stride = yv12_fb_new->y_stride; + int recon_uv_stride = yv12_fb_new->uv_stride; + + unsigned char *ref_buffer[MAX_REF_FRAMES][3]; + unsigned char *dst_buffer[3]; + int i; + int ref_fb_corrupted[MAX_REF_FRAMES]; + + ref_fb_corrupted[INTRA_FRAME] = 0; + + for(i = 1; i < MAX_REF_FRAMES; i++) + { + YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; + + ref_buffer[i][0] = this_fb->y_buffer; + ref_buffer[i][1] = this_fb->u_buffer; + ref_buffer[i][2] = this_fb->v_buffer; + + ref_fb_corrupted[i] = this_fb->corrupted; + } + + dst_buffer[0] = yv12_fb_new->y_buffer; + dst_buffer[1] = yv12_fb_new->u_buffer; + dst_buffer[2] = yv12_fb_new->v_buffer; + + xd->up_available = (start_mb_row != 0); + + xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; + xd->mode_info_stride = pc->mode_info_stride; + + for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) + { + int recon_yoffset, recon_uvoffset; + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &pc->lf_info; + + /* save last row processed by this thread */ + last_mb_row = mb_row; + /* select bool coder for current partition */ + xd->current_bc = &pbi->mbc[mb_row%num_part]; + + if (mb_row > 0) + last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; + else + last_row_current_mb_col = &first_row_no_sync_above; + + current_mb_col = &pbi->mt_current_mb_col[mb_row]; + + recon_yoffset = mb_row * recon_y_stride * 16; + recon_uvoffset = mb_row * recon_uv_stride * 8; + + /* reset contexts */ + xd->above_context = pc->above_context; + memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + + xd->left_available = 0; + + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + if (pbi->common.filter_level) + { + xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0*16 +32; + xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0*8 +16; + xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0*8 +16; + + xd->recon_left[0] = pbi->mt_yleft_col[mb_row]; + xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; + xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; + + /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = 1; + xd->recon_left_stride[1] = 1; + } + else + { + xd->recon_above[0] = dst_buffer[0] + recon_yoffset; + xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; + xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; + + xd->recon_left[0] = xd->recon_above[0] - 1; + xd->recon_left[1] = xd->recon_above[1] - 1; + xd->recon_left[2] = xd->recon_above[2] - 1; + + xd->recon_above[0] -= xd->dst.y_stride; + xd->recon_above[1] -= xd->dst.uv_stride; + xd->recon_above[2] -= xd->dst.uv_stride; + + /* TODO: move to outside row loop */ + xd->recon_left_stride[0] = xd->dst.y_stride; + xd->recon_left_stride[1] = xd->dst.uv_stride; + + setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], + xd->recon_left[2], xd->dst.y_stride, + xd->dst.uv_stride); + } + + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { + if (((mb_col - 1) % nsync) == 0) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row]; + protected_write(mutex, current_mb_col, mb_col - 1); + } + + if (mb_row && !(mb_col & (nsync - 1))) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row-1]; + sync_read(mutex, mb_col, last_row_current_mb_col, nsync); + } + + /* Distance of MB to the various image edges. + * These are specified to 8th pel as they are always + * compared to values that are in 1/8th pel units. + */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + + #if CONFIG_ERROR_CONCEALMENT + { + int corrupt_residual = + (!pbi->independent_partitions && + pbi->frame_corrupt_residual) || + vp8dx_bool_error(xd->current_bc); + if (pbi->ec_active && + (xd->mode_info_context->mbmi.ref_frame == + INTRA_FRAME) && + corrupt_residual) + { + /* We have an intra block with corrupt + * coefficients, better to conceal with an inter + * block. + * Interpolate MVs from neighboring MBs + * + * Note that for the first mb with corrupt + * residual in a frame, we might not discover + * that before decoding the residual. That + * happens after this check, and therefore no + * inter concealment will be done. + */ + vp8_interpolate_motion(xd, + mb_row, mb_col, + pc->mb_rows, pc->mb_cols); + } + } + #endif + + + xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; + xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; + xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; + + xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset; + xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset; + xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset; + + /* propagate errors from reference frames */ + xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; + + mt_decode_macroblock(pbi, xd, 0); + + xd->left_available = 1; + + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= vp8dx_bool_error(xd->current_bc); + + xd->recon_above[0] += 16; + xd->recon_above[1] += 8; + xd->recon_above[2] += 8; + + if (!pbi->common.filter_level) + { + xd->recon_left[0] += 16; + xd->recon_left[1] += 8; + xd->recon_left[2] += 8; + } + + if (pbi->common.filter_level) + { + int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV && + xd->mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; + const int seg = xd->mode_info_context->mbmi.segment_id; + const int ref_frame = xd->mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if( mb_row != pc->mb_rows-1 ) + { + /* Save decoded MB last row data for next-row decoding */ + memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); + memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); + memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); + } + + /* save left_col for next MB decoding */ + if(mb_col != pc->mb_cols-1) + { + MODE_INFO *next = xd->mode_info_context +1; + + if (next->mbmi.ref_frame == INTRA_FRAME) + { + for (i = 0; i < 16; i++) + pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; + for (i = 0; i < 8; i++) + { + pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; + pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; + } + } + } + + /* loopfilter on this macroblock. */ + if (filter_level) + { + if(pc->filter_type == NORMAL_LOOPFILTER) + { + loop_filter_info lfi; + FRAME_TYPE frame_type = pc->frame_type; + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); + } + else + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); + } + } + + } + + recon_yoffset += 16; + recon_uvoffset += 8; + + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; + } + + /* adjust to the next row of mbs */ + if (pbi->common.filter_level) + { + if(mb_row != pc->mb_rows-1) + { + int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; + int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); + + for (i = 0; i < 4; i++) + { + pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; + pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; + pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; + } + } + } + else + vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + + /* last MB of row is ready just after extension is done */ + protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync); + + ++xd->mode_info_context; /* skip prediction column */ + xd->up_available = 1; + + /* since we have multithread */ + xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; + } + + /* signal end of frame decoding if this thread processed the last mb_row */ + if (last_mb_row == (pc->mb_rows - 1)) + sem_post(&pbi->h_event_end_decoding); + +} + + +static THREAD_FUNCTION thread_decoding_proc(void *p_data) +{ + int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; + VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); + MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); + ENTROPY_CONTEXT_PLANES mb_row_left_context; + + while (1) + { + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) + break; + + if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) + { + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) + break; + else + { + MACROBLOCKD *xd = &mbrd->mbd; + xd->left_context = &mb_row_left_context; + + mt_decode_mb_rows(pbi, xd, ithread+1); + } + } + } + + return 0 ; +} + + +void vp8_decoder_create_threads(VP8D_COMP *pbi) +{ + int core_count = 0; + unsigned int ithread; + + pbi->b_multithreaded_rd = 0; + pbi->allocated_decoding_thread_count = 0; + pthread_mutex_init(&pbi->mt_mutex, NULL); + + /* limit decoding threads to the max number of token partitions */ + core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads; + + /* limit decoding threads to the available cores */ + if (core_count > pbi->common.processor_core_count) + core_count = pbi->common.processor_core_count; + + if (core_count > 1) + { + pbi->b_multithreaded_rd = 1; + pbi->decoding_thread_count = core_count - 1; + + CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); + CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); + CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); + CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); + + for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) + { + sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); + + vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); + + pbi->de_thread_data[ithread].ithread = ithread; + pbi->de_thread_data[ithread].ptr1 = (void *)pbi; + pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; + + pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread])); + } + + sem_init(&pbi->h_event_end_decoding, 0, 0); + + pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; + } +} + + +void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) +{ + int i; + + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) + { + /* De-allocate mutex */ + if (pbi->pmutex != NULL) { + for (i = 0; i < mb_rows; i++) { + pthread_mutex_destroy(&pbi->pmutex[i]); + } + vpx_free(pbi->pmutex); + pbi->pmutex = NULL; + } + + vpx_free(pbi->mt_current_mb_col); + pbi->mt_current_mb_col = NULL ; + + /* Free above_row buffers. */ + if (pbi->mt_yabove_row) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_yabove_row[i]); + pbi->mt_yabove_row[i] = NULL ; + } + vpx_free(pbi->mt_yabove_row); + pbi->mt_yabove_row = NULL ; + } + + if (pbi->mt_uabove_row) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_uabove_row[i]); + pbi->mt_uabove_row[i] = NULL ; + } + vpx_free(pbi->mt_uabove_row); + pbi->mt_uabove_row = NULL ; + } + + if (pbi->mt_vabove_row) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_vabove_row[i]); + pbi->mt_vabove_row[i] = NULL ; + } + vpx_free(pbi->mt_vabove_row); + pbi->mt_vabove_row = NULL ; + } + + /* Free left_col buffers. */ + if (pbi->mt_yleft_col) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_yleft_col[i]); + pbi->mt_yleft_col[i] = NULL ; + } + vpx_free(pbi->mt_yleft_col); + pbi->mt_yleft_col = NULL ; + } + + if (pbi->mt_uleft_col) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_uleft_col[i]); + pbi->mt_uleft_col[i] = NULL ; + } + vpx_free(pbi->mt_uleft_col); + pbi->mt_uleft_col = NULL ; + } + + if (pbi->mt_vleft_col) + { + for (i=0; i< mb_rows; i++) + { + vpx_free(pbi->mt_vleft_col[i]); + pbi->mt_vleft_col[i] = NULL ; + } + vpx_free(pbi->mt_vleft_col); + pbi->mt_vleft_col = NULL ; + } + } +} + + +void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) +{ + VP8_COMMON *const pc = & pbi->common; + int i; + int uv_width; + + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) + { + vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); + + /* our internal buffers are always multiples of 16 */ + if ((width & 0xf) != 0) + width += 16 - (width & 0xf); + + if (width < 640) pbi->sync_range = 1; + else if (width <= 1280) pbi->sync_range = 8; + else if (width <= 2560) pbi->sync_range =16; + else pbi->sync_range = 32; + + uv_width = width >>1; + + /* Allocate mutex */ + CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) * + pc->mb_rows)); + if (pbi->pmutex) { + for (i = 0; i < pc->mb_rows; i++) { + pthread_mutex_init(&pbi->pmutex[i], NULL); + } + } + + /* Allocate an int for each mb row. */ + CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); + + /* Allocate memory for above_row buffers. */ + CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); + + CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); + + CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); + + /* Allocate memory for left_col buffers. */ + CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); + + CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); + + CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); + for (i = 0; i < pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); + } +} + + +void vp8_decoder_remove_threads(VP8D_COMP *pbi) +{ + /* shutdown MB Decoding thread; */ + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) + { + int i; + + protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); + + /* allow all threads to exit */ + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) + { + sem_post(&pbi->h_event_start_decoding[i]); + pthread_join(pbi->h_decoding_thread[i], NULL); + } + + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) + { + sem_destroy(&pbi->h_event_start_decoding[i]); + } + + sem_destroy(&pbi->h_event_end_decoding); + + vpx_free(pbi->h_decoding_thread); + pbi->h_decoding_thread = NULL; + + vpx_free(pbi->h_event_start_decoding); + pbi->h_event_start_decoding = NULL; + + vpx_free(pbi->mb_row_di); + pbi->mb_row_di = NULL ; + + vpx_free(pbi->de_thread_data); + pbi->de_thread_data = NULL; + } + pthread_mutex_destroy(&pbi->mt_mutex); +} + +void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) +{ + VP8_COMMON *pc = &pbi->common; + unsigned int i; + int j; + + int filter_level = pc->filter_level; + YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; + + if (filter_level) + { + /* Set above_row buffer to 127 for decoding first MB row */ + memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); + memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); + memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); + + for (j=1; jmb_rows; j++) + { + memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); + memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + } + + /* Set left_col to 129 initially */ + for (j=0; jmb_rows; j++) + { + memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); + memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); + memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); + } + + /* Initialize the loop filter for this frame. */ + vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); + } + else + vp8_setup_intra_recon_top_line(yv12_fb_new); + + setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); + + for (i = 0; i < pbi->decoding_thread_count; i++) + sem_post(&pbi->h_event_start_decoding[i]); + + mt_decode_mb_rows(pbi, xd, 0); + + sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ +} diff --git a/thirdparty/libvpx/vp8/decoder/treereader.h b/thirdparty/libvpx/vp8/decoder/treereader.h new file mode 100644 index 00000000000..f7d23c36989 --- /dev/null +++ b/thirdparty/libvpx/vp8/decoder/treereader.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_DECODER_TREEREADER_H_ +#define VP8_DECODER_TREEREADER_H_ + +#include "./vpx_config.h" +#include "vp8/common/treecoder.h" +#include "dboolhuff.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef BOOL_DECODER vp8_reader; + +#define vp8_read vp8dx_decode_bool +#define vp8_read_literal vp8_decode_value +#define vp8_read_bit(R) vp8_read(R, vp8_prob_half) + + +/* Intent of tree data structure is to make decoding trivial. */ + +static INLINE int vp8_treed_read( + vp8_reader *const r, /* !!! must return a 0 or 1 !!! */ + vp8_tree t, + const vp8_prob *const p +) +{ + register vp8_tree_index i = 0; + + while ((i = t[ i + vp8_read(r, p[i>>1])]) > 0) ; + + return -i; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_DECODER_TREEREADER_H_ diff --git a/thirdparty/libvpx/vp8/vp8_dx_iface.c b/thirdparty/libvpx/vp8/vp8_dx_iface.c new file mode 100644 index 00000000000..fc9288d62b6 --- /dev/null +++ b/thirdparty/libvpx/vp8/vp8_dx_iface.c @@ -0,0 +1,828 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include +#include +#include "./vp8_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vp8dx.h" +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx_version.h" +#include "common/alloccommon.h" +#include "common/common.h" +#include "common/onyxd.h" +#include "decoder/onyxd_int.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#if CONFIG_ERROR_CONCEALMENT +#include "decoder/error_concealment.h" +#endif +#include "decoder/decoderthreading.h" + +#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) +#define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \ + VPX_CODEC_CAP_ERROR_CONCEALMENT : 0) + +typedef vpx_codec_stream_info_t vp8_stream_info_t; + +/* Structures for handling memory allocations */ +typedef enum +{ + VP8_SEG_ALG_PRIV = 256, + VP8_SEG_MAX +} mem_seg_id_t; +#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) + +struct vpx_codec_alg_priv +{ + vpx_codec_priv_t base; + vpx_codec_dec_cfg_t cfg; + vp8_stream_info_t si; + int decoder_init; + int postproc_cfg_set; + vp8_postproc_cfg_t postproc_cfg; +#if CONFIG_POSTPROC_VISUALIZER + unsigned int dbg_postproc_flag; + int dbg_color_ref_frame_flag; + int dbg_color_mb_modes_flag; + int dbg_color_b_modes_flag; + int dbg_display_mv_flag; +#endif + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + vpx_image_t img; + int img_setup; + struct frame_buffers yv12_frame_buffers; + void *user_priv; + FRAGMENT_DATA fragments; +}; + +static int vp8_init_ctx(vpx_codec_ctx_t *ctx) +{ + vpx_codec_alg_priv_t *priv = + (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); + if (!priv) return 1; + + ctx->priv = (vpx_codec_priv_t *)priv; + ctx->priv->init_flags = ctx->init_flags; + + priv->si.sz = sizeof(priv->si); + priv->decrypt_cb = NULL; + priv->decrypt_state = NULL; + + if (ctx->config.dec) + { + /* Update the reference to the config structure to an internal copy. */ + priv->cfg = *ctx->config.dec; + ctx->config.dec = &priv->cfg; + } + + return 0; +} + +static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) +{ + vpx_codec_err_t res = VPX_CODEC_OK; + vpx_codec_alg_priv_t *priv = NULL; + (void) data; + + vp8_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); + + /* This function only allocates space for the vpx_codec_alg_priv_t + * structure. More memory may be required at the time the stream + * information becomes known. + */ + if (!ctx->priv) { + if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR; + priv = (vpx_codec_alg_priv_t *)ctx->priv; + + /* initialize number of fragments to zero */ + priv->fragments.count = 0; + /* is input fragments enabled? */ + priv->fragments.enabled = + (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); + + /*post processing level initialized to do nothing */ + } else { + priv = (vpx_codec_alg_priv_t *)ctx->priv; + } + + priv->yv12_frame_buffers.use_frame_threads = + (ctx->priv->init_flags & VPX_CODEC_USE_FRAME_THREADING); + + /* for now, disable frame threading */ + priv->yv12_frame_buffers.use_frame_threads = 0; + + if (priv->yv12_frame_buffers.use_frame_threads && + ((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) || + (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) { + /* row-based threading, error concealment, and input fragments will + * not be supported when using frame-based threading */ + res = VPX_CODEC_INVALID_PARAM; + } + + return res; +} + +static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) +{ + vp8_remove_decoder_instances(&ctx->yv12_frame_buffers); + + vpx_free(ctx); + + return VPX_CODEC_OK; +} + +static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) +{ + vpx_codec_err_t res = VPX_CODEC_OK; + + assert(data != NULL); + + if(data + data_sz <= data) + { + res = VPX_CODEC_INVALID_PARAM; + } + else + { + /* Parse uncompresssed part of key frame header. + * 3 bytes:- including version, frame type and an offset + * 3 bytes:- sync code (0x9d, 0x01, 0x2a) + * 4 bytes:- including image width and height in the lowest 14 bits + * of each 2-byte value. + */ + uint8_t clear_buffer[10]; + const uint8_t *clear = data; + if (decrypt_cb) + { + int n = VPXMIN(sizeof(clear_buffer), data_sz); + decrypt_cb(decrypt_state, data, clear_buffer, n); + clear = clear_buffer; + } + si->is_kf = 0; + + if (data_sz >= 10 && !(clear[0] & 0x01)) /* I-Frame */ + { + si->is_kf = 1; + + /* vet via sync code */ + if (clear[3] != 0x9d || clear[4] != 0x01 || clear[5] != 0x2a) + return VPX_CODEC_UNSUP_BITSTREAM; + + si->w = (clear[6] | (clear[7] << 8)) & 0x3fff; + si->h = (clear[8] | (clear[9] << 8)) & 0x3fff; + + /*printf("w=%d, h=%d\n", si->w, si->h);*/ + if (!(si->h | si->w)) + res = VPX_CODEC_UNSUP_BITSTREAM; + } + else + { + res = VPX_CODEC_UNSUP_BITSTREAM; + } + } + + return res; +} + +static vpx_codec_err_t vp8_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + return vp8_peek_si_internal(data, data_sz, si, NULL, NULL); +} + +static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) +{ + + unsigned int sz; + + if (si->sz >= sizeof(vp8_stream_info_t)) + sz = sizeof(vp8_stream_info_t); + else + sz = sizeof(vpx_codec_stream_info_t); + + memcpy(si, &ctx->si, sz); + si->sz = sz; + + return VPX_CODEC_OK; +} + + +static vpx_codec_err_t +update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) +{ + vpx_codec_err_t res; + + if ((res = error->error_code)) + ctx->base.err_detail = error->has_detail + ? error->detail + : NULL; + + return res; +} + +static void yuvconfig2image(vpx_image_t *img, + const YV12_BUFFER_CONFIG *yv12, + void *user_priv) +{ + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + img->fmt = VPX_IMG_FMT_I420; + img->w = yv12->y_stride; + img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; + img->d_w = img->r_w = yv12->y_width; + img->d_h = img->r_h = yv12->y_height; + img->x_chroma_shift = 1; + img->y_chroma_shift = 1; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; + img->bit_depth = 8; + img->bps = 12; + img->user_priv = user_priv; + img->img_data = yv12->buffer_alloc; + img->img_data_owner = 0; + img->self_allocd = 0; +} + +static int +update_fragments(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, + unsigned int data_sz, + vpx_codec_err_t *res) +{ + *res = VPX_CODEC_OK; + + if (ctx->fragments.count == 0) + { + /* New frame, reset fragment pointers and sizes */ + memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); + memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); + } + if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) + { + /* Store a pointer to this fragment and return. We haven't + * received the complete frame yet, so we will wait with decoding. + */ + ctx->fragments.ptrs[ctx->fragments.count] = data; + ctx->fragments.sizes[ctx->fragments.count] = data_sz; + ctx->fragments.count++; + if (ctx->fragments.count > (1 << EIGHT_PARTITION) + 1) + { + ctx->fragments.count = 0; + *res = VPX_CODEC_INVALID_PARAM; + return -1; + } + return 0; + } + + if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) + { + return 0; + } + + if (!ctx->fragments.enabled) + { + ctx->fragments.ptrs[0] = data; + ctx->fragments.sizes[0] = data_sz; + ctx->fragments.count = 1; + } + + return 1; +} + +static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, + unsigned int data_sz, + void *user_priv, + long deadline) +{ + vpx_codec_err_t res = VPX_CODEC_OK; + unsigned int resolution_change = 0; + unsigned int w, h; + + if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) + { + return 0; + } + + /* Update the input fragment data */ + if(update_fragments(ctx, data, data_sz, &res) <= 0) + return res; + + /* Determine the stream parameters. Note that we rely on peek_si to + * validate that we have a buffer that does not wrap around the top + * of the heap. + */ + w = ctx->si.w; + h = ctx->si.h; + + res = vp8_peek_si_internal(ctx->fragments.ptrs[0], ctx->fragments.sizes[0], + &ctx->si, ctx->decrypt_cb, ctx->decrypt_state); + + if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) + { + /* the peek function returns an error for non keyframes, however for + * this case, it is not an error */ + res = VPX_CODEC_OK; + } + + if(!ctx->decoder_init && !ctx->si.is_kf) + res = VPX_CODEC_UNSUP_BITSTREAM; + + if ((ctx->si.h != h) || (ctx->si.w != w)) + resolution_change = 1; + + /* Initialize the decoder instance on the first frame*/ + if (!res && !ctx->decoder_init) + { + VP8D_CONFIG oxcf; + + oxcf.Width = ctx->si.w; + oxcf.Height = ctx->si.h; + oxcf.Version = 9; + oxcf.postprocess = 0; + oxcf.max_threads = ctx->cfg.threads; + oxcf.error_concealment = + (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT); + + /* If postprocessing was enabled by the application and a + * configuration has not been provided, default it. + */ + if (!ctx->postproc_cfg_set + && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { + ctx->postproc_cfg.post_proc_flag = + VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE; + ctx->postproc_cfg.deblocking_level = 4; + ctx->postproc_cfg.noise_level = 0; + } + + res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf); + ctx->decoder_init = 1; + } + + /* Set these even if already initialized. The caller may have changed the + * decrypt config between frames. + */ + if (ctx->decoder_init) { + ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb; + ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state; + } + + if (!res) + { + VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; + if (resolution_change) + { + VP8_COMMON *const pc = & pbi->common; + MACROBLOCKD *const xd = & pbi->mb; +#if CONFIG_MULTITHREAD + int i; +#endif + pc->Width = ctx->si.w; + pc->Height = ctx->si.h; + { + int prev_mb_rows = pc->mb_rows; + + if (setjmp(pbi->common.error.jmp)) + { + pbi->common.error.setjmp = 0; + vp8_clear_system_state(); + /* same return value as used in vp8dx_receive_compressed_data */ + return -1; + } + + pbi->common.error.setjmp = 1; + + if (pc->Width <= 0) + { + pc->Width = w; + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame width"); + } + + if (pc->Height <= 0) + { + pc->Height = h; + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame height"); + } + + if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffers"); + + xd->pre = pc->yv12_fb[pc->lst_fb_idx]; + xd->dst = pc->yv12_fb[pc->new_fb_idx]; + +#if CONFIG_MULTITHREAD + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) + { + pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; + vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); + } +#endif + vp8_build_block_doffsets(&pbi->mb); + + /* allocate memory for last frame MODE_INFO array */ +#if CONFIG_ERROR_CONCEALMENT + + if (pbi->ec_enabled) + { + /* old prev_mip was released by vp8_de_alloc_frame_buffers() + * called in vp8_alloc_frame_buffers() */ + pc->prev_mip = vpx_calloc( + (pc->mb_cols + 1) * (pc->mb_rows + 1), + sizeof(MODE_INFO)); + + if (!pc->prev_mip) + { + vp8_de_alloc_frame_buffers(pc); + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate" + "last frame MODE_INFO array"); + } + + pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; + + if (vp8_alloc_overlap_lists(pbi)) + vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate overlap lists " + "for error concealment"); + } + +#endif + +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd) + vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); +#else + (void)prev_mb_rows; +#endif + } + + pbi->common.error.setjmp = 0; + + /* required to get past the first get_free_fb() call */ + pbi->common.fb_idx_ref_cnt[0] = 0; + } + + /* update the pbi fragment data */ + pbi->fragments = ctx->fragments; + + ctx->user_priv = user_priv; + if (vp8dx_receive_compressed_data(pbi, data_sz, data, deadline)) + { + res = update_error_state(ctx, &pbi->common.error); + } + + /* get ready for the next series of fragments */ + ctx->fragments.count = 0; + } + + return res; +} + +static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) +{ + vpx_image_t *img = NULL; + + /* iter acts as a flip flop, so an image is only returned on the first + * call to get_frame. + */ + if (!(*iter) && ctx->yv12_frame_buffers.pbi[0]) + { + YV12_BUFFER_CONFIG sd; + int64_t time_stamp = 0, time_end_stamp = 0; + vp8_ppflags_t flags; + vp8_zero(flags); + + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + { + flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag +#if CONFIG_POSTPROC_VISUALIZER + + | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0) + | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0) +#endif + ; + flags.deblocking_level = ctx->postproc_cfg.deblocking_level; + flags.noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag; + flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags.display_mv_flag = ctx->dbg_display_mv_flag; +#endif + } + + if (0 == vp8dx_get_raw_frame(ctx->yv12_frame_buffers.pbi[0], &sd, + &time_stamp, &time_end_stamp, &flags)) + { + yuvconfig2image(&ctx->img, &sd, ctx->user_priv); + + img = &ctx->img; + *iter = img; + } + } + + return img; +} + +static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12) +{ + const int y_w = img->d_w; + const int y_h = img->d_h; + const int uv_w = (img->d_w + 1) / 2; + const int uv_h = (img->d_h + 1) / 2; + vpx_codec_err_t res = VPX_CODEC_OK; + yv12->y_buffer = img->planes[VPX_PLANE_Y]; + yv12->u_buffer = img->planes[VPX_PLANE_U]; + yv12->v_buffer = img->planes[VPX_PLANE_V]; + + yv12->y_crop_width = y_w; + yv12->y_crop_height = y_h; + yv12->y_width = y_w; + yv12->y_height = y_h; + yv12->uv_crop_width = uv_w; + yv12->uv_crop_height = uv_h; + yv12->uv_width = uv_w; + yv12->uv_height = uv_h; + + yv12->y_stride = img->stride[VPX_PLANE_Y]; + yv12->uv_stride = img->stride[VPX_PLANE_U]; + + yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2; + return res; +} + + +static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + + vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + + if (data && !ctx->yv12_frame_buffers.use_frame_threads) + { + vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + YV12_BUFFER_CONFIG sd; + + image2yuvconfig(&frame->img, &sd); + + return vp8dx_set_reference(ctx->yv12_frame_buffers.pbi[0], + frame->frame_type, &sd); + } + else + return VPX_CODEC_INVALID_PARAM; + +} + +static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + + vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + + if (data && !ctx->yv12_frame_buffers.use_frame_threads) + { + vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + YV12_BUFFER_CONFIG sd; + + image2yuvconfig(&frame->img, &sd); + + return vp8dx_get_reference(ctx->yv12_frame_buffers.pbi[0], + frame->frame_type, &sd); + } + else + return VPX_CODEC_INVALID_PARAM; + +} + +static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, + va_list args) +{ +#if CONFIG_POSTPROC + vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); + + if (data) + { + ctx->postproc_cfg_set = 1; + ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; + +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + + +static vpx_codec_err_t vp8_set_dbg_color_ref_frame(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_color_ref_frame_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t vp8_set_dbg_color_mb_modes(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_color_mb_modes_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t vp8_set_dbg_color_b_modes(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_color_b_modes_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t vp8_set_dbg_display_mv(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + ctx->dbg_display_mv_flag = va_arg(args, int); + return VPX_CODEC_OK; +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + int *update_info = va_arg(args, int *); + + if (update_info && !ctx->yv12_frame_buffers.use_frame_threads) + { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; + + *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME + + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME + + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME; + + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; +} + +extern int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ); +static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + int *ref_info = va_arg(args, int *); + + if (ref_info && !ctx->yv12_frame_buffers.use_frame_threads) + { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; + VP8_COMMON *oci = &pbi->common; + *ref_info = + (vp8dx_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) | + (vp8dx_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) | + (vp8dx_references_buffer( oci, LAST_FRAME )?VP8_LAST_FRAME:0); + + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + + int *corrupted = va_arg(args, int *); + VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; + + if (corrupted && pbi) + { + const YV12_BUFFER_CONFIG *const frame = pbi->common.frame_to_show; + if (frame == NULL) return VPX_CODEC_ERROR; + *corrupted = frame->corrupted; + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; + +} + +static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx, + va_list args) +{ + vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); + + if (init) + { + ctx->decrypt_cb = init->decrypt_cb; + ctx->decrypt_state = init->decrypt_state; + } + else + { + ctx->decrypt_cb = NULL; + ctx->decrypt_state = NULL; + } + return VPX_CODEC_OK; +} + +vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = +{ + {VP8_SET_REFERENCE, vp8_set_reference}, + {VP8_COPY_REFERENCE, vp8_get_reference}, + {VP8_SET_POSTPROC, vp8_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_color_ref_frame}, + {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_color_mb_modes}, + {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_color_b_modes}, + {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_display_mv}, + {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, + {VP8D_GET_LAST_REF_USED, vp8_get_last_ref_frame}, + {VPXD_SET_DECRYPTOR, vp8_set_decryptor}, + { -1, NULL}, +}; + + +#ifndef VERSION_STRING +#define VERSION_STRING +#endif +CODEC_INTERFACE(vpx_codec_vp8_dx) = +{ + "WebM Project VP8 Decoder" VERSION_STRING, + VPX_CODEC_INTERNAL_ABI_VERSION, + VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC | VP8_CAP_ERROR_CONCEALMENT | + VPX_CODEC_CAP_INPUT_FRAGMENTS, + /* vpx_codec_caps_t caps; */ + vp8_init, /* vpx_codec_init_fn_t init; */ + vp8_destroy, /* vpx_codec_destroy_fn_t destroy; */ + vp8_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ + { + vp8_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ + vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */ + vp8_decode, /* vpx_codec_decode_fn_t decode; */ + vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ + NULL, + }, + { /* encoder functions */ + 0, + NULL, /* vpx_codec_enc_cfg_map_t */ + NULL, /* vpx_codec_encode_fn_t */ + NULL, /* vpx_codec_get_cx_data_fn_t */ + NULL, /* vpx_codec_enc_config_set_fn_t */ + NULL, /* vpx_codec_get_global_headers_fn_t */ + NULL, /* vpx_codec_get_preview_frame_fn_t */ + NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */ + } +}; diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c new file mode 100644 index 00000000000..1761fada2fa --- /dev/null +++ b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +static int16_t sinpi_1_9 = 0x14a3; +static int16_t sinpi_2_9 = 0x26c9; +static int16_t sinpi_3_9 = 0x3441; +static int16_t sinpi_4_9 = 0x3b6c; +static int16_t cospi_8_64 = 0x3b21; +static int16_t cospi_16_64 = 0x2d41; +static int16_t cospi_24_64 = 0x187e; + +static INLINE void TRANSPOSE4X4( + int16x8_t *q8s16, + int16x8_t *q9s16) { + int32x4_t q8s32, q9s32; + int16x4x2_t d0x2s16, d1x2s16; + int32x4x2_t q0x2s32; + + d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); + d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); + + q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); + q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); + q0x2s32 = vtrnq_s32(q8s32, q9s32); + + *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); + *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); + return; +} + +static INLINE void GENERATE_COSINE_CONSTANTS( + int16x4_t *d0s16, + int16x4_t *d1s16, + int16x4_t *d2s16) { + *d0s16 = vdup_n_s16(cospi_8_64); + *d1s16 = vdup_n_s16(cospi_16_64); + *d2s16 = vdup_n_s16(cospi_24_64); + return; +} + +static INLINE void GENERATE_SINE_CONSTANTS( + int16x4_t *d3s16, + int16x4_t *d4s16, + int16x4_t *d5s16, + int16x8_t *q3s16) { + *d3s16 = vdup_n_s16(sinpi_1_9); + *d4s16 = vdup_n_s16(sinpi_2_9); + *q3s16 = vdupq_n_s16(sinpi_3_9); + *d5s16 = vdup_n_s16(sinpi_4_9); + return; +} + +static INLINE void IDCT4x4_1D( + int16x4_t *d0s16, + int16x4_t *d1s16, + int16x4_t *d2s16, + int16x8_t *q8s16, + int16x8_t *q9s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; + int16x4_t d26s16, d27s16, d28s16, d29s16; + int32x4_t q10s32, q13s32, q14s32, q15s32; + int16x8_t q13s16, q14s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + d23s16 = vadd_s16(d16s16, d18s16); + d24s16 = vsub_s16(d16s16, d18s16); + + q15s32 = vmull_s16(d17s16, *d2s16); + q10s32 = vmull_s16(d17s16, *d0s16); + q13s32 = vmull_s16(d23s16, *d1s16); + q14s32 = vmull_s16(d24s16, *d1s16); + q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); + q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); + + d26s16 = vqrshrn_n_s32(q13s32, 14); + d27s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d28s16 = vqrshrn_n_s32(q10s32, 14); + + q13s16 = vcombine_s16(d26s16, d27s16); + q14s16 = vcombine_s16(d28s16, d29s16); + *q8s16 = vaddq_s16(q13s16, q14s16); + *q9s16 = vsubq_s16(q13s16, q14s16); + *q9s16 = vcombine_s16(vget_high_s16(*q9s16), + vget_low_s16(*q9s16)); // vswp + return; +} + +static INLINE void IADST4x4_1D( + int16x4_t *d3s16, + int16x4_t *d4s16, + int16x4_t *d5s16, + int16x8_t *q3s16, + int16x8_t *q8s16, + int16x8_t *q9s16) { + int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; + int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d6s16 = vget_low_s16(*q3s16); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + q10s32 = vmull_s16(*d3s16, d16s16); + q11s32 = vmull_s16(*d4s16, d16s16); + q12s32 = vmull_s16(d6s16, d17s16); + q13s32 = vmull_s16(*d5s16, d18s16); + q14s32 = vmull_s16(*d3s16, d18s16); + q15s32 = vmovl_s16(d16s16); + q15s32 = vaddw_s16(q15s32, d19s16); + q8s32 = vmull_s16(*d4s16, d19s16); + q15s32 = vsubw_s16(q15s32, d18s16); + q9s32 = vmull_s16(*d5s16, d19s16); + + q10s32 = vaddq_s32(q10s32, q13s32); + q10s32 = vaddq_s32(q10s32, q8s32); + q11s32 = vsubq_s32(q11s32, q14s32); + q8s32 = vdupq_n_s32(sinpi_3_9); + q11s32 = vsubq_s32(q11s32, q9s32); + q15s32 = vmulq_s32(q15s32, q8s32); + + q13s32 = vaddq_s32(q10s32, q12s32); + q10s32 = vaddq_s32(q10s32, q11s32); + q14s32 = vaddq_s32(q11s32, q12s32); + q10s32 = vsubq_s32(q10s32, q12s32); + + d16s16 = vqrshrn_n_s32(q13s32, 14); + d17s16 = vqrshrn_n_s32(q14s32, 14); + d18s16 = vqrshrn_n_s32(q15s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + + *q8s16 = vcombine_s16(d16s16, d17s16); + *q9s16 = vcombine_s16(d18s16, d19s16); + return; +} + +void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, + int dest_stride, int tx_type) { + uint8x8_t d26u8, d27u8; + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; + uint32x2_t d26u32, d27u32; + int16x8_t q3s16, q8s16, q9s16; + uint16x8_t q8u16, q9u16; + + d26u32 = d27u32 = vdup_n_u32(0); + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + + TRANSPOSE4X4(&q8s16, &q9s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. Fall back to C + vp9_iht4x4_16_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate constants + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + case 2: // idct_iadst + // generate constantsyy + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + break; + case 3: // iadst_iadst + // generate constants + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 4); + q9s16 = vrshrq_n_s16(q9s16, 4); + + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); + dest += dest_stride; + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); + + d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); + return; +} diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c new file mode 100644 index 00000000000..04b342c3d34 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +static int16_t cospi_2_64 = 16305; +static int16_t cospi_4_64 = 16069; +static int16_t cospi_6_64 = 15679; +static int16_t cospi_8_64 = 15137; +static int16_t cospi_10_64 = 14449; +static int16_t cospi_12_64 = 13623; +static int16_t cospi_14_64 = 12665; +static int16_t cospi_16_64 = 11585; +static int16_t cospi_18_64 = 10394; +static int16_t cospi_20_64 = 9102; +static int16_t cospi_22_64 = 7723; +static int16_t cospi_24_64 = 6270; +static int16_t cospi_26_64 = 4756; +static int16_t cospi_28_64 = 3196; +static int16_t cospi_30_64 = 1606; + +static INLINE void TRANSPOSE8X8( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + *q12s16 = vcombine_s16(d17s16, d25s16); + *q13s16 = vcombine_s16(d19s16, d27s16); + *q14s16 = vcombine_s16(d21s16, d29s16); + *q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), + vreinterpretq_s32_s16(*q10s16)); + q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), + vreinterpretq_s32_s16(*q11s16)); + q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), + vreinterpretq_s32_s16(*q14s16)); + q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), + vreinterpretq_s32_s16(*q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + *q8s16 = q0x2s16.val[0]; + *q9s16 = q0x2s16.val[1]; + *q10s16 = q1x2s16.val[0]; + *q11s16 = q1x2s16.val[1]; + *q12s16 = q2x2s16.val[0]; + *q13s16 = q2x2s16.val[1]; + *q14s16 = q3x2s16.val[0]; + *q15s16 = q3x2s16.val[1]; + return; +} + +static INLINE void IDCT8x8_1D( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; + + d0s16 = vdup_n_s16(cospi_28_64); + d1s16 = vdup_n_s16(cospi_4_64); + d2s16 = vdup_n_s16(cospi_12_64); + d3s16 = vdup_n_s16(cospi_20_64); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + q2s32 = vmull_s16(d18s16, d0s16); + q3s32 = vmull_s16(d19s16, d0s16); + q5s32 = vmull_s16(d26s16, d2s16); + q6s32 = vmull_s16(d27s16, d2s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); + q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); + q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); + + d8s16 = vqrshrn_n_s32(q2s32, 14); + d9s16 = vqrshrn_n_s32(q3s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + q4s16 = vcombine_s16(d8s16, d9s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q2s32 = vmull_s16(d18s16, d1s16); + q3s32 = vmull_s16(d19s16, d1s16); + q9s32 = vmull_s16(d26s16, d3s16); + q13s32 = vmull_s16(d27s16, d3s16); + + q2s32 = vmlal_s16(q2s32, d30s16, d0s16); + q3s32 = vmlal_s16(q3s32, d31s16, d0s16); + q9s32 = vmlal_s16(q9s32, d22s16, d2s16); + q13s32 = vmlal_s16(q13s32, d23s16, d2s16); + + d14s16 = vqrshrn_n_s32(q2s32, 14); + d15s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q13s32, 14); + q6s16 = vcombine_s16(d12s16, d13s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + d0s16 = vdup_n_s16(cospi_16_64); + + q2s32 = vmull_s16(d16s16, d0s16); + q3s32 = vmull_s16(d17s16, d0s16); + q13s32 = vmull_s16(d16s16, d0s16); + q15s32 = vmull_s16(d17s16, d0s16); + + q2s32 = vmlal_s16(q2s32, d24s16, d0s16); + q3s32 = vmlal_s16(q3s32, d25s16, d0s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); + q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); + + d0s16 = vdup_n_s16(cospi_24_64); + d1s16 = vdup_n_s16(cospi_8_64); + + d18s16 = vqrshrn_n_s32(q2s32, 14); + d19s16 = vqrshrn_n_s32(q3s32, 14); + d22s16 = vqrshrn_n_s32(q13s32, 14); + d23s16 = vqrshrn_n_s32(q15s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q2s32 = vmull_s16(d20s16, d0s16); + q3s32 = vmull_s16(d21s16, d0s16); + q8s32 = vmull_s16(d20s16, d1s16); + q12s32 = vmull_s16(d21s16, d1s16); + + q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); + q8s32 = vmlal_s16(q8s32, d28s16, d0s16); + q12s32 = vmlal_s16(q12s32, d29s16, d0s16); + + d26s16 = vqrshrn_n_s32(q2s32, 14); + d27s16 = vqrshrn_n_s32(q3s32, 14); + d30s16 = vqrshrn_n_s32(q8s32, 14); + d31s16 = vqrshrn_n_s32(q12s32, 14); + *q13s16 = vcombine_s16(d26s16, d27s16); + *q15s16 = vcombine_s16(d30s16, d31s16); + + q0s16 = vaddq_s16(*q9s16, *q15s16); + q1s16 = vaddq_s16(*q11s16, *q13s16); + q2s16 = vsubq_s16(*q11s16, *q13s16); + q3s16 = vsubq_s16(*q9s16, *q15s16); + + *q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + *q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q7s16, q6s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + + d16s16 = vdup_n_s16(cospi_16_64); + + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + q11s32 = vmull_s16(d28s16, d16s16); + q12s32 = vmull_s16(d29s16, d16s16); + + q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); + q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); + q11s32 = vmlal_s16(q11s32, d26s16, d16s16); + q12s32 = vmlal_s16(q12s32, d27s16, d16s16); + + d10s16 = vqrshrn_n_s32(q9s32, 14); + d11s16 = vqrshrn_n_s32(q10s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q12s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + *q8s16 = vaddq_s16(q0s16, q7s16); + *q9s16 = vaddq_s16(q1s16, q6s16); + *q10s16 = vaddq_s16(q2s16, q5s16); + *q11s16 = vaddq_s16(q3s16, q4s16); + *q12s16 = vsubq_s16(q3s16, q4s16); + *q13s16 = vsubq_s16(q2s16, q5s16); + *q14s16 = vsubq_s16(q1s16, q6s16); + *q15s16 = vsubq_s16(q0s16, q7s16); + return; +} + +static INLINE void IADST8X8_1D( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q2s16, q4s16, q5s16, q6s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; + int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + d14s16 = vdup_n_s16(cospi_2_64); + d15s16 = vdup_n_s16(cospi_30_64); + + q1s32 = vmull_s16(d30s16, d14s16); + q2s32 = vmull_s16(d31s16, d14s16); + q3s32 = vmull_s16(d30s16, d15s16); + q4s32 = vmull_s16(d31s16, d15s16); + + d30s16 = vdup_n_s16(cospi_18_64); + d31s16 = vdup_n_s16(cospi_14_64); + + q1s32 = vmlal_s16(q1s32, d16s16, d15s16); + q2s32 = vmlal_s16(q2s32, d17s16, d15s16); + q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); + q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); + + q5s32 = vmull_s16(d22s16, d30s16); + q6s32 = vmull_s16(d23s16, d30s16); + q7s32 = vmull_s16(d22s16, d31s16); + q8s32 = vmull_s16(d23s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d24s16, d31s16); + q6s32 = vmlal_s16(q6s32, d25s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); + q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); + + q11s32 = vaddq_s32(q1s32, q5s32); + q12s32 = vaddq_s32(q2s32, q6s32); + q1s32 = vsubq_s32(q1s32, q5s32); + q2s32 = vsubq_s32(q2s32, q6s32); + + d22s16 = vqrshrn_n_s32(q11s32, 14); + d23s16 = vqrshrn_n_s32(q12s32, 14); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q12s32 = vaddq_s32(q3s32, q7s32); + q15s32 = vaddq_s32(q4s32, q8s32); + q3s32 = vsubq_s32(q3s32, q7s32); + q4s32 = vsubq_s32(q4s32, q8s32); + + d2s16 = vqrshrn_n_s32(q1s32, 14); + d3s16 = vqrshrn_n_s32(q2s32, 14); + d24s16 = vqrshrn_n_s32(q12s32, 14); + d25s16 = vqrshrn_n_s32(q15s32, 14); + d6s16 = vqrshrn_n_s32(q3s32, 14); + d7s16 = vqrshrn_n_s32(q4s32, 14); + *q12s16 = vcombine_s16(d24s16, d25s16); + + d0s16 = vdup_n_s16(cospi_10_64); + d1s16 = vdup_n_s16(cospi_22_64); + q4s32 = vmull_s16(d26s16, d0s16); + q5s32 = vmull_s16(d27s16, d0s16); + q2s32 = vmull_s16(d26s16, d1s16); + q6s32 = vmull_s16(d27s16, d1s16); + + d30s16 = vdup_n_s16(cospi_26_64); + d31s16 = vdup_n_s16(cospi_6_64); + + q4s32 = vmlal_s16(q4s32, d20s16, d1s16); + q5s32 = vmlal_s16(q5s32, d21s16, d1s16); + q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); + q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); + + q0s32 = vmull_s16(d18s16, d30s16); + q13s32 = vmull_s16(d19s16, d30s16); + + q0s32 = vmlal_s16(q0s32, d28s16, d31s16); + q13s32 = vmlal_s16(q13s32, d29s16, d31s16); + + q10s32 = vmull_s16(d18s16, d31s16); + q9s32 = vmull_s16(d19s16, d31s16); + + q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); + q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); + + q14s32 = vaddq_s32(q2s32, q10s32); + q15s32 = vaddq_s32(q6s32, q9s32); + q2s32 = vsubq_s32(q2s32, q10s32); + q6s32 = vsubq_s32(q6s32, q9s32); + + d28s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q6s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + q9s32 = vaddq_s32(q4s32, q0s32); + q10s32 = vaddq_s32(q5s32, q13s32); + q4s32 = vsubq_s32(q4s32, q0s32); + q5s32 = vsubq_s32(q5s32, q13s32); + + d30s16 = vdup_n_s16(cospi_8_64); + d31s16 = vdup_n_s16(cospi_24_64); + + d18s16 = vqrshrn_n_s32(q9s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + d8s16 = vqrshrn_n_s32(q4s32, 14); + d9s16 = vqrshrn_n_s32(q5s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q5s32 = vmull_s16(d2s16, d30s16); + q6s32 = vmull_s16(d3s16, d30s16); + q7s32 = vmull_s16(d2s16, d31s16); + q0s32 = vmull_s16(d3s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d6s16, d31s16); + q6s32 = vmlal_s16(q6s32, d7s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); + + q1s32 = vmull_s16(d4s16, d30s16); + q3s32 = vmull_s16(d5s16, d30s16); + q10s32 = vmull_s16(d4s16, d31s16); + q2s32 = vmull_s16(d5s16, d31s16); + + q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); + q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); + q10s32 = vmlal_s16(q10s32, d8s16, d30s16); + q2s32 = vmlal_s16(q2s32, d9s16, d30s16); + + *q8s16 = vaddq_s16(*q11s16, *q9s16); + *q11s16 = vsubq_s16(*q11s16, *q9s16); + q4s16 = vaddq_s16(*q12s16, *q14s16); + *q12s16 = vsubq_s16(*q12s16, *q14s16); + + q14s32 = vaddq_s32(q5s32, q1s32); + q15s32 = vaddq_s32(q6s32, q3s32); + q5s32 = vsubq_s32(q5s32, q1s32); + q6s32 = vsubq_s32(q6s32, q3s32); + + d18s16 = vqrshrn_n_s32(q14s32, 14); + d19s16 = vqrshrn_n_s32(q15s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q1s32 = vaddq_s32(q7s32, q10s32); + q3s32 = vaddq_s32(q0s32, q2s32); + q7s32 = vsubq_s32(q7s32, q10s32); + q0s32 = vsubq_s32(q0s32, q2s32); + + d28s16 = vqrshrn_n_s32(q1s32, 14); + d29s16 = vqrshrn_n_s32(q3s32, 14); + d14s16 = vqrshrn_n_s32(q7s32, 14); + d15s16 = vqrshrn_n_s32(q0s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + d30s16 = vdup_n_s16(cospi_16_64); + + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + q2s32 = vmull_s16(d22s16, d30s16); + q3s32 = vmull_s16(d23s16, d30s16); + q13s32 = vmull_s16(d22s16, d30s16); + q1s32 = vmull_s16(d23s16, d30s16); + + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + q2s32 = vmlal_s16(q2s32, d24s16, d30s16); + q3s32 = vmlal_s16(q3s32, d25s16, d30s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); + q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); + + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q3s32, 14); + d24s16 = vqrshrn_n_s32(q13s32, 14); + d25s16 = vqrshrn_n_s32(q1s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + *q12s16 = vcombine_s16(d24s16, d25s16); + + q13s32 = vmull_s16(d10s16, d30s16); + q1s32 = vmull_s16(d11s16, d30s16); + q11s32 = vmull_s16(d10s16, d30s16); + q0s32 = vmull_s16(d11s16, d30s16); + + q13s32 = vmlal_s16(q13s32, d14s16, d30s16); + q1s32 = vmlal_s16(q1s32, d15s16, d30s16); + q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); + + d20s16 = vqrshrn_n_s32(q13s32, 14); + d21s16 = vqrshrn_n_s32(q1s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q0s32, 14); + *q10s16 = vcombine_s16(d20s16, d21s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + q5s16 = vdupq_n_s16(0); + + *q9s16 = vsubq_s16(q5s16, *q9s16); + *q11s16 = vsubq_s16(q5s16, q2s16); + *q13s16 = vsubq_s16(q5s16, q6s16); + *q15s16 = vsubq_s16(q5s16, q4s16); + return; +} + +void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, + int dest_stride, int tx_type) { + int i; + uint8_t *d1, *d2; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + uint64x1_t d0u64, d1u64, d2u64, d3u64; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + q10s16 = vld1q_s16(input + 8 * 2); + q11s16 = vld1q_s16(input + 8 * 3); + q12s16 = vld1q_s16(input + 8 * 4); + q13s16 = vld1q_s16(input + 8 * 5); + q14s16 = vld1q_s16(input + 8 * 6); + q15s16 = vld1q_s16(input + 8 * 7); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. Fall back to C + vp9_iht8x8_64_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // first transform rows + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + case 2: // idct_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // then transform columns + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + case 3: // iadst_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 5); + q9s16 = vrshrq_n_s16(q9s16, 5); + q10s16 = vrshrq_n_s16(q10s16, 5); + q11s16 = vrshrq_n_s16(q11s16, 5); + q12s16 = vrshrq_n_s16(q12s16, 5); + q13s16 = vrshrq_n_s16(q13s16, 5); + q14s16 = vrshrq_n_s16(q14s16, 5); + q15s16 = vrshrq_n_s16(q15s16, 5); + + for (d1 = d2 = dest, i = 0; i < 2; i++) { + if (i != 0) { + q8s16 = q12s16; + q9s16 = q13s16; + q10s16 = q14s16; + q11s16 = q15s16; + } + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + } + return; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c new file mode 100644 index 00000000000..7dd1005d3fe --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_onyxc_int.h" + +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. +void lock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +void unlock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { + const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); + const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + + cm->mi_cols = aligned_width >> MI_SIZE_LOG2; + cm->mi_rows = aligned_height >> MI_SIZE_LOG2; + cm->mi_stride = calc_mi_size(cm->mi_cols); + + cm->mb_cols = (cm->mi_cols + 1) >> 1; + cm->mb_rows = (cm->mi_rows + 1) >> 1; + cm->MBs = cm->mb_rows * cm->mb_cols; +} + +static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); + if (cm->seg_map_array[i] == NULL) + return 1; + } + cm->seg_map_alloc_size = seg_map_size; + + // Init the index. + cm->seg_map_idx = 0; + cm->prev_seg_map_idx = 1; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + if (!cm->frame_parallel_decode) + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; + + return 0; +} + +static void free_seg_map(VP9_COMMON *cm) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + vpx_free(cm->seg_map_array[i]); + cm->seg_map_array[i] = NULL; + } + + cm->current_frame_seg_map = NULL; + + if (!cm->frame_parallel_decode) { + cm->last_frame_seg_map = NULL; + } +} + +void vp9_free_ref_frame_buffers(BufferPool *pool) { + int i; + + for (i = 0; i < FRAME_BUFFERS; ++i) { + if (pool->frame_bufs[i].ref_count > 0 && + pool->frame_bufs[i].raw_frame_buffer.data != NULL) { + pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); + pool->frame_bufs[i].ref_count = 0; + } + vpx_free(pool->frame_bufs[i].mvs); + pool->frame_bufs[i].mvs = NULL; + vpx_free_frame_buffer(&pool->frame_bufs[i].buf); + } +} + +void vp9_free_postproc_buffers(VP9_COMMON *cm) { +#if CONFIG_VP9_POSTPROC + vpx_free_frame_buffer(&cm->post_proc_buffer); + vpx_free_frame_buffer(&cm->post_proc_buffer_int); +#else + (void)cm; +#endif +} + +void vp9_free_context_buffers(VP9_COMMON *cm) { + cm->free_mi(cm); + free_seg_map(cm); + vpx_free(cm->above_context); + cm->above_context = NULL; + vpx_free(cm->above_seg_context); + cm->above_seg_context = NULL; + vpx_free(cm->lf.lfm); + cm->lf.lfm = NULL; +} + + +int vp9_alloc_loop_filter(VP9_COMMON *cm) { + vpx_free(cm->lf.lfm); + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. + cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; + cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, + sizeof(*cm->lf.lfm)); + if (!cm->lf.lfm) + return 1; + return 0; +} + +int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { + int new_mi_size; + + vp9_set_mb_mi(cm, width, height); + new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); + if (cm->mi_alloc_size < new_mi_size) { + cm->free_mi(cm); + if (cm->alloc_mi(cm, new_mi_size)) + goto fail; + } + + if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { + // Create the segmentation map structure and set to 0. + free_seg_map(cm); + if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) + goto fail; + } + + if (cm->above_context_alloc_cols < cm->mi_cols) { + vpx_free(cm->above_context); + cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( + 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) goto fail; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( + mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) goto fail; + cm->above_context_alloc_cols = cm->mi_cols; + } + + if (vp9_alloc_loop_filter(cm)) + goto fail; + + return 0; + + fail: + vp9_free_context_buffers(cm); + return 1; +} + +void vp9_remove_common(VP9_COMMON *cm) { + vp9_free_context_buffers(cm); + + vpx_free(cm->fc); + cm->fc = NULL; + vpx_free(cm->frame_contexts); + cm->frame_contexts = NULL; +} + +void vp9_init_context_buffers(VP9_COMMON *cm) { + cm->setup_mi(cm); + if (cm->last_frame_seg_map && !cm->frame_parallel_decode) + memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); +} + +void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { + // Swap indices. + const int tmp = cm->seg_map_idx; + cm->seg_map_idx = cm->prev_seg_map_idx; + cm->prev_seg_map_idx = tmp; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h new file mode 100644 index 00000000000..e53955b9988 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ +#define VP9_COMMON_VP9_ALLOCCOMMON_H_ + +#define INVALID_IDX -1 // Invalid buffer index. + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct BufferPool; + +void vp9_remove_common(struct VP9Common *cm); + +int vp9_alloc_loop_filter(struct VP9Common *cm); +int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); +void vp9_init_context_buffers(struct VP9Common *cm); +void vp9_free_context_buffers(struct VP9Common *cm); + +void vp9_free_ref_frame_buffers(struct BufferPool *pool); +void vp9_free_postproc_buffers(struct VP9Common *cm); + +int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); +void vp9_free_state_buffers(struct VP9Common *cm); + +void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); + +void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.c b/thirdparty/libvpx/vp9/common/vp9_blockd.c new file mode 100644 index 00000000000..7bab27d4fd0 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_blockd.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_blockd.h" + +PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *left_mi, int b) { + if (b == 0 || b == 2) { + if (!left_mi || is_inter_block(left_mi)) + return DC_PRED; + + return get_y_mode(left_mi, b + 1); + } else { + assert(b == 1 || b == 3); + return cur_mi->bmi[b - 1].as_mode; + } +} + +PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *above_mi, int b) { + if (b == 0 || b == 1) { + if (!above_mi || is_inter_block(above_mi)) + return DC_PRED; + + return get_y_mode(above_mi, b + 2); + } else { + assert(b == 2 || b == 3); + return cur_mi->bmi[b - 2].as_mode; + } +} + +void vp9_foreach_transformed_block_in_plane( + const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, + foreach_transformed_block_visitor visit, void *arg) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO* mi = xd->mi[0]; + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + // transform size varies per plane, look it up in a common way. + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) + : mi->tx_size; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int step = 1 << (tx_size << 1); + int i = 0, r, c; + + // If mb_to_right_edge is < 0 we are in a situation in which + // the current block size extends into the UMV and we won't + // visit the sub blocks that are wholly within the UMV. + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : + xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : + xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step; + + // Keep track of the row and column of the blocks we use so that we know + // if we are in the unrestricted motion border. + for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { + // Skip visiting the sub blocks that are wholly within the UMV. + for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { + visit(plane, i, plane_bsize, tx_size, arg); + i += step; + } + i += extra_step; + } +} + +void vp9_foreach_transformed_block(const MACROBLOCKD* const xd, + BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, + void *arg) { + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) + vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); +} + +void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, + int aoff, int loff) { + ENTROPY_CONTEXT *const a = pd->above_context + aoff; + ENTROPY_CONTEXT *const l = pd->left_context + loff; + const int tx_size_in_blocks = 1 << tx_size; + + // above + if (has_eob && xd->mb_to_right_edge < 0) { + int i; + const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + + (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + int above_contexts = tx_size_in_blocks; + if (above_contexts + aoff > blocks_wide) + above_contexts = blocks_wide - aoff; + + for (i = 0; i < above_contexts; ++i) + a[i] = has_eob; + for (i = above_contexts; i < tx_size_in_blocks; ++i) + a[i] = 0; + } else { + memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + } + + // left + if (has_eob && xd->mb_to_bottom_edge < 0) { + int i; + const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + int left_contexts = tx_size_in_blocks; + if (left_contexts + loff > blocks_high) + left_contexts = blocks_high - loff; + + for (i = 0; i < left_contexts; ++i) + l[i] = has_eob; + for (i = left_contexts; i < tx_size_in_blocks; ++i) + l[i] = 0; + } else { + memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + } +} + +void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].subsampling_x = i ? ss_x : 0; + xd->plane[i].subsampling_y = i ? ss_y : 0; + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.h b/thirdparty/libvpx/vp9/common/vp9_blockd.h new file mode 100644 index 00000000000..3d26fb2b5d0 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_blockd.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_COMMON_VP9_BLOCKD_H_ +#define VP9_COMMON_VP9_BLOCKD_H_ + +#include "./vpx_config.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" + +#include "vp9/common/vp9_common_data.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_mv.h" +#include "vp9/common/vp9_scale.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_MB_PLANE 3 + +typedef enum { + KEY_FRAME = 0, + INTER_FRAME = 1, + FRAME_TYPES, +} FRAME_TYPE; + +static INLINE int is_inter_mode(PREDICTION_MODE mode) { + return mode >= NEARESTMV && mode <= NEWMV; +} + +/* For keyframes, intra block modes are predicted by the (already decoded) + modes for the Y blocks to the left and above us; for interframes, there + is a single probability table. */ + +typedef struct { + PREDICTION_MODE as_mode; + int_mv as_mv[2]; // first, second inter predictor motion vectors +} b_mode_info; + +// Note that the rate-distortion optimization loop, bit-stream writer, and +// decoder implementation modules critically rely on the defined entry values +// specified herein. They should be refactored concurrently. + +#define NONE -1 +#define INTRA_FRAME 0 +#define LAST_FRAME 1 +#define GOLDEN_FRAME 2 +#define ALTREF_FRAME 3 +#define MAX_REF_FRAMES 4 +typedef int8_t MV_REFERENCE_FRAME; + +// This structure now relates to 8x8 block regions. +typedef struct MODE_INFO { + // Common for both INTER and INTRA blocks + BLOCK_SIZE sb_type; + PREDICTION_MODE mode; + TX_SIZE tx_size; + int8_t skip; + int8_t segment_id; + int8_t seg_id_predicted; // valid only when temporal_update is enabled + + // Only for INTRA blocks + PREDICTION_MODE uv_mode; + + // Only for INTER blocks + INTERP_FILTER interp_filter; + MV_REFERENCE_FRAME ref_frame[2]; + + // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. + int_mv mv[2]; + + b_mode_info bmi[4]; +} MODE_INFO; + +static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { + return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode + : mi->mode; +} + +static INLINE int is_inter_block(const MODE_INFO *mi) { + return mi->ref_frame[0] > INTRA_FRAME; +} + +static INLINE int has_second_ref(const MODE_INFO *mi) { + return mi->ref_frame[1] > INTRA_FRAME; +} + +PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *left_mi, int b); + +PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *above_mi, int b); + +enum mv_precision { + MV_PRECISION_Q3, + MV_PRECISION_Q4 +}; + +struct buf_2d { + uint8_t *buf; + int stride; +}; + +struct macroblockd_plane { + tran_low_t *dqcoeff; + int subsampling_x; + int subsampling_y; + struct buf_2d dst; + struct buf_2d pre[2]; + ENTROPY_CONTEXT *above_context; + ENTROPY_CONTEXT *left_context; + int16_t seg_dequant[MAX_SEGMENTS][2]; + + // number of 4x4s in current block + uint16_t n4_w, n4_h; + // log2 of n4_w, n4_h + uint8_t n4_wl, n4_hl; + + // encoder + const int16_t *dequant; +}; + +#define BLOCK_OFFSET(x, i) ((x) + (i) * 16) + +typedef struct RefBuffer { + // TODO(dkovalev): idx is not really required and should be removed, now it + // is used in vp9_onyxd_if.c + int idx; + YV12_BUFFER_CONFIG *buf; + struct scale_factors sf; +} RefBuffer; + +typedef struct macroblockd { + struct macroblockd_plane plane[MAX_MB_PLANE]; + uint8_t bmode_blocks_wl; + uint8_t bmode_blocks_hl; + + FRAME_COUNTS *counts; + TileInfo tile; + + int mi_stride; + + MODE_INFO **mi; + MODE_INFO *left_mi; + MODE_INFO *above_mi; + + unsigned int max_blocks_wide; + unsigned int max_blocks_high; + + const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; + + /* Distance of MB away from frame edges */ + int mb_to_left_edge; + int mb_to_right_edge; + int mb_to_top_edge; + int mb_to_bottom_edge; + + FRAME_CONTEXT *fc; + + /* pointers to reference frames */ + RefBuffer *block_refs[2]; + + /* pointer to current frame */ + const YV12_BUFFER_CONFIG *cur_buf; + + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; + ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; + + PARTITION_CONTEXT *above_seg_context; + PARTITION_CONTEXT left_seg_context[8]; + +#if CONFIG_VP9_HIGHBITDEPTH + /* Bit depth: 8, 10, 12 */ + int bd; +#endif + + int lossless; + int corrupted; + + struct vpx_internal_error_info *error_info; +} MACROBLOCKD; + +static INLINE PLANE_TYPE get_plane_type(int plane) { + return (PLANE_TYPE)(plane > 0); +} + +static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, + PARTITION_TYPE partition) { + return subsize_lookup[partition][bsize]; +} + +extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd) { + const MODE_INFO *const mi = xd->mi[0]; + + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) + return DCT_DCT; + + return intra_mode_to_tx_type_lookup[mi->mode]; +} + +static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, + const MACROBLOCKD *xd, int ib) { + const MODE_INFO *const mi = xd->mi[0]; + + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) + return DCT_DCT; + + return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; +} + +void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); + +static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, + int xss, int yss) { + if (bsize < BLOCK_8X8) { + return TX_4X4; + } else { + const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; + return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]); + } +} + +static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, + const struct macroblockd_plane *pd) { + return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, + pd->subsampling_y); +} + +static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, + const struct macroblockd_plane *pd) { + return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; +} + +static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + memset(pd->above_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); + memset(pd->left_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); + } +} + +static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + +typedef void (*foreach_transformed_block_visitor)(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, + void *arg); + +void vp9_foreach_transformed_block_in_plane( + const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, + foreach_transformed_block_visitor visit, void *arg); + + +void vp9_foreach_transformed_block( + const MACROBLOCKD* const xd, BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, void *arg); + +static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, int block, + int *x, int *y) { + const int bwl = b_width_log2_lookup[plane_bsize]; + const int tx_cols_log2 = bwl - tx_size; + const int tx_cols = 1 << tx_cols_log2; + const int raster_mb = block >> (tx_size << 1); + *x = (raster_mb & (tx_cols - 1)) << tx_size; + *y = (raster_mb >> tx_cols_log2) << tx_size; +} + +void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, + int aoff, int loff); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common.h b/thirdparty/libvpx/vp9/common/vp9_common.h new file mode 100644 index 00000000000..908fa80a31c --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_COMMON_H_ +#define VP9_COMMON_VP9_COMMON_H_ + +/* Interface header for common constant data structures and lookup tables */ + +#include + +#include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/bitops.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Only need this for fixed-size arrays, for structs just assign. +#define vp9_copy(dest, src) { \ + assert(sizeof(dest) == sizeof(src)); \ + memcpy(dest, src, sizeof(src)); \ + } + +// Use this for variably-sized arrays. +#define vp9_copy_array(dest, src, n) { \ + assert(sizeof(*dest) == sizeof(*src)); \ + memcpy(dest, src, n * sizeof(*src)); \ + } + +#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) +#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) + +static INLINE int get_unsigned_bits(unsigned int num_values) { + return num_values > 0 ? get_msb(num_values) + 1 : 0; +} + +#if CONFIG_DEBUG +#define CHECK_MEM_ERROR(cm, lval, expr) do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate "#lval" at %s:%d", \ + __FILE__, __LINE__); \ + } while (0) +#else +#define CHECK_MEM_ERROR(cm, lval, expr) do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate "#lval); \ + } while (0) +#endif + +#define VP9_SYNC_CODE_0 0x49 +#define VP9_SYNC_CODE_1 0x83 +#define VP9_SYNC_CODE_2 0x42 + +#define VP9_FRAME_MARKER 0x2 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.c b/thirdparty/libvpx/vp9/common/vp9_common_data.c new file mode 100644 index 00000000000..3409d04844b --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common_data.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common_data.h" +#include "vpx_dsp/vpx_dsp_common.h" + +// Log 2 conversion lookup tables for block width and height +const uint8_t b_width_log2_lookup[BLOCK_SIZES] = + {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; +const uint8_t b_height_log2_lookup[BLOCK_SIZES] = + {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; +const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; +const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = + {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; +// Log 2 conversion lookup tables for modeinfo width and height +const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = + {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; +const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; +const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; + +// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) +const uint8_t size_group_lookup[BLOCK_SIZES] = + {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; + +const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = + {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; + +const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { + { // 4X4 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID + }, { // 8X8 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID + }, { // 16X16 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID + }, { // 32X32 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, + PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID + }, { // 64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, + PARTITION_NONE + } +}; + +const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { + { // PARTITION_NONE + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, + BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, + BLOCK_64X64, + }, { // PARTITION_HORZ + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_64X32, + }, { // PARTITION_VERT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X64, + }, { // PARTITION_SPLIT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X32, + } +}; + +const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { + TX_4X4, TX_4X4, TX_4X4, + TX_8X8, TX_8X8, TX_8X8, + TX_16X16, TX_16X16, TX_16X16, + TX_32X32, TX_32X32, TX_32X32, TX_32X32 +}; + +const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { + BLOCK_4X4, // TX_4X4 + BLOCK_8X8, // TX_8X8 + BLOCK_16X16, // TX_16X16 + BLOCK_32X32, // TX_32X32 +}; + +const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { + TX_4X4, // ONLY_4X4 + TX_8X8, // ALLOW_8X8 + TX_16X16, // ALLOW_16X16 + TX_32X32, // ALLOW_32X32 + TX_32X32, // TX_MODE_SELECT +}; + +const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { +// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 +// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + {{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}}, + {{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}}, + {{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}}, + {{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}}, + {{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}}, + {{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}}, + {{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}}, + {{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}}, + {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}}, + {{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}}, + {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, + {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, + {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, +}; + +// Generates 4 bit field in which each bit set to 1 represents +// a blocksize partition 1111 means we split 64x64, 32x32, 16x16 +// and 8x8. 1000 means we just split the 64x64 to 32x32 +const struct { + PARTITION_CONTEXT above; + PARTITION_CONTEXT left; +} partition_context_lookup[BLOCK_SIZES]= { + {15, 15}, // 4X4 - {0b1111, 0b1111} + {15, 14}, // 4X8 - {0b1111, 0b1110} + {14, 15}, // 8X4 - {0b1110, 0b1111} + {14, 14}, // 8X8 - {0b1110, 0b1110} + {14, 12}, // 8X16 - {0b1110, 0b1100} + {12, 14}, // 16X8 - {0b1100, 0b1110} + {12, 12}, // 16X16 - {0b1100, 0b1100} + {12, 8 }, // 16X32 - {0b1100, 0b1000} + {8, 12}, // 32X16 - {0b1000, 0b1100} + {8, 8 }, // 32X32 - {0b1000, 0b1000} + {8, 0 }, // 32X64 - {0b1000, 0b0000} + {0, 8 }, // 64X32 - {0b0000, 0b1000} + {0, 0 }, // 64X64 - {0b0000, 0b0000} +}; + +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +const uint8_t need_top_left[INTRA_MODES] = { + 0, // DC_PRED + 0, // V_PRED + 0, // H_PRED + 0, // D45_PRED + 1, // D135_PRED + 1, // D117_PRED + 1, // D153_PRED + 0, // D207_PRED + 0, // D63_PRED + 1, // TM_PRED +}; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.h b/thirdparty/libvpx/vp9/common/vp9_common_data.h new file mode 100644 index 00000000000..0ae24dad549 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common_data.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_COMMON_DATA_H_ +#define VP9_COMMON_VP9_COMMON_DATA_H_ + +#include "vp9/common/vp9_enums.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const uint8_t b_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t b_height_log2_lookup[BLOCK_SIZES]; +extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t size_group_lookup[BLOCK_SIZES]; +extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES]; +extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES]; +extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; +extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; +extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; +extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; +extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +extern const uint8_t need_top_left[INTRA_MODES]; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c new file mode 100644 index 00000000000..d9c1fd96861 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" + +static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { + fprintf(f, "%s", str); + fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, + cm->show_frame, cm->base_qindex); +} +/* This function dereferences a pointer to the mbmi structure + * and uses the passed in member offset to print out the value of an integer + * for each mbmi member value in the mi structure. + */ +static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, + size_t member_offset) { + int mi_row, mi_col; + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + char prefix = descriptor[0]; + + log_frame_info(cm, descriptor, file); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(file, "%c ", prefix); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(file, "%2d ", + *((int*) ((char *) (mi[0]) + + member_offset))); + mi++; + } + fprintf(file, "\n"); + mi += 8; + } + fprintf(file, "\n"); +} + +void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { + int mi_row; + int mi_col; + FILE *mvs = fopen(file, "a"); + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + + print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); + print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); + print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); + print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); + print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); + + // output skip infomation. + log_frame_info(cm, "Skips:", mvs); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "S "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[0]->skip); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + // output motion vectors. + log_frame_info(cm, "Vectors ", mvs); + mi = cm->mi_grid_visible; + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "V "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, + mi[0]->mv[0].as_mv.col); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + fclose(mvs); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.c b/thirdparty/libvpx/vp9/common/vp9_entropy.c new file mode 100644 index 00000000000..7b490af34fd --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropy.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymode.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" + +// Unconstrained Node Tree +const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, // 0 = LOW_VAL + -TWO_TOKEN, 4, // 1 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE + 8, 10, // 3 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE + 12, 14, // 5 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE +}; + +const vpx_prob vp9_cat1_prob[] = { 159 }; +const vpx_prob vp9_cat2_prob[] = { 165, 145 }; +const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; +const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; +const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; +const vpx_prob vp9_cat6_prob[] = { + 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 +}; +#if CONFIG_VP9_HIGHBITDEPTH +const vpx_prob vp9_cat6_prob_high12[] = { + 255, 255, 255, 255, 254, 254, 254, 252, 249, + 243, 230, 196, 177, 153, 140, 133, 130, 129 +}; +#endif + +const uint8_t vp9_coefband_trans_8x8plus[1024] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, + // beyond MAXBAND_INDEX+1 all values are filled as 5 + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +}; + +const uint8_t vp9_coefband_trans_4x4[16] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, +}; + +const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { + 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 +}; + +// Model obtained from a 2-sided zero-centerd distribuition derived +// from a Pareto distribution. The cdf of the distribution is: +// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] +// +// For a given beta and a given probablity of the 1-node, the alpha +// is first solved, and then the {alpha, beta} pair is used to generate +// the probabilities for the rest of the nodes. + +// beta = 8 + +// Every odd line in this table can be generated from the even lines +// by averaging : +// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + +// vp9_pareto8_full[l+1][node] ) >> 1; +const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { + { 3, 86, 128, 6, 86, 23, 88, 29}, + { 6, 86, 128, 11, 87, 42, 91, 52}, + { 9, 86, 129, 17, 88, 61, 94, 76}, + { 12, 86, 129, 22, 88, 77, 97, 93}, + { 15, 87, 129, 28, 89, 93, 100, 110}, + { 17, 87, 129, 33, 90, 105, 103, 123}, + { 20, 88, 130, 38, 91, 118, 106, 136}, + { 23, 88, 130, 43, 91, 128, 108, 146}, + { 26, 89, 131, 48, 92, 139, 111, 156}, + { 28, 89, 131, 53, 93, 147, 114, 163}, + { 31, 90, 131, 58, 94, 156, 117, 171}, + { 34, 90, 131, 62, 94, 163, 119, 177}, + { 37, 90, 132, 66, 95, 171, 122, 184}, + { 39, 90, 132, 70, 96, 177, 124, 189}, + { 42, 91, 132, 75, 97, 183, 127, 194}, + { 44, 91, 132, 79, 97, 188, 129, 198}, + { 47, 92, 133, 83, 98, 193, 132, 202}, + { 49, 92, 133, 86, 99, 197, 134, 205}, + { 52, 93, 133, 90, 100, 201, 137, 208}, + { 54, 93, 133, 94, 100, 204, 139, 211}, + { 57, 94, 134, 98, 101, 208, 142, 214}, + { 59, 94, 134, 101, 102, 211, 144, 216}, + { 62, 94, 135, 105, 103, 214, 146, 218}, + { 64, 94, 135, 108, 103, 216, 148, 220}, + { 66, 95, 135, 111, 104, 219, 151, 222}, + { 68, 95, 135, 114, 105, 221, 153, 223}, + { 71, 96, 136, 117, 106, 224, 155, 225}, + { 73, 96, 136, 120, 106, 225, 157, 226}, + { 76, 97, 136, 123, 107, 227, 159, 228}, + { 78, 97, 136, 126, 108, 229, 160, 229}, + { 80, 98, 137, 129, 109, 231, 162, 231}, + { 82, 98, 137, 131, 109, 232, 164, 232}, + { 84, 98, 138, 134, 110, 234, 166, 233}, + { 86, 98, 138, 137, 111, 235, 168, 234}, + { 89, 99, 138, 140, 112, 236, 170, 235}, + { 91, 99, 138, 142, 112, 237, 171, 235}, + { 93, 100, 139, 145, 113, 238, 173, 236}, + { 95, 100, 139, 147, 114, 239, 174, 237}, + { 97, 101, 140, 149, 115, 240, 176, 238}, + { 99, 101, 140, 151, 115, 241, 177, 238}, + {101, 102, 140, 154, 116, 242, 179, 239}, + {103, 102, 140, 156, 117, 242, 180, 239}, + {105, 103, 141, 158, 118, 243, 182, 240}, + {107, 103, 141, 160, 118, 243, 183, 240}, + {109, 104, 141, 162, 119, 244, 185, 241}, + {111, 104, 141, 164, 119, 244, 186, 241}, + {113, 104, 142, 166, 120, 245, 187, 242}, + {114, 104, 142, 168, 121, 245, 188, 242}, + {116, 105, 143, 170, 122, 246, 190, 243}, + {118, 105, 143, 171, 122, 246, 191, 243}, + {120, 106, 143, 173, 123, 247, 192, 244}, + {121, 106, 143, 175, 124, 247, 193, 244}, + {123, 107, 144, 177, 125, 248, 195, 244}, + {125, 107, 144, 178, 125, 248, 196, 244}, + {127, 108, 145, 180, 126, 249, 197, 245}, + {128, 108, 145, 181, 127, 249, 198, 245}, + {130, 109, 145, 183, 128, 249, 199, 245}, + {132, 109, 145, 184, 128, 249, 200, 245}, + {134, 110, 146, 186, 129, 250, 201, 246}, + {135, 110, 146, 187, 130, 250, 202, 246}, + {137, 111, 147, 189, 131, 251, 203, 246}, + {138, 111, 147, 190, 131, 251, 204, 246}, + {140, 112, 147, 192, 132, 251, 205, 247}, + {141, 112, 147, 193, 132, 251, 206, 247}, + {143, 113, 148, 194, 133, 251, 207, 247}, + {144, 113, 148, 195, 134, 251, 207, 247}, + {146, 114, 149, 197, 135, 252, 208, 248}, + {147, 114, 149, 198, 135, 252, 209, 248}, + {149, 115, 149, 199, 136, 252, 210, 248}, + {150, 115, 149, 200, 137, 252, 210, 248}, + {152, 115, 150, 201, 138, 252, 211, 248}, + {153, 115, 150, 202, 138, 252, 212, 248}, + {155, 116, 151, 204, 139, 253, 213, 249}, + {156, 116, 151, 205, 139, 253, 213, 249}, + {158, 117, 151, 206, 140, 253, 214, 249}, + {159, 117, 151, 207, 141, 253, 215, 249}, + {161, 118, 152, 208, 142, 253, 216, 249}, + {162, 118, 152, 209, 142, 253, 216, 249}, + {163, 119, 153, 210, 143, 253, 217, 249}, + {164, 119, 153, 211, 143, 253, 217, 249}, + {166, 120, 153, 212, 144, 254, 218, 250}, + {167, 120, 153, 212, 145, 254, 219, 250}, + {168, 121, 154, 213, 146, 254, 220, 250}, + {169, 121, 154, 214, 146, 254, 220, 250}, + {171, 122, 155, 215, 147, 254, 221, 250}, + {172, 122, 155, 216, 147, 254, 221, 250}, + {173, 123, 155, 217, 148, 254, 222, 250}, + {174, 123, 155, 217, 149, 254, 222, 250}, + {176, 124, 156, 218, 150, 254, 223, 250}, + {177, 124, 156, 219, 150, 254, 223, 250}, + {178, 125, 157, 220, 151, 254, 224, 251}, + {179, 125, 157, 220, 151, 254, 224, 251}, + {180, 126, 157, 221, 152, 254, 225, 251}, + {181, 126, 157, 221, 152, 254, 225, 251}, + {183, 127, 158, 222, 153, 254, 226, 251}, + {184, 127, 158, 223, 154, 254, 226, 251}, + {185, 128, 159, 224, 155, 255, 227, 251}, + {186, 128, 159, 224, 155, 255, 227, 251}, + {187, 129, 160, 225, 156, 255, 228, 251}, + {188, 130, 160, 225, 156, 255, 228, 251}, + {189, 131, 160, 226, 157, 255, 228, 251}, + {190, 131, 160, 226, 158, 255, 228, 251}, + {191, 132, 161, 227, 159, 255, 229, 251}, + {192, 132, 161, 227, 159, 255, 229, 251}, + {193, 133, 162, 228, 160, 255, 230, 252}, + {194, 133, 162, 229, 160, 255, 230, 252}, + {195, 134, 163, 230, 161, 255, 231, 252}, + {196, 134, 163, 230, 161, 255, 231, 252}, + {197, 135, 163, 231, 162, 255, 231, 252}, + {198, 135, 163, 231, 162, 255, 231, 252}, + {199, 136, 164, 232, 163, 255, 232, 252}, + {200, 136, 164, 232, 164, 255, 232, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {202, 138, 166, 233, 166, 255, 233, 252}, + {203, 138, 166, 233, 166, 255, 233, 252}, + {204, 139, 166, 234, 167, 255, 234, 252}, + {205, 139, 166, 234, 167, 255, 234, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {207, 141, 168, 236, 169, 255, 235, 252}, + {208, 141, 168, 236, 170, 255, 235, 252}, + {209, 142, 169, 237, 171, 255, 236, 252}, + {209, 143, 169, 237, 171, 255, 236, 252}, + {210, 144, 169, 237, 172, 255, 236, 252}, + {211, 144, 169, 237, 172, 255, 236, 252}, + {212, 145, 170, 238, 173, 255, 237, 252}, + {213, 145, 170, 238, 173, 255, 237, 252}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {216, 148, 173, 240, 176, 255, 238, 253}, + {217, 148, 173, 240, 176, 255, 238, 253}, + {218, 149, 173, 241, 177, 255, 239, 253}, + {218, 149, 173, 241, 178, 255, 239, 253}, + {219, 150, 174, 241, 179, 255, 239, 253}, + {219, 151, 174, 241, 179, 255, 239, 253}, + {220, 152, 175, 242, 180, 255, 240, 253}, + {221, 152, 175, 242, 180, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {225, 156, 178, 244, 184, 255, 241, 253}, + {225, 157, 178, 244, 184, 255, 241, 253}, + {226, 158, 179, 244, 185, 255, 242, 253}, + {227, 158, 179, 244, 185, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {230, 161, 182, 246, 188, 255, 243, 253}, + {230, 162, 182, 246, 188, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {234, 166, 185, 247, 192, 255, 244, 253}, + {234, 167, 185, 247, 192, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {237, 171, 189, 249, 196, 255, 245, 254}, + {237, 172, 189, 249, 196, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {240, 175, 192, 249, 199, 255, 246, 254}, + {240, 176, 192, 249, 199, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {242, 179, 195, 250, 202, 255, 246, 254}, + {242, 180, 195, 250, 202, 255, 246, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {243, 182, 197, 251, 204, 255, 247, 254}, + {243, 183, 197, 251, 204, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {245, 186, 200, 251, 207, 255, 247, 254}, + {245, 187, 200, 251, 207, 255, 247, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 189, 202, 252, 208, 255, 248, 254}, + {246, 190, 202, 252, 208, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 192, 204, 252, 210, 255, 248, 254}, + {247, 193, 204, 252, 210, 255, 248, 254}, + {248, 194, 205, 252, 211, 255, 248, 254}, + {248, 194, 205, 252, 211, 255, 248, 254}, + {248, 195, 206, 252, 212, 255, 249, 254}, + {248, 196, 206, 252, 212, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 198, 208, 253, 214, 255, 249, 254}, + {249, 199, 209, 253, 214, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 201, 211, 253, 215, 255, 249, 254}, + {250, 202, 211, 253, 215, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {251, 204, 213, 253, 217, 255, 250, 254}, + {251, 205, 213, 253, 217, 255, 250, 254}, + {251, 206, 214, 254, 218, 255, 250, 254}, + {251, 206, 215, 254, 218, 255, 250, 254}, + {252, 207, 216, 254, 219, 255, 250, 254}, + {252, 208, 216, 254, 219, 255, 250, 254}, + {252, 209, 217, 254, 220, 255, 250, 254}, + {252, 210, 217, 254, 220, 255, 250, 254}, + {252, 211, 218, 254, 221, 255, 250, 254}, + {252, 212, 218, 254, 221, 255, 250, 254}, + {253, 213, 219, 254, 222, 255, 250, 254}, + {253, 213, 220, 254, 222, 255, 250, 254}, + {253, 214, 221, 254, 223, 255, 250, 254}, + {253, 215, 221, 254, 223, 255, 250, 254}, + {253, 216, 222, 254, 224, 255, 251, 254}, + {253, 217, 223, 254, 224, 255, 251, 254}, + {253, 218, 224, 254, 225, 255, 251, 254}, + {253, 219, 224, 254, 225, 255, 251, 254}, + {254, 220, 225, 254, 225, 255, 251, 254}, + {254, 221, 226, 254, 225, 255, 251, 254}, + {254, 222, 227, 255, 226, 255, 251, 254}, + {254, 223, 227, 255, 226, 255, 251, 254}, + {254, 224, 228, 255, 227, 255, 251, 254}, + {254, 225, 229, 255, 227, 255, 251, 254}, + {254, 226, 230, 255, 228, 255, 251, 254}, + {254, 227, 230, 255, 229, 255, 251, 254}, + {255, 228, 231, 255, 230, 255, 251, 254}, + {255, 229, 232, 255, 230, 255, 251, 254}, + {255, 230, 233, 255, 231, 255, 252, 254}, + {255, 231, 234, 255, 231, 255, 252, 254}, + {255, 232, 235, 255, 232, 255, 252, 254}, + {255, 233, 236, 255, 232, 255, 252, 254}, + {255, 235, 237, 255, 233, 255, 252, 254}, + {255, 236, 238, 255, 234, 255, 252, 254}, + {255, 238, 240, 255, 235, 255, 252, 255}, + {255, 239, 241, 255, 235, 255, 252, 254}, + {255, 241, 243, 255, 236, 255, 252, 254}, + {255, 243, 245, 255, 237, 255, 252, 254}, + {255, 246, 247, 255, 239, 255, 253, 255}, +}; + +static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } + }, { // Band 1 + { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, + { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } + }, { // Band 2 + { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, + { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } + }, { // Band 3 + { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, + { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } + }, { // Band 4 + { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 }, + { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } + }, { // Band 5 + { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, + { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } + } + }, { // Inter + { // Band 0 + { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } + }, { // Band 1 + { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, + { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } + }, { // Band 2 + { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, + { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } + }, { // Band 3 + { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, + { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } + }, { // Band 4 + { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 }, + { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } + }, { // Band 5 + { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, + { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } + }, { // Band 1 + { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, + { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } + }, { // Band 2 + { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, + { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } + }, { // Band 3 + { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, + { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } + }, { // Band 4 + { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, + { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } + }, { // Band 5 + { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, + { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } + } + }, { // Inter + { // Band 0 + { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } + }, { // Band 1 + { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, + { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } + }, { // Band 2 + { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, + { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } + }, { // Band 3 + { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, + { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 } + }, { // Band 4 + { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, + { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } + }, { // Band 5 + { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, + { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 } + }, { // Band 1 + { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, + { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } + }, { // Band 2 + { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, + { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } + }, { // Band 3 + { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, + { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } + }, { // Band 4 + { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, + { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } + }, { // Band 5 + { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, + { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } + } + }, { // Inter + { // Band 0 + { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } + }, { // Band 1 + { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, + { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } + }, { // Band 2 + { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, + { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } + }, { // Band 3 + { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } + }, { // Band 4 + { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } + }, { // Band 5 + { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, + { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } + }, { // Band 1 + { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, + { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } + }, { // Band 2 + { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 }, + { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } + }, { // Band 3 + { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, + { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } + }, { // Band 4 + { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, + { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } + }, { // Band 5 + { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, + { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } + } + }, { // Inter + { // Band 0 + { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } + }, { // Band 1 + { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, + { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } + }, { // Band 2 + { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, + { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } + }, { // Band 3 + { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, + { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } + }, { // Band 4 + { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, + { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } + }, { // Band 5 + { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, + { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } + }, { // Band 1 + { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, + { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } + }, { // Band 2 + { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, + { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } + }, { // Band 3 + { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, + { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } + }, { // Band 4 + { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, + { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } + }, { // Band 5 + { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, + { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } + } + }, { // Inter + { // Band 0 + { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } + }, { // Band 1 + { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, + { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } + }, { // Band 2 + { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, + { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 } + }, { // Band 3 + { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, + { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } + }, { // Band 4 + { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, + { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } + }, { // Band 5 + { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, + { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } + }, { // Band 1 + { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, + { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } + }, { // Band 2 + { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, + { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } + }, { // Band 3 + { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, + { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } + }, { // Band 4 + { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, + { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } + }, { // Band 5 + { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, + { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } + } + }, { // Inter + { // Band 0 + { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } + }, { // Band 1 + { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, + { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } + }, { // Band 2 + { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, + { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } + }, { // Band 3 + { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, + { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } + }, { // Band 4 + { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 }, + { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } + }, { // Band 5 + { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, + { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } + }, { // Band 1 + { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, + { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } + }, { // Band 2 + { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, + { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } + }, { // Band 3 + { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, + { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } + }, { // Band 4 + { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, + { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } + }, { // Band 5 + { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, + { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } + } + }, { // Inter + { // Band 0 + { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } + }, { // Band 1 + { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, + { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } + }, { // Band 2 + { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, + { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } + }, { // Band 3 + { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, + { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } + }, { // Band 4 + { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, + { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } + }, { // Band 5 + { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, + { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } + }, { // Band 1 + { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, + { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } + }, { // Band 2 + { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, + { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } + }, { // Band 3 + { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, + { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } + }, { // Band 4 + { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, + { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } + }, { // Band 5 + { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, + { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } + } + }, { // Inter + { // Band 0 + { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } + }, { // Band 1 + { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, + { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } + }, { // Band 2 + { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, + { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } + }, { // Band 3 + { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, + { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } + }, { // Band 4 + { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, + { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } + }, { // Band 5 + { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, + { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } + } + } + } +}; + +static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); +} + +void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { + if (full != model) + memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); + extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); +} + +void vp9_default_coef_probs(VP9_COMMON *cm) { + vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); + vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); + vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); + vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); +} + +#define COEF_COUNT_SAT 24 +#define COEF_MAX_UPDATE_FACTOR 112 +#define COEF_COUNT_SAT_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_KEY 112 +#define COEF_COUNT_SAT_AFTER_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 + +static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, + unsigned int count_sat, + unsigned int update_factor) { + const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; + vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; + const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; + vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; + unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = + cm->counts.eob_branch[tx_size]; + int i, j, k, l, m; + + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { + const int n0 = counts[i][j][k][l][ZERO_TOKEN]; + const int n1 = counts[i][j][k][l][ONE_TOKEN]; + const int n2 = counts[i][j][k][l][TWO_TOKEN]; + const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; + const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { + { neob, eob_counts[i][j][k][l] - neob }, + { n0, n1 + n2 }, + { n1, n2 } + }; + for (m = 0; m < UNCONSTRAINED_NODES; ++m) + probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m], + branch_ct[m], + count_sat, update_factor); + } +} + +void vp9_adapt_coef_probs(VP9_COMMON *cm) { + TX_SIZE t; + unsigned int count_sat, update_factor; + + if (frame_is_intra_only(cm)) { + update_factor = COEF_MAX_UPDATE_FACTOR_KEY; + count_sat = COEF_COUNT_SAT_KEY; + } else if (cm->last_frame_type == KEY_FRAME) { + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ + count_sat = COEF_COUNT_SAT_AFTER_KEY; + } else { + update_factor = COEF_MAX_UPDATE_FACTOR; + count_sat = COEF_COUNT_SAT; + } + for (t = TX_4X4; t <= TX_32X32; t++) + adapt_coef_probs(cm, t, count_sat, update_factor); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.h b/thirdparty/libvpx/vp9/common/vp9_entropy.h new file mode 100644 index 00000000000..63b3bff5d91 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropy.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_ENTROPY_H_ +#define VP9_COMMON_VP9_ENTROPY_H_ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/prob.h" + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DIFF_UPDATE_PROB 252 + +// Coefficient token alphabet +#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 +#define ONE_TOKEN 1 // 1 Extra Bits 0+1 +#define TWO_TOKEN 2 // 2 Extra Bits 0+1 +#define THREE_TOKEN 3 // 3 Extra Bits 0+1 +#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 +#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 +#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 +#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 +#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 +#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 +#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 +#define EOB_TOKEN 11 // EOB Extra Bits 0+0 + +#define ENTROPY_TOKENS 12 + +#define ENTROPY_NODES 11 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); + +#define CAT1_MIN_VAL 5 +#define CAT2_MIN_VAL 7 +#define CAT3_MIN_VAL 11 +#define CAT4_MIN_VAL 19 +#define CAT5_MIN_VAL 35 +#define CAT6_MIN_VAL 67 + +// Extra bit probabilities. +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); + +#if CONFIG_VP9_HIGHBITDEPTH +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#define EOB_MODEL_TOKEN 3 + +#define DCT_MAX_VALUE 16384 +#if CONFIG_VP9_HIGHBITDEPTH +#define DCT_MAX_VALUE_HIGH10 65536 +#define DCT_MAX_VALUE_HIGH12 262144 +#endif // CONFIG_VP9_HIGHBITDEPTH + +/* Coefficients are predicted via a 3-dimensional probability table. */ + +#define REF_TYPES 2 // intra=0, inter=1 + +/* Middle dimension reflects the coefficient position within the transform. */ +#define COEF_BANDS 6 + +/* Inside dimension is measure of nearby complexity, that reflects the energy + of nearby coefficients are nonzero. For the first coefficient (DC, unless + block type is 0), we look at the (already encoded) blocks above and to the + left of the current block. The context index is then the number (0,1,or 2) + of these blocks having nonzero coefficients. + After decoding a coefficient, the measure is determined by the size of the + most recently decoded coefficient. + Note that the intuitive meaning of this measure changes as coefficients + are decoded, e.g., prior to the first token, a zero means that my neighbors + are empty while, after the first token, because of the use of end-of-block, + a zero means we just decoded a zero and hence guarantees that a non-zero + coefficient will appear later in this block. However, this shift + in meaning is perfectly OK because our context depends also on the + coefficient band (and since zigzag positions 0, 1, and 2 are in + distinct bands). */ + +#define COEFF_CONTEXTS 6 +#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) + +// #define ENTROPY_STATS + +typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_TOKENS]; +typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_NODES][2]; + +#define SUBEXP_PARAM 4 /* Subexponential code parameter */ +#define MODULUS_PARAM 13 /* Modulus parameter */ + +struct VP9Common; +void vp9_default_coef_probs(struct VP9Common *cm); +void vp9_adapt_coef_probs(struct VP9Common *cm); + +// This is the index in the scan order beyond which all coefficients for +// 8x8 transform and above are in the top band. +// This macro is currently unused but may be used by certain implementations +#define MAXBAND_INDEX 21 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); + +static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { + return tx_size == TX_4X4 ? vp9_coefband_trans_4x4 + : vp9_coefband_trans_8x8plus; +} + +// 128 lists of probabilities are stored for the following ONE node probs: +// 1, 3, 5, 7, ..., 253, 255 +// In between probabilities are interpolated linearly + +#define COEFF_PROB_MODELS 255 + +#define UNCONSTRAINED_NODES 3 + +#define PIVOT_NODE 2 // which node is pivot + +#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) +extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; +extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; + +typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] + [COEFF_CONTEXTS][UNCONSTRAINED_NODES]; + +typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] + [COEFF_CONTEXTS] + [UNCONSTRAINED_NODES + 1]; + +void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full); + +typedef char ENTROPY_CONTEXT; + +static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, + ENTROPY_CONTEXT b) { + return (a != 0) + (b != 0); +} + +static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l) { + ENTROPY_CONTEXT above_ec = 0, left_ec = 0; + + switch (tx_size) { + case TX_4X4: + above_ec = a[0] != 0; + left_ec = l[0] != 0; + break; + case TX_8X8: + above_ec = !!*(const uint16_t *)a; + left_ec = !!*(const uint16_t *)l; + break; + case TX_16X16: + above_ec = !!*(const uint32_t *)a; + left_ec = !!*(const uint32_t *)l; + break; + case TX_32X32: + above_ec = !!*(const uint64_t *)a; + left_ec = !!*(const uint64_t *)l; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + + return combine_entropy_contexts(above_ec, left_ec); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.c b/thirdparty/libvpx/vp9/common/vp9_entropymode.c new file mode 100644 index 00000000000..670348bafd3 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymode.c @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_seg_common.h" + +const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { + { // above = dc + { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc + { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v + { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h + { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 + { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 + { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 + { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 + { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 + { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm + }, { // above = v + { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc + { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v + { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h + { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 + { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 + { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 + { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 + { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 + { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm + }, { // above = h + { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc + { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v + { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h + { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 + { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 + { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 + { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 + { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 + { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm + }, { // above = d45 + { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc + { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v + { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h + { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 + { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 + { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 + { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 + { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 + { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm + }, { // above = d135 + { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc + { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v + { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h + { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 + { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 + { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 + { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 + { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 + { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm + }, { // above = d117 + { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc + { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v + { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h + { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 + { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 + { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 + { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 + { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 + { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm + }, { // above = d153 + { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc + { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v + { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h + { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 + { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 + { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 + { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 + { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 + { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm + }, { // above = d207 + { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc + { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v + { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h + { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 + { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 + { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 + { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 + { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 + { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm + }, { // above = d63 + { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc + { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v + { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h + { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 + { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 + { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 + { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 + { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 + { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm + }, { // above = tm + { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc + { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v + { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h + { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 + { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 + { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 + { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 + { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 + { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm + } +}; + +const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { + { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc + { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v + { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h + { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 + { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 + { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 + { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 + { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 + { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 + { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm +}; + +static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 +}; + +static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { + { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc + { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v + { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h + { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 + { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 + { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 + { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 + { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 + { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm +}; + +const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 158, 97, 94 }, // a/l both not split + { 93, 24, 99 }, // a split, l not split + { 85, 119, 44 }, // l split, a not split + { 62, 59, 67 }, // a/l both split + // 16x16 -> 8x8 + { 149, 53, 53 }, // a/l both not split + { 94, 20, 48 }, // a split, l not split + { 83, 53, 24 }, // l split, a not split + { 52, 18, 18 }, // a/l both split + // 32x32 -> 16x16 + { 150, 40, 39 }, // a/l both not split + { 78, 12, 26 }, // a split, l not split + { 67, 33, 11 }, // l split, a not split + { 24, 7, 5 }, // a/l both split + // 64x64 -> 32x32 + { 174, 35, 49 }, // a/l both not split + { 68, 11, 27 }, // a split, l not split + { 57, 15, 9 }, // l split, a not split + { 12, 3, 3 }, // a/l both split +}; + +static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 199, 122, 141 }, // a/l both not split + { 147, 63, 159 }, // a split, l not split + { 148, 133, 118 }, // l split, a not split + { 121, 104, 114 }, // a/l both split + // 16x16 -> 8x8 + { 174, 73, 87 }, // a/l both not split + { 92, 41, 83 }, // a split, l not split + { 82, 99, 50 }, // l split, a not split + { 53, 39, 39 }, // a/l both split + // 32x32 -> 16x16 + { 177, 58, 59 }, // a/l both not split + { 68, 26, 63 }, // a split, l not split + { 52, 79, 25 }, // l split, a not split + { 17, 14, 12 }, // a/l both split + // 64x64 -> 32x32 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split +}; + +static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] + [INTER_MODES - 1] = { + {2, 173, 34}, // 0 = both zero mv + {7, 145, 85}, // 1 = one zero mv + one a predicted mv + {7, 166, 63}, // 2 = two predicted mvs + {7, 94, 66}, // 3 = one predicted/zero and one new mv + {8, 64, 46}, // 4 = two new mvs + {17, 81, 31}, // 5 = one intra neighbour + x + {25, 29, 30}, // 6 = two intra neighbours +}; + +/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ +const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { + -DC_PRED, 2, /* 0 = DC_NODE */ + -TM_PRED, 4, /* 1 = TM_NODE */ + -V_PRED, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -H_PRED, 10, /* 4 = H_NODE */ + -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ + -D45_PRED, 14, /* 6 = D45_NODE */ + -D63_PRED, 16, /* 7 = D63_NODE */ + -D153_PRED, -D207_PRED /* 8 = D153_NODE */ +}; + +const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { + -INTER_OFFSET(ZEROMV), 2, + -INTER_OFFSET(NEARESTMV), 4, + -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) +}; + +const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { + -PARTITION_NONE, 2, + -PARTITION_HORZ, 4, + -PARTITION_VERT, -PARTITION_SPLIT +}; + +static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { + 9, 102, 187, 225 +}; + +static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { + 239, 183, 119, 96, 41 +}; + +static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { + 50, 126, 123, 221, 226 +}; + +static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { + { 33, 16 }, + { 77, 74 }, + { 142, 142 }, + { 172, 170 }, + { 238, 247 } +}; + +static const struct tx_probs default_tx_probs = { + { { 3, 136, 37 }, + { 5, 52, 13 } }, + + { { 20, 152 }, + { 15, 101 } }, + + { { 100 }, + { 66 } } +}; + +void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]) { + ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; + ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + + tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; + ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; + ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; +} + +void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]) { + ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; + ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; + ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; + ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; +} + +void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, + unsigned int (*ct_8x8p)[2]) { + ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; + ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; +} + +static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { + 192, 128, 64 +}; + +static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1] = { + { 235, 162, }, + { 36, 255, }, + { 34, 3, }, + { 149, 144, }, +}; + +static void init_mode_probs(FRAME_CONTEXT *fc) { + vp9_copy(fc->uv_mode_prob, default_if_uv_probs); + vp9_copy(fc->y_mode_prob, default_if_y_probs); + vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); + vp9_copy(fc->partition_prob, default_partition_probs); + vp9_copy(fc->intra_inter_prob, default_intra_inter_p); + vp9_copy(fc->comp_inter_prob, default_comp_inter_p); + vp9_copy(fc->comp_ref_prob, default_comp_ref_p); + vp9_copy(fc->single_ref_prob, default_single_ref_p); + fc->tx_probs = default_tx_probs; + vp9_copy(fc->skip_probs, default_skip_probs); + vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); +} + +const vpx_tree_index vp9_switchable_interp_tree + [TREE_SIZE(SWITCHABLE_FILTERS)] = { + -EIGHTTAP, 2, + -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP +}; + +void vp9_adapt_mode_probs(VP9_COMMON *cm) { + int i, j; + FRAME_CONTEXT *fc = cm->fc; + const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; + const FRAME_COUNTS *counts = &cm->counts; + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], + counts->intra_inter[i]); + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], + counts->comp_inter[i]); + for (i = 0; i < REF_CONTEXTS; i++) + fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], + counts->comp_ref[i]); + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + fc->single_ref_prob[i][j] = mode_mv_merge_probs( + pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], + counts->inter_mode[i], fc->inter_mode_probs[i]); + + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], + counts->y_mode[i], fc->y_mode_prob[i]); + + for (i = 0; i < INTRA_MODES; ++i) + vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], + counts->uv_mode[i], fc->uv_mode_prob[i]); + + for (i = 0; i < PARTITION_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], + counts->partition[i], fc->partition_prob[i]); + + if (cm->interp_filter == SWITCHABLE) { + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_switchable_interp_tree, + pre_fc->switchable_interp_prob[i], + counts->switchable_interp[i], + fc->switchable_interp_prob[i]); + } + + if (cm->tx_mode == TX_MODE_SELECT) { + int j; + unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; + unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; + unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); + for (j = 0; j < TX_SIZES - 3; ++j) + fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); + + tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); + for (j = 0; j < TX_SIZES - 2; ++j) + fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); + + tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); + for (j = 0; j < TX_SIZES - 1; ++j) + fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); + } + } + + for (i = 0; i < SKIP_CONTEXTS; ++i) + fc->skip_probs[i] = mode_mv_merge_probs( + pre_fc->skip_probs[i], counts->skip[i]); +} + +static void set_default_lf_deltas(struct loopfilter *lf) { + lf->mode_ref_delta_enabled = 1; + lf->mode_ref_delta_update = 1; + + lf->ref_deltas[INTRA_FRAME] = 1; + lf->ref_deltas[LAST_FRAME] = 0; + lf->ref_deltas[GOLDEN_FRAME] = -1; + lf->ref_deltas[ALTREF_FRAME] = -1; + + lf->mode_deltas[0] = 0; + lf->mode_deltas[1] = 0; +} + +void vp9_setup_past_independence(VP9_COMMON *cm) { + // Reset the segment feature data to the default stats: + // Features disabled, 0, with delta coding (Default state). + struct loopfilter *const lf = &cm->lf; + + int i; + vp9_clearall_segfeatures(&cm->seg); + cm->seg.abs_delta = SEGMENT_DELTADATA; + + if (cm->last_frame_seg_map && !cm->frame_parallel_decode) + memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + + if (cm->current_frame_seg_map) + memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + + // Reset the mode ref deltas for loop filter + vp9_zero(lf->last_ref_deltas); + vp9_zero(lf->last_mode_deltas); + set_default_lf_deltas(lf); + + // To force update of the sharpness + lf->last_sharpness_level = -1; + + vp9_default_coef_probs(cm); + init_mode_probs(cm->fc); + vp9_init_mv_probs(cm); + cm->fc->initialized = 1; + + if (cm->frame_type == KEY_FRAME || + cm->error_resilient_mode || cm->reset_frame_context == 3) { + // Reset all frame contexts. + for (i = 0; i < FRAME_CONTEXTS; ++i) + cm->frame_contexts[i] = *cm->fc; + } else if (cm->reset_frame_context == 2) { + // Reset only the frame context specified in the frame header. + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; + } + + // prev_mip will only be allocated in encoder. + if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) + memset(cm->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); + + vp9_zero(cm->ref_frame_sign_bias); + + cm->frame_context_idx = 0; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.h b/thirdparty/libvpx/vp9/common/vp9_entropymode.h new file mode 100644 index 00000000000..0285be15573 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymode.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ +#define VP9_COMMON_VP9_ENTROPYMODE_H_ + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_filter.h" +#include "vpx_dsp/vpx_filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BLOCK_SIZE_GROUPS 4 + +#define TX_SIZE_CONTEXTS 2 + +#define INTER_OFFSET(mode) ((mode) - NEARESTMV) + +struct VP9Common; + +struct tx_probs { + vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; + vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; +}; + +struct tx_counts { + unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; + unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; + unsigned int tx_totals[TX_SIZES]; +}; + +typedef struct frame_contexts { + vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; + vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; + vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; + vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; + vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1]; + vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; + vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vpx_prob single_ref_prob[REF_CONTEXTS][2]; + vpx_prob comp_ref_prob[REF_CONTEXTS]; + struct tx_probs tx_probs; + vpx_prob skip_probs[SKIP_CONTEXTS]; + nmv_context nmvc; + int initialized; +} FRAME_CONTEXT; + +typedef struct FRAME_COUNTS { + unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; + unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; + unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; + vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; + unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] + [COEF_BANDS][COEFF_CONTEXTS]; + unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; + unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][2][2]; + unsigned int comp_ref[REF_CONTEXTS][2]; + struct tx_counts tx; + unsigned int skip[SKIP_CONTEXTS][2]; + nmv_context_counts mv; +} FRAME_COUNTS; + +extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; +extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] + [INTRA_MODES - 1]; +extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1]; +extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; +extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; +extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; +extern const vpx_tree_index vp9_switchable_interp_tree + [TREE_SIZE(SWITCHABLE_FILTERS)]; + +void vp9_setup_past_independence(struct VP9Common *cm); + +void vp9_adapt_mode_probs(struct VP9Common *cm); + +void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]); +void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]); +void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, + unsigned int (*ct_8x8p)[2]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.c b/thirdparty/libvpx/vp9/common/vp9_entropymv.c new file mode 100644 index 00000000000..566ae91cf7e --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymv.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymv.h" + +const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { + -MV_JOINT_ZERO, 2, + -MV_JOINT_HNZVZ, 4, + -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; + +const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { + -MV_CLASS_0, 2, + -MV_CLASS_1, 4, + 6, 8, + -MV_CLASS_2, -MV_CLASS_3, + 10, 12, + -MV_CLASS_4, -MV_CLASS_5, + -MV_CLASS_6, 14, + 16, 18, + -MV_CLASS_7, -MV_CLASS_8, + -MV_CLASS_9, -MV_CLASS_10, +}; + +const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { + -0, -1, +}; + +const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { + -0, 2, + -1, 4, + -2, -3 +}; + +static const nmv_context default_nmv_context = { + {32, 64, 96}, + { + { // Vertical component + 128, // sign + {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class + {216}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp + }, + { // Horizontal component + 128, // sign + {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class + {208}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp + } + }, +}; + +static const uint8_t log_in_base_2[] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 +}; + +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? + MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; + if (offset) + *offset = z - mv_class_base(c); + return c; +} + +static void inc_mv_component(int v, nmv_component_counts *comp_counts, + int incr, int usehp) { + int s, z, c, o, d, e, f; + assert(v != 0); /* should not be zero */ + s = v < 0; + comp_counts->sign[s] += incr; + z = (s ? -v : v) - 1; /* magnitude - 1 */ + + c = vp9_get_mv_class(z, &o); + comp_counts->classes[c] += incr; + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + + if (c == MV_CLASS_0) { + comp_counts->class0[d] += incr; + comp_counts->class0_fp[d][f] += incr; + comp_counts->class0_hp[e] += usehp * incr; + } else { + int i; + int b = c + CLASS0_BITS - 1; // number of bits + for (i = 0; i < b; ++i) + comp_counts->bits[i][((d >> i) & 1)] += incr; + comp_counts->fp[f] += incr; + comp_counts->hp[e] += usehp * incr; + } +} + +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { + if (counts != NULL) { + const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); + ++counts->joints[j]; + + if (mv_joint_vertical(j)) { + inc_mv_component(mv->row, &counts->comps[0], 1, 1); + } + + if (mv_joint_horizontal(j)) { + inc_mv_component(mv->col, &counts->comps[1], 1, 1); + } + } +} + +void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { + int i, j; + + nmv_context *fc = &cm->fc->nmvc; + const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; + const nmv_context_counts *counts = &cm->counts.mv; + + vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, + fc->joints); + + for (i = 0; i < 2; ++i) { + nmv_component *comp = &fc->comps[i]; + const nmv_component *pre_comp = &pre_fc->comps[i]; + const nmv_component_counts *c = &counts->comps[i]; + + comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); + vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, + comp->classes); + vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, + comp->class0); + + for (j = 0; j < MV_OFFSET_BITS; ++j) + comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); + + for (j = 0; j < CLASS0_SIZE; ++j) + vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], + c->class0_fp[j], comp->class0_fp[j]); + + vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); + + if (allow_hp) { + comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); + comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); + } + } +} + +void vp9_init_mv_probs(VP9_COMMON *cm) { + cm->fc->nmvc = default_nmv_context; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.h b/thirdparty/libvpx/vp9/common/vp9_entropymv.h new file mode 100644 index 00000000000..2f05ad44b60 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymv.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_COMMON_VP9_ENTROPYMV_H_ +#define VP9_COMMON_VP9_ENTROPYMV_H_ + +#include "./vpx_config.h" + +#include "vpx_dsp/prob.h" + +#include "vp9/common/vp9_mv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; + +void vp9_init_mv_probs(struct VP9Common *cm); + +void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); + +// Integer pel reference mv threshold for use of high-precision 1/8 mv +#define COMPANDED_MVREF_THRESH 8 + +static INLINE int use_mv_hp(const MV *ref) { + return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; +} + +#define MV_UPDATE_PROB 252 + +/* Symbols for coding which components are zero jointly */ +#define MV_JOINTS 4 +typedef enum { + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ +} MV_JOINT_TYPE; + +static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { + return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; +} + +static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { + return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; +} + +/* Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 11 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ + MV_CLASS_8 = 8, /* (256, 512] integer pel */ + MV_CLASS_9 = 9, /* (512, 1024] integer pel */ + MV_CLASS_10 = 10, /* (1024,2048] integer pel */ +} MV_CLASS_TYPE; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) +#define MV_FP_SIZE 4 + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +#define MV_IN_USE_BITS 14 +#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) +#define MV_LOW (-(1 << MV_IN_USE_BITS)) + +extern const vpx_tree_index vp9_mv_joint_tree[]; +extern const vpx_tree_index vp9_mv_class_tree[]; +extern const vpx_tree_index vp9_mv_class0_tree[]; +extern const vpx_tree_index vp9_mv_fp_tree[]; + +typedef struct { + vpx_prob sign; + vpx_prob classes[MV_CLASSES - 1]; + vpx_prob class0[CLASS0_SIZE - 1]; + vpx_prob bits[MV_OFFSET_BITS]; + vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; + vpx_prob fp[MV_FP_SIZE - 1]; + vpx_prob class0_hp; + vpx_prob hp; +} nmv_component; + +typedef struct { + vpx_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { + if (mv->row == 0) { + return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; + } else { + return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; + } +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); + +typedef struct { + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; + unsigned int fp[MV_FP_SIZE]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + +typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_enums.h b/thirdparty/libvpx/vp9/common/vp9_enums.h new file mode 100644 index 00000000000..d089f23f970 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_enums.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_ENUMS_H_ +#define VP9_COMMON_VP9_ENUMS_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MI_SIZE_LOG2 3 +#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 + +#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit +#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block + +#define MI_MASK (MI_BLOCK_SIZE - 1) + +// Bitstream profiles indicated by 2-3 bits in the uncompressed header. +// 00: Profile 0. 8-bit 4:2:0 only. +// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. +// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. +// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 +// sampling. +// 111: Undefined profile. +typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + PROFILE_3, + MAX_PROFILES +} BITSTREAM_PROFILE; + +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 +#define BLOCK_SIZES 13 +#define BLOCK_INVALID BLOCK_SIZES +typedef uint8_t BLOCK_SIZE; + +typedef enum PARTITION_TYPE { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_VERT, + PARTITION_SPLIT, + PARTITION_TYPES, + PARTITION_INVALID = PARTITION_TYPES +} PARTITION_TYPE; + +typedef char PARTITION_CONTEXT; +#define PARTITION_PLOFFSET 4 // number of probability models per block size +#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) + +// block transform size +typedef uint8_t TX_SIZE; +#define TX_4X4 ((TX_SIZE)0) // 4x4 transform +#define TX_8X8 ((TX_SIZE)1) // 8x8 transform +#define TX_16X16 ((TX_SIZE)2) // 16x16 transform +#define TX_32X32 ((TX_SIZE)3) // 32x32 transform +#define TX_SIZES ((TX_SIZE)4) + +// frame transform mode +typedef enum { + ONLY_4X4 = 0, // only 4x4 transform used + ALLOW_8X8 = 1, // allow block transform size up to 8x8 + ALLOW_16X16 = 2, // allow block transform size up to 16x16 + ALLOW_32X32 = 3, // allow block transform size up to 32x32 + TX_MODE_SELECT = 4, // transform specified for each block + TX_MODES = 5, +} TX_MODE; + +typedef enum { + DCT_DCT = 0, // DCT in both horizontal and vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal + ADST_ADST = 3, // ADST in both directions + TX_TYPES = 4 +} TX_TYPE; + +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + +typedef enum { + PLANE_TYPE_Y = 0, + PLANE_TYPE_UV = 1, + PLANE_TYPES +} PLANE_TYPE; + +#define DC_PRED 0 // Average of above and left pixels +#define V_PRED 1 // Vertical +#define H_PRED 2 // Horizontal +#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) +#define D135_PRED 4 // Directional 135 deg = 180 - 45 +#define D117_PRED 5 // Directional 117 deg = 180 - 63 +#define D153_PRED 6 // Directional 153 deg = 180 - 27 +#define D207_PRED 7 // Directional 207 deg = 180 + 27 +#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) +#define TM_PRED 9 // True-motion +#define NEARESTMV 10 +#define NEARMV 11 +#define ZEROMV 12 +#define NEWMV 13 +#define MB_MODE_COUNT 14 +typedef uint8_t PREDICTION_MODE; + +#define INTRA_MODES (TM_PRED + 1) + +#define INTER_MODES (1 + NEWMV - NEARESTMV) + +#define SKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 + +/* Segment Feature Masks */ +#define MAX_MV_REF_CANDIDATES 2 + +#define INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.c b/thirdparty/libvpx/vp9/common/vp9_filter.c new file mode 100644 index 00000000000..4b2198fc40a --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_filter.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_filter.h" + +DECLARE_ALIGNED(256, static const InterpKernel, + bilinear_filters[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, + { 0, 0, 0, 104, 24, 0, 0, 0 }, + { 0, 0, 0, 96, 32, 0, 0, 0 }, + { 0, 0, 0, 88, 40, 0, 0, 0 }, + { 0, 0, 0, 80, 48, 0, 0, 0 }, + { 0, 0, 0, 72, 56, 0, 0, 0 }, + { 0, 0, 0, 64, 64, 0, 0, 0 }, + { 0, 0, 0, 56, 72, 0, 0, 0 }, + { 0, 0, 0, 48, 80, 0, 0, 0 }, + { 0, 0, 0, 40, 88, 0, 0, 0 }, + { 0, 0, 0, 32, 96, 0, 0, 0 }, + { 0, 0, 0, 24, 104, 0, 0, 0 }, + { 0, 0, 0, 16, 112, 0, 0, 0 }, + { 0, 0, 0, 8, 120, 0, 0, 0 } +}; + +// Lagrangian interpolation filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0}, + { 0, 1, -5, 126, 8, -3, 1, 0}, + { -1, 3, -10, 122, 18, -6, 2, 0}, + { -1, 4, -13, 118, 27, -9, 3, -1}, + { -1, 4, -16, 112, 37, -11, 4, -1}, + { -1, 5, -18, 105, 48, -14, 4, -1}, + { -1, 5, -19, 97, 58, -16, 5, -1}, + { -1, 6, -19, 88, 68, -18, 5, -1}, + { -1, 6, -19, 78, 78, -19, 6, -1}, + { -1, 5, -18, 68, 88, -19, 6, -1}, + { -1, 5, -16, 58, 97, -19, 5, -1}, + { -1, 4, -14, 48, 105, -18, 5, -1}, + { -1, 4, -11, 37, 112, -16, 4, -1}, + { -1, 3, -9, 27, 118, -13, 4, -1}, + { 0, 2, -6, 18, 122, -10, 3, -1}, + { 0, 1, -3, 8, 126, -5, 1, 0} +}; + +// DCT based filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8s[SUBPEL_SHIFTS]) = { + {0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 3, -7, 127, 8, -3, 1, 0}, + {-2, 5, -13, 125, 17, -6, 3, -1}, + {-3, 7, -17, 121, 27, -10, 5, -2}, + {-4, 9, -20, 115, 37, -13, 6, -2}, + {-4, 10, -23, 108, 48, -16, 8, -3}, + {-4, 10, -24, 100, 59, -19, 9, -3}, + {-4, 11, -24, 90, 70, -21, 10, -4}, + {-4, 11, -23, 80, 80, -23, 11, -4}, + {-4, 10, -21, 70, 90, -24, 11, -4}, + {-3, 9, -19, 59, 100, -24, 10, -4}, + {-3, 8, -16, 48, 108, -23, 10, -4}, + {-2, 6, -13, 37, 115, -20, 9, -4}, + {-2, 5, -10, 27, 121, -17, 7, -3}, + {-1, 3, -6, 17, 125, -13, 5, -2}, + {0, 1, -3, 8, 127, -7, 3, -1} +}; + +// freqmultiplier = 0.5 +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0}, + {-3, -1, 32, 64, 38, 1, -3, 0}, + {-2, -2, 29, 63, 41, 2, -3, 0}, + {-2, -2, 26, 63, 43, 4, -4, 0}, + {-2, -3, 24, 62, 46, 5, -4, 0}, + {-2, -3, 21, 60, 49, 7, -4, 0}, + {-1, -4, 18, 59, 51, 9, -4, 0}, + {-1, -4, 16, 57, 53, 12, -4, -1}, + {-1, -4, 14, 55, 55, 14, -4, -1}, + {-1, -4, 12, 53, 57, 16, -4, -1}, + { 0, -4, 9, 51, 59, 18, -4, -1}, + { 0, -4, 7, 49, 60, 21, -3, -2}, + { 0, -4, 5, 46, 62, 24, -3, -2}, + { 0, -4, 4, 43, 63, 26, -2, -2}, + { 0, -3, 2, 41, 63, 29, -2, -2}, + { 0, -3, 1, 38, 64, 32, -1, -3} +}; + + +const InterpKernel *vp9_filter_kernels[4] = { + sub_pel_filters_8, + sub_pel_filters_8lp, + sub_pel_filters_8s, + bilinear_filters +}; diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.h b/thirdparty/libvpx/vp9/common/vp9_filter.h new file mode 100644 index 00000000000..efa24bc67b8 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_filter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_FILTER_H_ +#define VP9_COMMON_VP9_FILTER_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_ports/mem.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +#define EIGHTTAP 0 +#define EIGHTTAP_SMOOTH 1 +#define EIGHTTAP_SHARP 2 +#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ +#define BILINEAR 3 +// The codec can operate in four possible inter prediction filter mode: +// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. +#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) +#define SWITCHABLE 4 /* should be the last one */ + +typedef uint8_t INTERP_FILTER; + +extern const InterpKernel *vp9_filter_kernels[4]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c new file mode 100644 index 00000000000..0f41d66985f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_frame_buffers.h" +#include "vpx_mem/vpx_mem.h" + +int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { + assert(list != NULL); + vp9_free_internal_frame_buffers(list); + + list->num_internal_frame_buffers = + VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; + list->int_fb = + (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, + sizeof(*list->int_fb)); + return (list->int_fb == NULL); +} + +void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { + int i; + + assert(list != NULL); + + for (i = 0; i < list->num_internal_frame_buffers; ++i) { + vpx_free(list->int_fb[i].data); + list->int_fb[i].data = NULL; + } + vpx_free(list->int_fb); + list->int_fb = NULL; +} + +int vp9_get_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb) { + int i; + InternalFrameBufferList *const int_fb_list = + (InternalFrameBufferList *)cb_priv; + if (int_fb_list == NULL) + return -1; + + // Find a free frame buffer. + for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { + if (!int_fb_list->int_fb[i].in_use) + break; + } + + if (i == int_fb_list->num_internal_frame_buffers) + return -1; + + if (int_fb_list->int_fb[i].size < min_size) { + int_fb_list->int_fb[i].data = + (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size); + if (!int_fb_list->int_fb[i].data) + return -1; + + // This memset is needed for fixing valgrind error from C loop filter + // due to access uninitialized memory in frame border. It could be + // removed if border is totally removed. + memset(int_fb_list->int_fb[i].data, 0, min_size); + int_fb_list->int_fb[i].size = min_size; + } + + fb->data = int_fb_list->int_fb[i].data; + fb->size = int_fb_list->int_fb[i].size; + int_fb_list->int_fb[i].in_use = 1; + + // Set the frame buffer's private data to point at the internal frame buffer. + fb->priv = &int_fb_list->int_fb[i]; + return 0; +} + +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { + InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; + (void)cb_priv; + if (int_fb) + int_fb->in_use = 0; + return 0; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h new file mode 100644 index 00000000000..e2cfe61b662 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_ +#define VP9_COMMON_VP9_FRAME_BUFFERS_H_ + +#include "vpx/vpx_frame_buffer.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct InternalFrameBuffer { + uint8_t *data; + size_t size; + int in_use; +} InternalFrameBuffer; + +typedef struct InternalFrameBufferList { + int num_internal_frame_buffers; + InternalFrameBuffer *int_fb; +} InternalFrameBufferList; + +// Initializes |list|. Returns 0 on success. +int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); + +// Free any data allocated to the frame buffers. +void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); + +// Callback used by libvpx to request an external frame buffer. |cb_priv| +// Callback private data, which points to an InternalFrameBufferList. +// |min_size| is the minimum size in bytes needed to decode the next frame. +// |fb| pointer to the frame buffer. +int vp9_get_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb); + +// Callback used by libvpx when there are no references to the frame buffer. +// |cb_priv| is not used. |fb| pointer to the frame buffer. +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.c b/thirdparty/libvpx/vp9/common/vp9_idct.c new file mode 100644 index 00000000000..1b420143bb3 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_idct.c @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + const transform_2d IHT_4[] = { + { idct4_c, idct4_c }, // DCT_DCT = 0 + { iadst4_c, idct4_c }, // ADST_DCT = 1 + { idct4_c, iadst4_c }, // DCT_ADST = 2 + { iadst4_c, iadst4_c } // ADST_ADST = 3 + }; + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // inverse transform row vectors + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr); + input += 4; + outptr += 4; + } + + // inverse transform column vectors + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 4)); + } + } +} + +static const transform_2d IHT_8[] = { + { idct8_c, idct8_c }, // DCT_DCT = 0 + { iadst8_c, idct8_c }, // ADST_DCT = 1 + { idct8_c, iadst8_c }, // DCT_ADST = 2 + { iadst8_c, iadst8_c } // ADST_ADST = 3 +}; + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const transform_2d ht = IHT_8[tx_type]; + + // inverse transform row vectors + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr); + input += 8; + outptr += 8; + } + + // inverse transform column vectors + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 5)); + } + } +} + +static const transform_2d IHT_16[] = { + { idct16_c, idct16_c }, // DCT_DCT = 0 + { iadst16_c, idct16_c }, // ADST_DCT = 1 + { idct16_c, iadst16_c }, // DCT_ADST = 2 + { iadst16_c, iadst16_c } // ADST_ADST = 3 +}; + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const transform_2d ht = IHT_16[tx_type]; + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +// idct +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_idct4x4_16_add(input, dest, stride); + else + vpx_idct4x4_1_add(input, dest, stride); +} + + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_iwht4x4_16_add(input, dest, stride); + else + vpx_iwht4x4_1_add(input, dest, stride); +} + +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. + if (eob == 1) + // DC only DCT coefficient + vpx_idct8x8_1_add(input, dest, stride); + else if (eob <= 12) + vpx_idct8x8_12_add(input, dest, stride); + else + vpx_idct8x8_64_add(input, dest, stride); +} + +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. Use eobs to separate different cases. */ + if (eob == 1) + /* DC only DCT coefficient. */ + vpx_idct16x16_1_add(input, dest, stride); + else if (eob <= 10) + vpx_idct16x16_10_add(input, dest, stride); + else + vpx_idct16x16_256_add(input, dest, stride); +} + +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob == 1) + vpx_idct32x32_1_add(input, dest, stride); + else if (eob <= 34) + // non-zero coeff only in upper-left 8x8 + vpx_idct32x32_34_add(input, dest, stride); + else if (eob <= 135) + // non-zero coeff only in upper-left 16x16 + vpx_idct32x32_135_add(input, dest, stride); + else + vpx_idct32x32_1024_add(input, dest, stride); +} + +// iht +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) + vp9_idct4x4_add(input, dest, stride, eob); + else + vp9_iht4x4_16_add(input, dest, stride, tx_type); +} + +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct8x8_add(input, dest, stride, eob); + } else { + vp9_iht8x8_64_add(input, dest, stride, tx_type); + } +} + +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct16x16_add(input, dest, stride, eob); + } else { + vp9_iht16x16_256_add(input, dest, stride, tx_type); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + const highbd_transform_2d IHT_4[] = { + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 + }; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // Inverse transform row vectors. + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr, bd); + input += 4; + outptr += 4; + } + + // Inverse transform column vectors. + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out, bd); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_8[] = { + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Inverse transform row vectors. + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr, bd); + input += 8; + outptr += 8; + } + + // Inverse transform column vectors. + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_16[] = { + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr, bd); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +// idct +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_idct4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_idct4x4_1_add(input, dest, stride, bd); +} + + +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); +} + +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. + // DC only DCT coefficient + if (eob == 1) { + vpx_highbd_idct8x8_1_add(input, dest, stride, bd); + } else if (eob <= 10) { + vpx_highbd_idct8x8_10_add(input, dest, stride, bd); + } else { + vpx_highbd_idct8x8_64_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd) { + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to separate different cases. + // DC only DCT coefficient. + if (eob == 1) { + vpx_highbd_idct16x16_1_add(input, dest, stride, bd); + } else if (eob <= 10) { + vpx_highbd_idct16x16_10_add(input, dest, stride, bd); + } else { + vpx_highbd_idct16x16_256_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd) { + // Non-zero coeff only in upper-left 8x8 + if (eob == 1) { + vpx_highbd_idct32x32_1_add(input, dest, stride, bd); + } else if (eob <= 34) { + vpx_highbd_idct32x32_34_add(input, dest, stride, bd); + } else { + vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); + } +} + +// iht +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) + vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); + else + vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); +} + +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); + } +} + +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.h b/thirdparty/libvpx/vp9/common/vp9_idct.h new file mode 100644 index 00000000000..b5a3fbf3620 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_idct.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_IDCT_H_ +#define VP9_COMMON_VP9_IDCT_H_ + +#include + +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); + +typedef struct { + transform_1d cols, rows; // vertical and horizontal +} transform_2d; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd); + +typedef struct { + highbd_transform_1d cols, rows; // vertical and horizontal +} highbd_transform_2d; +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); + +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd); +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd); +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +#endif // CONFIG_VP9_HIGHBITDEPTH +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c new file mode 100644 index 00000000000..183dec4e714 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c @@ -0,0 +1,1697 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_reconinter.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_seg_common.h" + +// 64 bit masks for left transform size. Each 1 represents a position where +// we should apply a loop filter across the left border of an 8x8 block +// boundary. +// +// In the case of TX_16X16-> ( in low order byte first we end up with +// a mask that looks like this +// +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// 10101010 +// +// A loopfilter should be applied to every other 8x8 horizontally. +static const uint64_t left_64x64_txform_mask[TX_SIZES]= { + 0xffffffffffffffffULL, // TX_4X4 + 0xffffffffffffffffULL, // TX_8x8 + 0x5555555555555555ULL, // TX_16x16 + 0x1111111111111111ULL, // TX_32x32 +}; + +// 64 bit masks for above transform size. Each 1 represents a position where +// we should apply a loop filter across the top border of an 8x8 block +// boundary. +// +// In the case of TX_32x32 -> ( in low order byte first we end up with +// a mask that looks like this +// +// 11111111 +// 00000000 +// 00000000 +// 00000000 +// 11111111 +// 00000000 +// 00000000 +// 00000000 +// +// A loopfilter should be applied to every other 4 the row vertically. +static const uint64_t above_64x64_txform_mask[TX_SIZES]= { + 0xffffffffffffffffULL, // TX_4X4 + 0xffffffffffffffffULL, // TX_8x8 + 0x00ff00ff00ff00ffULL, // TX_16x16 + 0x000000ff000000ffULL, // TX_32x32 +}; + +// 64 bit masks for prediction sizes (left). Each 1 represents a position +// where left border of an 8x8 block. These are aligned to the right most +// appropriate bit, and then shifted into place. +// +// In the case of TX_16x32 -> ( low order byte first ) we end up with +// a mask that looks like this : +// +// 10000000 +// 10000000 +// 10000000 +// 10000000 +// 00000000 +// 00000000 +// 00000000 +// 00000000 +static const uint64_t left_prediction_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4, + 0x0000000000000001ULL, // BLOCK_4X8, + 0x0000000000000001ULL, // BLOCK_8X4, + 0x0000000000000001ULL, // BLOCK_8X8, + 0x0000000000000101ULL, // BLOCK_8X16, + 0x0000000000000001ULL, // BLOCK_16X8, + 0x0000000000000101ULL, // BLOCK_16X16, + 0x0000000001010101ULL, // BLOCK_16X32, + 0x0000000000000101ULL, // BLOCK_32X16, + 0x0000000001010101ULL, // BLOCK_32X32, + 0x0101010101010101ULL, // BLOCK_32X64, + 0x0000000001010101ULL, // BLOCK_64X32, + 0x0101010101010101ULL, // BLOCK_64X64 +}; + +// 64 bit mask to shift and set for each prediction size. +static const uint64_t above_prediction_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4 + 0x0000000000000001ULL, // BLOCK_4X8 + 0x0000000000000001ULL, // BLOCK_8X4 + 0x0000000000000001ULL, // BLOCK_8X8 + 0x0000000000000001ULL, // BLOCK_8X16, + 0x0000000000000003ULL, // BLOCK_16X8 + 0x0000000000000003ULL, // BLOCK_16X16 + 0x0000000000000003ULL, // BLOCK_16X32, + 0x000000000000000fULL, // BLOCK_32X16, + 0x000000000000000fULL, // BLOCK_32X32, + 0x000000000000000fULL, // BLOCK_32X64, + 0x00000000000000ffULL, // BLOCK_64X32, + 0x00000000000000ffULL, // BLOCK_64X64 +}; +// 64 bit mask to shift and set for each prediction size. A bit is set for +// each 8x8 block that would be in the left most block of the given block +// size in the 64x64 block. +static const uint64_t size_mask[BLOCK_SIZES] = { + 0x0000000000000001ULL, // BLOCK_4X4 + 0x0000000000000001ULL, // BLOCK_4X8 + 0x0000000000000001ULL, // BLOCK_8X4 + 0x0000000000000001ULL, // BLOCK_8X8 + 0x0000000000000101ULL, // BLOCK_8X16, + 0x0000000000000003ULL, // BLOCK_16X8 + 0x0000000000000303ULL, // BLOCK_16X16 + 0x0000000003030303ULL, // BLOCK_16X32, + 0x0000000000000f0fULL, // BLOCK_32X16, + 0x000000000f0f0f0fULL, // BLOCK_32X32, + 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, + 0x00000000ffffffffULL, // BLOCK_64X32, + 0xffffffffffffffffULL, // BLOCK_64X64 +}; + +// These are used for masking the left and above borders. +static const uint64_t left_border = 0x1111111111111111ULL; +static const uint64_t above_border = 0x000000ff000000ffULL; + +// 16 bit masks for uv transform sizes. +static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x5555, // TX_16x16 + 0x1111, // TX_32x32 +}; + +static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x0f0f, // TX_16x16 + 0x000f, // TX_32x32 +}; + +// 16 bit left mask to shift and set for each uv prediction size. +static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4, + 0x0001, // BLOCK_4X8, + 0x0001, // BLOCK_8X4, + 0x0001, // BLOCK_8X8, + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8, + 0x0001, // BLOCK_16X16, + 0x0011, // BLOCK_16X32, + 0x0001, // BLOCK_32X16, + 0x0011, // BLOCK_32X32, + 0x1111, // BLOCK_32X64 + 0x0011, // BLOCK_64X32, + 0x1111, // BLOCK_64X64 +}; +// 16 bit above mask to shift and set for uv each prediction size. +static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0001, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0003, // BLOCK_32X32, + 0x0003, // BLOCK_32X64, + 0x000f, // BLOCK_64X32, + 0x000f, // BLOCK_64X64 +}; + +// 64 bit mask to shift and set for each uv prediction size +static const uint16_t size_mask_uv[BLOCK_SIZES] = { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0011, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0033, // BLOCK_32X32, + 0x3333, // BLOCK_32X64, + 0x00ff, // BLOCK_64X32, + 0xffff, // BLOCK_64X64 +}; +static const uint16_t left_border_uv = 0x1111; +static const uint16_t above_border_uv = 0x000f; + +static const int mode_lf_lut[MB_MODE_COUNT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES + 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) +}; + +static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { + int lvl; + + // For each possible value for the loop filter fill out limits + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { + // Set loop filter parameters that control sharpness. + int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); + + if (sharpness_lvl > 0) { + if (block_inside_limit > (9 - sharpness_lvl)) + block_inside_limit = (9 - sharpness_lvl); + } + + if (block_inside_limit < 1) + block_inside_limit = 1; + + memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); + memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), + SIMD_WIDTH); + } +} + +static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, + const MODE_INFO *mi) { + return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]] + [mode_lf_lut[mi->mode]]; +} + +void vp9_loop_filter_init(VP9_COMMON *cm) { + loop_filter_info_n *lfi = &cm->lf_info; + struct loopfilter *lf = &cm->lf; + int lvl; + + // init limits for given sharpness + update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + + // init hev threshold const vectors + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) + memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); +} + +void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { + int seg_id; + // n_shift is the multiplier for lf_deltas + // the multiplier is 1 for when filter_lvl is between 0 and 31; + // 2 when filter_lvl is between 32 and 63 + const int scale = 1 << (default_filt_lvl >> 5); + loop_filter_info_n *const lfi = &cm->lf_info; + struct loopfilter *const lf = &cm->lf; + const struct segmentation *const seg = &cm->seg; + + // update limits if sharpness has changed + if (lf->last_sharpness_level != lf->sharpness_level) { + update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + } + + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { + int lvl_seg = default_filt_lvl; + if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); + lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? + data : default_filt_lvl + data, + 0, MAX_LOOP_FILTER); + } + + if (!lf->mode_ref_delta_enabled) { + // we could get rid of this if we assume that deltas are set to + // zero when not in use; encoder always uses deltas + memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); + } else { + int ref, mode; + const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; + lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); + + for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { + for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { + const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + + lf->mode_deltas[mode] * scale; + lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + } + } + } + } +} + +static void filter_selectively_vert_row2(int subsampling_factor, + uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl) { + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; + const int lfl_forward = subsampling_factor ? 4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_vert_row2(int subsampling_factor, + uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; + const int lfl_forward = subsampling_factor ? 4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void filter_selectively_horiz(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + count = 2; + } else { + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else { + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + count = 2; + } else { + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// This function ors into the current lfm structure, where to do loop +// filters for the specific mi we are looking at. It uses information +// including the block_size_type (32x16, 32x32, etc.), the transform size, +// whether there were any coefficients encoded, and the loop filter strength +// block we are currently looking at. Shift is used to position the +// 1's we produce. +static void build_masks(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + const int shift_uv, + LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + int i; + + // If filter level is 0 we don't loop filter. + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_y |= left_prediction_mask[block_size] << shift_y; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mi->skip && is_inter_block(mi)) + return; + + // Here we are adding a mask for the transform size. The transform + // size mask is set to be correct for a 64x64 prediction block size. We + // mask to match the size of the block we are working on and then shift it + // into place.. + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + *above_uv |= (size_mask_uv[block_size] & + above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + *left_uv |= (size_mask_uv[block_size] & + left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + // Here we are trying to determine what to do with the internal 4x4 block + // boundaries. These differ from the 4x4 boundaries on the outside edge of + // an 8x8 in that the internal ones can be skipped and don't depend on + // the prediction block size. + if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; + + if (tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} + +// This function does the same thing as the one above with the exception that +// it only affects the y masks. It exists because for blocks < 16x16 in size, +// we only update u and v masks on the first block. +static void build_y_mask(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + int i; + + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (mi->skip && is_inter_block(mi)) + return; + + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; +} + +void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm) { + int i; + + // The largest loopfilter we have is 16x16 so we use the 16x16 mask + // for 32x32 transforms also. + lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; + lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; + lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; + lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; + + // We do at least 8 tap filter on every 32x32 even if the transform size + // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and + // remove it from the 4x4. + lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; + lfm->left_y[TX_4X4] &= ~left_border; + lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; + lfm->above_y[TX_4X4] &= ~above_border; + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; + lfm->left_uv[TX_4X4] &= ~left_border_uv; + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; + lfm->above_uv[TX_4X4] &= ~above_border_uv; + + // We do some special edge handling. + if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { + const uint64_t rows = cm->mi_rows - mi_row; + + // Each pixel inside the border gets a 1, + const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); + const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); + + // Remove values completely outside our border. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv; + + // We don't apply a wide loop filter on the last uv block row. If set + // apply the shorter one instead. + if (rows == 1) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; + lfm->above_uv[TX_16X16] = 0; + } + if (rows == 5) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; + lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); + } + } + + if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { + const uint64_t columns = cm->mi_cols - mi_col; + + // Each pixel inside the border gets a 1, the multiply copies the border + // to where we need it. + const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; + const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; + + // Internal edges are not applied on the last column of the image so + // we mask 1 more for the internal edges + const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; + + // Remove the bits outside the image edge. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv_int; + + // We don't apply a wide loop filter on the last uv column. If set + // apply the shorter one instead. + if (columns == 1) { + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; + lfm->left_uv[TX_16X16] = 0; + } + if (columns == 5) { + lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); + lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); + } + } + // We don't apply a loop filter on the first column in the image, mask that + // out. + if (mi_col == 0) { + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= 0xfefefefefefefefeULL; + lfm->left_uv[i] &= 0xeeee; + } + } + + // Assert if we try to apply 2 different loop filters at the same position. + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); + assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); + assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); + assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); + assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); + assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); +} + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. +void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, + MODE_INFO **mi, const int mode_info_stride, + LOOP_FILTER_MASK *lfm) { + int idx_32, idx_16, idx_8; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + MODE_INFO **mip = mi; + MODE_INFO **mip2 = mi; + + // These are offsets to the next mi in the 64x64 block. It is what gets + // added to the mi ptr as we go through each loop. It helps us to avoid + // setting up special row and column counters for each index. The last step + // brings us out back to the starting position. + const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, + -(mode_info_stride << 2) - 4}; + const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, + -(mode_info_stride << 1) - 2}; + const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; + + // Following variables represent shifts to position the current block + // mask over the appropriate block. A shift of 36 to the left will move + // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left + // 4 rows to the appropriate spot. + const int shift_32_y[] = {0, 4, 32, 36}; + const int shift_16_y[] = {0, 2, 16, 18}; + const int shift_8_y[] = {0, 1, 8, 9}; + const int shift_32_uv[] = {0, 2, 8, 10}; + const int shift_16_uv[] = {0, 1, 4, 5}; + const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? + cm->mi_rows - mi_row : MI_BLOCK_SIZE); + const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? + cm->mi_cols - mi_col : MI_BLOCK_SIZE); + + vp9_zero(*lfm); + assert(mip[0] != NULL); + + switch (mip[0]->sb_type) { + case BLOCK_64X64: + build_masks(lfi_n, mip[0] , 0, 0, lfm); + break; + case BLOCK_64X32: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + mode_info_stride * 4; + if (4 >= max_rows) + break; + build_masks(lfi_n, mip2[0], 32, 8, lfm); + break; + case BLOCK_32X64: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + 4; + if (4 >= max_cols) + break; + build_masks(lfi_n, mip2[0], 4, 2, lfm); + break; + default: + for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { + const int shift_y = shift_32_y[idx_32]; + const int shift_uv = shift_32_uv[idx_32]; + const int mi_32_col_offset = ((idx_32 & 1) << 2); + const int mi_32_row_offset = ((idx_32 >> 1) << 2); + if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) + continue; + switch (mip[0]->sb_type) { + case BLOCK_32X32: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + case BLOCK_32X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_32_row_offset + 2 >= max_rows) + continue; + mip2 = mip + mode_info_stride * 2; + build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); + break; + case BLOCK_16X32: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_32_col_offset + 2 >= max_cols) + continue; + mip2 = mip + 2; + build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); + break; + default: + for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { + const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; + const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; + const int mi_16_col_offset = mi_32_col_offset + + ((idx_16 & 1) << 1); + const int mi_16_row_offset = mi_32_row_offset + + ((idx_16 >> 1) << 1); + + if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) + continue; + + switch (mip[0]->sb_type) { + case BLOCK_16X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + case BLOCK_16X8: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_16_row_offset + 1 >= max_rows) + continue; + mip2 = mip + mode_info_stride; + build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); + break; + case BLOCK_8X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_16_col_offset +1 >= max_cols) + continue; + mip2 = mip + 1; + build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); + break; + default: { + const int shift_y = shift_32_y[idx_32] + + shift_16_y[idx_16] + + shift_8_y[0]; + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + mip += offset[0]; + for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { + const int shift_y = shift_32_y[idx_32] + + shift_16_y[idx_16] + + shift_8_y[idx_8]; + const int mi_8_col_offset = mi_16_col_offset + + ((idx_8 & 1)); + const int mi_8_row_offset = mi_16_row_offset + + ((idx_8 >> 1)); + + if (mi_8_col_offset >= max_cols || + mi_8_row_offset >= max_rows) + continue; + build_y_mask(lfi_n, mip[0], shift_y, lfm); + } + break; + } + } + } + break; + } + } + break; + } +} + +static void filter_selectively_vert(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_8x8 & 1) { + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_4x4 & 1) { + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } + } + if (mask_4x4_int & 1) + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_vert(uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } else if (mask_8x8 & 1) { + vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } else if (mask_4x4 & 1) { + vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } + if (mask_4x4_int & 1) + vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_filter_block_plane_non420(VP9_COMMON *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + const int ss_x = plane->subsampling_x; + const int ss_y = plane->subsampling_y; + const int row_step = 1 << ss_y; + const int col_step = 1 << ss_x; + const int row_step_stride = cm->mi_stride * row_step; + struct buf_2d *const dst = &plane->dst; + uint8_t* const dst0 = dst->buf; + unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; + unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; + unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; + unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; + uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; + int r, c; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + unsigned int mask_16x16_c = 0; + unsigned int mask_8x8_c = 0; + unsigned int mask_4x4_c = 0; + unsigned int border_mask; + + // Determine the vertical edges that need filtering + for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { + const MODE_INFO *mi = mi_8x8[c]; + const BLOCK_SIZE sb_type = mi[0].sb_type; + const int skip_this = mi[0].skip && is_inter_block(mi); + // left edge of current unit is block/partition edge -> no skip + const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? + !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; + const int skip_this_c = skip_this && !block_edge_left; + // top edge of current unit is block/partition edge -> no skip + const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? + !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; + const int skip_this_r = skip_this && !block_edge_above; + const TX_SIZE tx_size = get_uv_tx_size(mi, plane); + const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + + // Filter level can vary per MI + if (!(lfl[(r << 3) + (c >> ss_x)] = + get_filter_level(&cm->lf_info, mi))) + continue; + + // Build masks based on the transform size of each block + if (tx_size == TX_32X32) { + if (!skip_this_c && ((c >> ss_x) & 3) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 3) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else if (tx_size == TX_16X16) { + if (!skip_this_c && ((c >> ss_x) & 1) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 1) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else { + // force 8x8 filtering on 32x32 boundaries + if (!skip_this_c) { + if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) + mask_8x8_c |= 1 << (c >> ss_x); + else + mask_4x4_c |= 1 << (c >> ss_x); + } + + if (!skip_this_r) { + if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) + mask_8x8[r] |= 1 << (c >> ss_x); + else + mask_4x4[r] |= 1 << (c >> ss_x); + } + + if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) + mask_4x4_int[r] |= 1 << (c >> ss_x); + } + } + + // Disable filtering on the leftmost column + border_mask = ~(mi_col == 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, + mask_4x4_int[r], + cm->lf_info.lfthr, &lfl[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert(dst->buf, dst->stride, + mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, + mask_4x4_int[r], + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + mi_8x8 += row_step_stride; + } + + // Now do horizontal pass + dst->buf = dst0; + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; + + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16[r]; + mask_8x8_r = mask_8x8[r]; + mask_4x4_r = mask_4x4[r]; + } +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_r, + mask_8x8_r, + mask_4x4_r, + mask_4x4_int_r, + cm->lf_info.lfthr, &lfl[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, + mask_16x16_r, + mask_8x8_r, + mask_4x4_r, + mask_4x4_int_r, + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + } +} + +void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r; + uint64_t mask_16x16 = lfm->left_y[TX_16X16]; + uint64_t mask_8x8 = lfm->left_y[TX_8X8]; + uint64_t mask_4x4 = lfm->left_y[TX_4X4]; + uint64_t mask_4x4_int = lfm->int_4x4_y; + + assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + // Disable filtering on the leftmost column. +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_y[TX_16X16]; + mask_8x8 = lfm->above_y[TX_8X8]; + mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xff; + mask_8x8_r = mask_8x8 & 0xff; + mask_4x4_r = mask_4x4 & 0xff; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } +} + +void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r, c; + uint8_t lfl_uv[16]; + + uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; + uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; + uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; + uint16_t mask_4x4_int = lfm->int_4x4_uv; + + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; + } + + // Disable filtering on the leftmost column. +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_uv[TX_16X16]; + mask_8x8 = lfm->above_uv[TX_8X8]; + mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = + skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf); + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xf; + mask_8x8_r = mask_8x8 & 0xf; + mask_4x4_r = mask_4x4 & 0xf; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, + &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 4; + mask_8x8 >>= 4; + mask_4x4 >>= 4; + mask_4x4_int >>= 4; + } +} + +static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only) { + const int num_planes = y_only ? 1 : MAX_MB_PLANE; + enum lf_path path; + int mi_row, mi_col; + + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); + + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { + int plane; + + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + // TODO(jimbankoski): For 444 only need to do y mask. + vp9_adjust_mask(cm, mi_row, mi_col, lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } + } + } + } +} + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, MACROBLOCKD *xd, + int frame_filter_level, + int y_only, int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); +} + +// Used by the encoder to build the loopfilter masks. +// TODO(slavarnway): Do the encoder the same way the decoder does it and +// build the masks in line as part of the encode process. +void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, + int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + int mi_col, mi_row; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + + vp9_loop_filter_frame_init(cm, frame_filter_level); + + for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + // vp9_setup_mask() zeros lfm + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + get_lfm(&cm->lf, mi_row, mi_col)); + } + } +} + +// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 +// or greater area. +static const uint8_t first_block_in_16x16[8][8] = { + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0} +}; + +// This function sets up the bit masks for a block represented +// by mi_row, mi_col in a 64x64 region. +// TODO(SJL): This function only works for yv12. +void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, + int mi_col, int bw, int bh) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + const int filter_level = get_filter_level(lfi_n, mi); + const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); + LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + const int row_in_sb = (mi_row & 7); + const int col_in_sb = (mi_col & 7); + const int shift_y = col_in_sb + (row_in_sb << 3); + const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); + const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; + + if (!filter_level) { + return; + } else { + int index = shift_y; + int i; + for (i = 0; i < bh; i++) { + memset(&lfm->lfl_y[index], filter_level, bw); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (build_uv) { + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + } + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mi->skip && is_inter_block(mi)) + return; + + // Add a mask for the transform size. The transform size mask is set to + // be correct for a 64x64 prediction block size. Mask to match the size of + // the block we are working on and then shift it into place. + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (build_uv) { + *above_uv |= (size_mask_uv[block_size] & + above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + *left_uv |= (size_mask_uv[block_size] & + left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + } + + // Try to determine what to do with the internal 4x4 block boundaries. These + // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the + // internal ones can be skipped and don't depend on the prediction block size. + if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; + + if (build_uv && tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { + lf_data->frame_buffer = frame_buffer; + lf_data->cm = cm; + lf_data->start = 0; + lf_data->stop = 0; + lf_data->y_only = 0; + memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); +} + +void vp9_reset_lfm(VP9_COMMON *const cm) { + if (cm->lf.filter_level) { + memset(cm->lf.lfm, 0, + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * + sizeof(*cm->lf.lfm)); + } +} + +int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { + (void)unused; + loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only); + return 1; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h new file mode 100644 index 00000000000..fca8830fa19 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_LOOPFILTER_H_ +#define VP9_COMMON_VP9_LOOPFILTER_H_ + +#include "vpx_ports/mem.h" +#include "./vpx_config.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LOOP_FILTER 63 +#define MAX_SHARPNESS 7 + +#define SIMD_WIDTH 16 + +#define MAX_REF_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 2 + +enum lf_path { + LF_PATH_420, + LF_PATH_444, + LF_PATH_SLOW, +}; + +// Need to align this structure so when it is declared and +// passed it can be loaded into vector registers. +typedef struct { + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); +} loop_filter_thresh; + +typedef struct { + loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; + uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; +} loop_filter_info_n; + +// This structure holds bit masks for all 8x8 blocks in a 64x64 region. +// Each 1 bit represents a position in which we want to apply the loop filter. +// Left_ entries refer to whether we apply a filter on the border to the +// left of the block. Above_ entries refer to whether or not to apply a +// filter on the above border. Int_ entries refer to whether or not to +// apply borders on the 4x4 edges within the 8x8 block that each bit +// represents. +// Since each transform is accompanied by a potentially different type of +// loop filter there is a different entry in the array for each transform size. +typedef struct { + uint64_t left_y[TX_SIZES]; + uint64_t above_y[TX_SIZES]; + uint64_t int_4x4_y; + uint16_t left_uv[TX_SIZES]; + uint16_t above_uv[TX_SIZES]; + uint16_t int_4x4_uv; + uint8_t lfl_y[64]; +} LOOP_FILTER_MASK; + +struct loopfilter { + int filter_level; + int last_filt_level; + + int sharpness_level; + int last_sharpness_level; + + uint8_t mode_ref_delta_enabled; + uint8_t mode_ref_delta_update; + + // 0 = Intra, Last, GF, ARF + signed char ref_deltas[MAX_REF_LF_DELTAS]; + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; + + // 0 = ZERO_MV, MV + signed char mode_deltas[MAX_MODE_LF_DELTAS]; + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; + + LOOP_FILTER_MASK *lfm; + int lfm_stride; +}; + +/* assorted loopfilter functions which get used elsewhere */ +struct VP9Common; +struct macroblockd; +struct VP9LfSyncData; + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. +void vp9_setup_mask(struct VP9Common *const cm, + const int mi_row, const int mi_col, + MODE_INFO **mi_8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss00(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss11(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_non420(struct VP9Common *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, + int mi_row, int mi_col); + +void vp9_loop_filter_init(struct VP9Common *cm); + +// Update the loop filter for the current frame. +// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() +// calls this function directly. +void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + struct VP9Common *cm, + struct macroblockd *mbd, + int filter_level, + int y_only, int partial_frame); + +// Get the superblock lfm for a given mi_row, mi_col. +static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, + const int mi_row, const int mi_col) { + return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; +} + +void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, + int mi_col, int bw, int bh); +void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm); +void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, + int partial_frame); +void vp9_reset_lfm(struct VP9Common *const cm); + +typedef struct LoopFilterWorkerData { + YV12_BUFFER_CONFIG *frame_buffer; + struct VP9Common *cm; + struct macroblockd_plane planes[MAX_MB_PLANE]; + + int start; + int stop; + int y_only; +} LFWorkerData; + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); + +// Operates on the rows described by 'lf_data'. +int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mv.h b/thirdparty/libvpx/vp9/common/vp9_mv.h new file mode 100644 index 00000000000..5d89da8c25a --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mv.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_MV_H_ +#define VP9_COMMON_VP9_MV_H_ + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct mv { + int16_t row; + int16_t col; +} MV; + +typedef union int_mv { + uint32_t as_int; + MV as_mv; +} int_mv; /* facilitates faster equality tests and copies */ + +typedef struct mv32 { + int32_t row; + int32_t col; +} MV32; + +static INLINE int is_zero_mv(const MV *mv) { + return *((const uint32_t *)mv) == 0; +} + +static INLINE int is_equal_mv(const MV *a, const MV *b) { + return *((const uint32_t *)a) == *((const uint32_t *)b); +} + +static INLINE void clamp_mv(MV *mv, int min_col, int max_col, + int min_row, int max_row) { + mv->col = clamp(mv->col, min_col, max_col); + mv->row = clamp(mv->row, min_row, max_row); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_MV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c new file mode 100644 index 00000000000..0eb01a51bad --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c @@ -0,0 +1,201 @@ + +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_mvref_common.h" + +// This function searches the neighborhood of a given MB/SB +// to try and find candidate reference vectors. +static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int block, int mi_row, int mi_col, + uint8_t *mode_context) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; + int different_ref_found = 0; + int context_counter = 0; + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const TileInfo *const tile = &xd->tile; + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate_mi->mode]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Check the last frame's mode and mv info. + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + } + + Done: + + mode_context[ref_frame] = counter_to_context[context_counter]; + + // Clamp vectors + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); +} + +void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int mi_row, int mi_col, + uint8_t *mode_context) { + find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, + mi_row, mi_col, mode_context); +} + +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, + int_mv *mvlist, int_mv *nearest_mv, + int_mv *near_mv) { + int i; + // Make sure all the candidates are properly clamped etc + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&mvlist[i].as_mv, allow_hp); + clamp_mv2(&mvlist[i].as_mv, xd); + } + *nearest_mv = mvlist[0]; + *near_mv = mvlist[1]; +} + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + int block, int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + + assert(MAX_MV_REF_CANDIDATES == 2); + + find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, + mi_row, mi_col, mode_context); + + near_mv->as_int = 0; + switch (block) { + case 0: + nearest_mv->as_int = mv_list[0].as_int; + near_mv->as_int = mv_list[1].as_int; + break; + case 1: + case 2: + nearest_mv->as_int = bmi[0].as_mv[ref].as_int; + for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != mv_list[n].as_int) { + near_mv->as_int = mv_list[n].as_int; + break; + } + break; + case 3: { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + + nearest_mv->as_int = bmi[2].as_mv[ref].as_int; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != candidates[n].as_int) { + near_mv->as_int = candidates[n].as_int; + break; + } + break; + } + default: + assert(0 && "Invalid block index."); + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h new file mode 100644 index 00000000000..4380843e24a --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ +#define VP9_COMMON_VP9_MVREF_COMMON_H_ + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) +#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\ + VP9_INTERP_EXTEND) << 3) + +#define MVREF_NEIGHBOURS 8 + +typedef struct position { + int row; + int col; +} POSITION; + +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +// This is used to figure out a context for the ref blocks. The code flattens +// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by +// adding 9 for each intra block, 3 for each zero mv and 1 for each new +// motion vector. This single number is then converted into a context +// with a single lookup ( counter_to_context ). +static const int mode_2_counter[MB_MODE_COUNT] = { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D207_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV +}; + +// There are 3^3 different combinations of 3 counts that can be either 0,1 or +// 2. However the actual count can never be greater than 2 so the highest +// counter we need is 18. 9 is an invalid counter that's never used. +static const int counter_to_context[19] = { + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 + INTRA_PLUS_NON_INTRA, // 9 + INTRA_PLUS_NON_INTRA, // 10 + INVALID_CASE, // 11 + INTRA_PLUS_NON_INTRA, // 12 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 +}; + +static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { + // 4X4 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 4X8 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X4 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X8 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X16 + {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, + // 16X8 + {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, + // 16X16 + {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 16X32 + {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, + // 32X16 + {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 32X32 + {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 32X64 + {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, + // 64X32 + {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, + // 64X64 + {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} +}; + +static const int idx_n_column_to_subblock[4][2] = { + {1, 2}, + {1, 3}, + {3, 2}, + {3, 3} +}; + +// clamp_mv_ref +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units + +static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, + xd->mb_to_right_edge + MV_BORDER, + xd->mb_to_top_edge - MV_BORDER, + xd->mb_to_bottom_edge + MV_BORDER); +} + +// This function returns either the appropriate sub block or block's mv +// on whether the block_size < 8x8 and we have check_sub_blocks set. +static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, + int search_col, int block_idx) { + return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 + ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mv[which_mv]; +} + + +// Performs mv sign inversion if indicated by the reference frame combination. +static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, + const MV_REFERENCE_FRAME this_ref_frame, + const int *ref_sign_bias) { + int_mv mv = mi->mv[ref]; + if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + return mv; +} + +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector it will also +// skip all additional processing and jump to Done! +#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ + do { \ + if (refmv_count) { \ + if ((mv).as_int != (mv_ref_list)[0].as_int) { \ + (mv_ref_list)[(refmv_count)] = (mv); \ + goto Done; \ + } \ + } else { \ + (mv_ref_list)[(refmv_count)++] = (mv); \ + } \ + } while (0) + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ + mv_ref_list, Done) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + } \ + } while (0) + + +// Checks that the given mi_row, mi_col and search point +// are inside the borders of the tile. +static INLINE int is_inside(const TileInfo *const tile, + int mi_col, int mi_row, int mi_rows, + const POSITION *mi_pos) { + return !(mi_row + mi_pos->row < 0 || + mi_col + mi_pos->col < tile->mi_col_start || + mi_row + mi_pos->row >= mi_rows || + mi_col + mi_pos->col >= tile->mi_col_end); +} + +// TODO(jingning): this mv clamping function should be block size dependent. +static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); +} + +static INLINE void lower_mv_precision(MV *mv, int allow_hp) { + const int use_hp = allow_hp && use_mv_hp(mv); + if (!use_hp) { + if (mv->row & 1) + mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) + mv->col += (mv->col > 0 ? -1 : 1); + } +} + +typedef void (*find_mv_refs_sync)(void *const data, int mi_row); +void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int mi_row, int mi_col, + uint8_t *mode_context); + +// check a list of motion vectors by sad score using a number rows of pixels +// above and a number cols of pixels in the left to select the one with best +// score to use as ref motion vector +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, + int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + int block, int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h new file mode 100644 index 00000000000..3fd935e628b --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_ONYXC_INT_H_ +#define VP9_COMMON_VP9_ONYXC_INT_H_ + +#include "./vpx_config.h" +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx_util/vpx_thread.h" +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_frame_buffers.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_tile_common.h" + +#if CONFIG_VP9_POSTPROC +#include "vp9/common/vp9_postproc.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define REFS_PER_FRAME 3 + +#define REF_FRAMES_LOG2 3 +#define REF_FRAMES (1 << REF_FRAMES_LOG2) + +// 4 scratch frames for the new frames to support a maximum of 4 cores decoding +// in parallel, 3 for scaled references on the encoder. +// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number +// of framebuffers. +// TODO(jkoleszar): These 3 extra references could probably come from the +// normal reference pool. +#define FRAME_BUFFERS (REF_FRAMES + 7) + +#define FRAME_CONTEXTS_LOG2 2 +#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) + +#define NUM_PING_PONG_BUFFERS 2 + +extern const struct { + PARTITION_CONTEXT above; + PARTITION_CONTEXT left; +} partition_context_lookup[BLOCK_SIZES]; + + +typedef enum { + SINGLE_REFERENCE = 0, + COMPOUND_REFERENCE = 1, + REFERENCE_MODE_SELECT = 2, + REFERENCE_MODES = 3, +} REFERENCE_MODE; + +typedef struct { + int_mv mv[2]; + MV_REFERENCE_FRAME ref_frame[2]; +} MV_REF; + +typedef struct { + int ref_count; + MV_REF *mvs; + int mi_rows; + int mi_cols; + vpx_codec_frame_buffer_t raw_frame_buffer; + YV12_BUFFER_CONFIG buf; + + // The Following variables will only be used in frame parallel decode. + + // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means + // that no FrameWorker owns, or is decoding, this buffer. + VPxWorker *frame_worker_owner; + + // row and col indicate which position frame has been decoded to in real + // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX + // when the frame is fully decoded. + int row; + int col; +} RefCntBuffer; + +typedef struct BufferPool { + // Protect BufferPool from being accessed by several FrameWorkers at + // the same time during frame parallel decode. + // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. +#if CONFIG_MULTITHREAD + pthread_mutex_t pool_mutex; +#endif + + // Private data associated with the frame buffer callbacks. + void *cb_priv; + + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + RefCntBuffer frame_bufs[FRAME_BUFFERS]; + + // Frame buffers allocated internally by the codec. + InternalFrameBufferList int_frame_buffers; +} BufferPool; + +typedef struct VP9Common { + struct vpx_internal_error_info error; + vpx_color_space_t color_space; + vpx_color_range_t color_range; + int width; + int height; + int render_width; + int render_height; + int last_width; + int last_height; + + // TODO(jkoleszar): this implies chroma ss right now, but could vary per + // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to + // support additional planes. + int subsampling_x; + int subsampling_y; + +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth; // Marks if we need to use 16bit frame buffers. +#endif + + YV12_BUFFER_CONFIG *frame_to_show; + RefCntBuffer *prev_frame; + + // TODO(hkuang): Combine this with cur_buf in macroblockd. + RefCntBuffer *cur_frame; + + int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ + + // Prepare ref_frame_map for the next frame. + // Only used in frame parallel decode. + int next_ref_frame_map[REF_FRAMES]; + + // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and + // roll new_fb_idx into it. + + // Each frame can reference REFS_PER_FRAME buffers + RefBuffer frame_refs[REFS_PER_FRAME]; + + int new_fb_idx; + +#if CONFIG_VP9_POSTPROC + YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; +#endif + + FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ + FRAME_TYPE frame_type; + + int show_frame; + int last_show_frame; + int show_existing_frame; + + // Flag signaling that the frame is encoded using only INTRA modes. + uint8_t intra_only; + uint8_t last_intra_only; + + int allow_high_precision_mv; + + // Flag signaling that the frame context should be reset to default values. + // 0 or 1 implies don't reset, 2 reset just the context specified in the + // frame header, 3 reset all contexts. + int reset_frame_context; + + // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in + // MODE_INFO (8-pixel) units. + int MBs; + int mb_rows, mi_rows; + int mb_cols, mi_cols; + int mi_stride; + + /* profile settings */ + TX_MODE tx_mode; + + int base_qindex; + int y_dc_delta_q; + int uv_dc_delta_q; + int uv_ac_delta_q; + int16_t y_dequant[MAX_SEGMENTS][2]; + int16_t uv_dequant[MAX_SEGMENTS][2]; + + /* We allocate a MODE_INFO struct for each macroblock, together with + an extra row on top and column on the left to simplify prediction. */ + int mi_alloc_size; + MODE_INFO *mip; /* Base of allocated array */ + MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ + + // TODO(agrange): Move prev_mi into encoder structure. + // prev_mip and prev_mi will only be allocated in VP9 encoder. + MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ + MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ + + // Separate mi functions between encoder and decoder. + int (*alloc_mi)(struct VP9Common *cm, int mi_size); + void (*free_mi)(struct VP9Common *cm); + void (*setup_mi)(struct VP9Common *cm); + + // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible + // area will be NULL. + MODE_INFO **mi_grid_base; + MODE_INFO **mi_grid_visible; + MODE_INFO **prev_mi_grid_base; + MODE_INFO **prev_mi_grid_visible; + + // Whether to use previous frame's motion vectors for prediction. + int use_prev_frame_mvs; + + // Persistent mb segment id map used in prediction. + int seg_map_idx; + int prev_seg_map_idx; + + uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; + uint8_t *last_frame_seg_map; + uint8_t *current_frame_seg_map; + int seg_map_alloc_size; + + INTERP_FILTER interp_filter; + + loop_filter_info_n lf_info; + + int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ + + int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ + + struct loopfilter lf; + struct segmentation seg; + + // TODO(hkuang): Remove this as it is the same as frame_parallel_decode + // in pbi. + int frame_parallel_decode; // frame-based threading. + + // Context probabilities for reference frame prediction + MV_REFERENCE_FRAME comp_fixed_ref; + MV_REFERENCE_FRAME comp_var_ref[2]; + REFERENCE_MODE reference_mode; + + FRAME_CONTEXT *fc; /* this frame entropy */ + FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS + unsigned int frame_context_idx; /* Context to use/update */ + FRAME_COUNTS counts; + + unsigned int current_video_frame; + BITSTREAM_PROFILE profile; + + // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. + vpx_bit_depth_t bit_depth; + vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer + +#if CONFIG_VP9_POSTPROC + struct postproc_state postproc_state; +#endif + + int error_resilient_mode; + int frame_parallel_decoding_mode; + + int log2_tile_cols, log2_tile_rows; + int byte_alignment; + int skip_loop_filter; + + // Private data associated with the frame buffer callbacks. + void *cb_priv; + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + // Handles memory for the codec. + InternalFrameBufferList int_frame_buffers; + + // External BufferPool passed from outside. + BufferPool *buffer_pool; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; + int above_context_alloc_cols; +} VP9_COMMON; + +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. +void lock_buffer_pool(BufferPool *const pool); +void unlock_buffer_pool(BufferPool *const pool); + +static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { + if (index < 0 || index >= REF_FRAMES) + return NULL; + if (cm->ref_frame_map[index] < 0) + return NULL; + assert(cm->ref_frame_map[index] < FRAME_BUFFERS); + return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; +} + +static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { + return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; +} + +static INLINE int get_free_fb(VP9_COMMON *cm) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + lock_buffer_pool(cm->buffer_pool); + for (i = 0; i < FRAME_BUFFERS; ++i) + if (frame_bufs[i].ref_count == 0) + break; + + if (i != FRAME_BUFFERS) { + frame_bufs[i].ref_count = 1; + } else { + // Reset i to be INVALID_IDX to indicate no free buffer found. + i = INVALID_IDX; + } + + unlock_buffer_pool(cm->buffer_pool); + return i; +} + +static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { + const int ref_index = *idx; + + if (ref_index >= 0 && bufs[ref_index].ref_count > 0) + bufs[ref_index].ref_count--; + + *idx = new_idx; + + bufs[new_idx].ref_count++; +} + +static INLINE int mi_cols_aligned_to_sb(int n_mis) { + return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); +} + +static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { + return cm->frame_type == KEY_FRAME || cm->intra_only; +} + +static INLINE void set_partition_probs(const VP9_COMMON *const cm, + MACROBLOCKD *const xd) { + xd->partition_probs = + frame_is_intra_only(cm) ? + &vp9_kf_partition_probs[0] : + (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob; +} + +static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, + tran_low_t *dqcoeff) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = dqcoeff; + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (get_plane_type(i) == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; + xd->error_info = &cm->error; + + set_partition_probs(cm, xd); +} + +static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd, + int ctx) { + return xd->partition_probs[ctx]; +} + +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { + const int above_idx = mi_col * 2; + const int left_idx = (mi_row * 2) & 15; + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; + } +} + +static INLINE int calc_mi_size(int len) { + // len is in mi units. + return len + MI_BLOCK_SIZE; +} + +static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, + int mi_row, int bh, + int mi_col, int bw, + int mi_rows, int mi_cols) { + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; + + // Are edges available for intra prediction? + xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; + xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; +} + +static INLINE void update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + BLOCK_SIZE bsize) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + + // num_4x4_blocks_wide_lookup[bsize] / 2 + const int bs = num_8x8_blocks_wide_lookup[bsize]; + + // update the partition context at the end notes. set partition bits + // of block sizes larger than the current one to be one, and partition + // bits of smaller block sizes to be zero. + memset(above_ctx, partition_context_lookup[subsize].above, bs); + memset(left_ctx, partition_context_lookup[subsize].left, bs); +} + +static INLINE int partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + const int bsl = mi_width_log2_lookup[bsize]; + int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; + + assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + assert(bsl >= 0); + + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_ppflags.h b/thirdparty/libvpx/vp9/common/vp9_ppflags.h new file mode 100644 index 00000000000..12b989f43a5 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_ppflags.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_PPFLAGS_H_ +#define VP9_COMMON_VP9_PPFLAGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + VP9D_NOFILTERING = 0, + VP9D_DEBLOCK = 1 << 0, + VP9D_DEMACROBLOCK = 1 << 1, + VP9D_ADDNOISE = 1 << 2, + VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3, + VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4, + VP9D_DEBUG_TXT_DC_DIFF = 1 << 5, + VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, + VP9D_DEBUG_DRAW_MV = 1 << 7, + VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, + VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, + VP9D_MFQE = 1 << 10 +}; + +typedef struct { + int post_proc_flag; + int deblocking_level; + int noise_level; +} vp9_ppflags_t; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.c b/thirdparty/libvpx/vp9/common/vp9_pred_common.c new file mode 100644 index 00000000000..8f90e70e73e --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_pred_common.c @@ -0,0 +1,314 @@ + +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_seg_common.h" + +// Returns a context number for the given MB prediction signal +int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi && is_inter_block(left_mi) ? + left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi && is_inter_block(above_mi) ? + above_mi->interp_filter : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int ctx; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) + // neither edge uses comp pred (0/1) + ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->ref_frame[0] == cm->comp_fixed_ref); + else if (!has_second_ref(above_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(above_mi)); + else if (!has_second_ref(left_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(left_mi)); + else // both edges use comp pred (4) + ctx = 4; + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + + if (!has_second_ref(edge_mi)) + // edge does not use comp pred (0/1) + ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; + else + // edge uses comp pred (3) + ctx = 3; + } else { // no edges available (1) + ctx = 1; + } + assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); + return ctx; +} + +// Returns a context number for the given MB prediction signal +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_in_image = !!above_mi; + const int left_in_image = !!left_mi; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + + if (!has_second_ref(edge_mi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + else // comp pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] + != cm->comp_var_ref[1]); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mi); + const int a_sg = !has_second_ref(above_mi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] + : above_mi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] + : left_mi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { + pred_context = 0; + } else if (l_sg && a_sg) { // single/single + if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || + (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) + pred_context = 4; + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 1; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; + if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) + pred_context = 1; + else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) + pred_context = 2; + else + pred_context = 4; + } else if (vrfa == vrfl) { // comp/comp + pred_context = 4; + } else { + pred_context = 2; + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; + + if (!is_inter_block(edge_mi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] + != cm->comp_var_ref[1]); + else + pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + } + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); + else + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); + } else { // inter/inter + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; + + if (above_has_second && left_has_second) { + pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || + left0 == LAST_FRAME || left1 == LAST_FRAME); + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == LAST_FRAME) + pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + else + pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + } else { + pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); + } + } + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + if (!is_inter_block(edge_mi)) { // intra + pred_context = 2; + } else { // inter + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); + else + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); + } + } else { // no edges available + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + if (!has_second_ref(edge_mi)) { + if (edge_mi->ref_frame[0] == LAST_FRAME) + pred_context = 3; + else + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); + } else { + pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (above0 == GOLDEN_FRAME || + above1 == GOLDEN_FRAME || + left0 == GOLDEN_FRAME || + left1 == GOLDEN_FRAME); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == GOLDEN_FRAME) + pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + else if (rfs == ALTREF_FRAME) + pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; + else + pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + } else { + if (above0 == LAST_FRAME && left0 == LAST_FRAME) { + pred_context = 3; + } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { + const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 + : above0; + pred_context = 4 * (edge0 == GOLDEN_FRAME); + } else { + pred_context = 2 * (above0 == GOLDEN_FRAME) + + 2 * (left0 == GOLDEN_FRAME); + } + } + } + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + + if (!is_inter_block(edge_mi) || + (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) + pred_context = 2; + else if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); + else + pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.h b/thirdparty/libvpx/vp9/common/vp9_pred_common.h new file mode 100644 index 00000000000..f3c676e953d --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_pred_common.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_PRED_COMMON_H_ +#define VP9_COMMON_VP9_PRED_COMMON_H_ + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE int get_segment_id(const VP9_COMMON *cm, + const uint8_t *segment_ids, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + int x, y, segment_id = MAX_SEGMENTS; + + for (y = 0; y < ymis; ++y) + for (x = 0; x < xmis; ++x) + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + return segment_id; +} + +static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_sip = (above_mi != NULL) ? + above_mi->seg_id_predicted : 0; + const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; + + return above_sip + left_sip; +} + +static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, + const MACROBLOCKD *xd) { + return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; +} + +static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; + const int left_skip = (left_mi != NULL) ? left_mi->skip : 0; + return above_skip + left_skip; +} + +static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->skip_probs[vp9_get_skip_context(xd)]; +} + +int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); + +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real macroblocks. +// The prediction flags in these dummy entries are initialized to 0. +// 0 - inter/inter, inter/--, --/inter, --/-- +// 1 - intra/inter, inter/intra +// 2 - intra/--, --/intra +// 3 - intra/intra +static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + return left_intra && above_intra ? 3 : left_intra || above_intra; + } else if (has_above || has_left) { // one edge available + return 2 * !is_inter_block(has_above ? above_mi : left_mi); + } + return 0; +} + +static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; +} + +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); + return cm->fc->comp_ref_prob[pred_context]; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; +} + +int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; +} + +// Returns a context number for the given MB prediction signal +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real blocks. +// The prediction flags in these dummy entries are initialized to 0. +static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { + const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size + : max_tx_size; + int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size + : max_tx_size; + if (!has_left) + left_ctx = above_ctx; + + if (!has_above) + above_ctx = left_ctx; + + return (above_ctx + left_ctx) > max_tx_size; +} + +static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, + const struct tx_probs *tx_probs) { + switch (max_tx_size) { + case TX_8X8: + return tx_probs->p8x8[ctx]; + case TX_16X16: + return tx_probs->p16x16[ctx]; + case TX_32X32: + return tx_probs->p32x32[ctx]; + default: + assert(0 && "Invalid max_tx_size."); + return NULL; + } +} + +static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, + const MACROBLOCKD *xd, + const struct tx_probs *tx_probs) { + return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); +} + +static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, + struct tx_counts *tx_counts) { + switch (max_tx_size) { + case TX_8X8: + return tx_counts->p8x8[ctx]; + case TX_16X16: + return tx_counts->p16x16[ctx]; + case TX_32X32: + return tx_counts->p32x32[ctx]; + default: + assert(0 && "Invalid max_tx_size."); + return NULL; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.c b/thirdparty/libvpx/vp9/common/vp9_quant_common.c new file mode 100644 index 00000000000..d83f3c1a2f5 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_quant_common.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_seg_common.h" + +static const int16_t dc_qlookup[QINDEX_RANGE] = { + 4, 8, 8, 9, 10, 11, 12, 12, + 13, 14, 15, 16, 17, 18, 19, 19, + 20, 21, 22, 23, 24, 25, 26, 26, + 27, 28, 29, 30, 31, 32, 32, 33, + 34, 35, 36, 37, 38, 38, 39, 40, + 41, 42, 43, 43, 44, 45, 46, 47, + 48, 48, 49, 50, 51, 52, 53, 53, + 54, 55, 56, 57, 57, 58, 59, 60, + 61, 62, 62, 63, 64, 65, 66, 66, + 67, 68, 69, 70, 70, 71, 72, 73, + 74, 74, 75, 76, 77, 78, 78, 79, + 80, 81, 81, 82, 83, 84, 85, 85, + 87, 88, 90, 92, 93, 95, 96, 98, + 99, 101, 102, 104, 105, 107, 108, 110, + 111, 113, 114, 116, 117, 118, 120, 121, + 123, 125, 127, 129, 131, 134, 136, 138, + 140, 142, 144, 146, 148, 150, 152, 154, + 156, 158, 161, 164, 166, 169, 172, 174, + 177, 180, 182, 185, 187, 190, 192, 195, + 199, 202, 205, 208, 211, 214, 217, 220, + 223, 226, 230, 233, 237, 240, 243, 247, + 250, 253, 257, 261, 265, 269, 272, 276, + 280, 284, 288, 292, 296, 300, 304, 309, + 313, 317, 322, 326, 330, 335, 340, 344, + 349, 354, 359, 364, 369, 374, 379, 384, + 389, 395, 400, 406, 411, 417, 423, 429, + 435, 441, 447, 454, 461, 467, 475, 482, + 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, + 654, 668, 684, 700, 717, 736, 755, 775, + 796, 819, 843, 869, 896, 925, 955, 988, + 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t dc_qlookup_10[QINDEX_RANGE] = { + 4, 9, 10, 13, 15, 17, 20, 22, + 25, 28, 31, 34, 37, 40, 43, 47, + 50, 53, 57, 60, 64, 68, 71, 75, + 78, 82, 86, 90, 93, 97, 101, 105, + 109, 113, 116, 120, 124, 128, 132, 136, + 140, 143, 147, 151, 155, 159, 163, 166, + 170, 174, 178, 182, 185, 189, 193, 197, + 200, 204, 208, 212, 215, 219, 223, 226, + 230, 233, 237, 241, 244, 248, 251, 255, + 259, 262, 266, 269, 273, 276, 280, 283, + 287, 290, 293, 297, 300, 304, 307, 310, + 314, 317, 321, 324, 327, 331, 334, 337, + 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, + 442, 448, 454, 460, 466, 472, 478, 484, + 490, 499, 507, 516, 525, 533, 542, 550, + 559, 567, 576, 584, 592, 601, 609, 617, + 625, 634, 644, 655, 666, 676, 687, 698, + 708, 718, 729, 739, 749, 759, 770, 782, + 795, 807, 819, 831, 844, 856, 868, 880, + 891, 906, 920, 933, 947, 961, 975, 988, + 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, + 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, + 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, + 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, + 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, + 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, + 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, + 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, + 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, + 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, +}; + +static const int16_t dc_qlookup_12[QINDEX_RANGE] = { + 4, 12, 18, 25, 33, 41, 50, 60, + 70, 80, 91, 103, 115, 127, 140, 153, + 166, 180, 194, 208, 222, 237, 251, 266, + 281, 296, 312, 327, 343, 358, 374, 390, + 405, 421, 437, 453, 469, 484, 500, 516, + 532, 548, 564, 580, 596, 611, 627, 643, + 659, 674, 690, 706, 721, 737, 752, 768, + 783, 798, 814, 829, 844, 859, 874, 889, + 904, 919, 934, 949, 964, 978, 993, 1008, + 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, + 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, + 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, + 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, + 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, + 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, + 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, + 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, + 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, + 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, + 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, + 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, + 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, + 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, + 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, + 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, + 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, + 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, + 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, + 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, + 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, +}; +#endif + +static const int16_t ac_qlookup[QINDEX_RANGE] = { + 4, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, + 136, 138, 140, 142, 144, 146, 148, 150, + 152, 155, 158, 161, 164, 167, 170, 173, + 176, 179, 182, 185, 188, 191, 194, 197, + 200, 203, 207, 211, 215, 219, 223, 227, + 231, 235, 239, 243, 247, 251, 255, 260, + 265, 270, 275, 280, 285, 290, 295, 300, + 305, 311, 317, 323, 329, 335, 341, 347, + 353, 359, 366, 373, 380, 387, 394, 401, + 408, 416, 424, 432, 440, 448, 456, 465, + 474, 483, 492, 501, 510, 520, 530, 540, + 550, 560, 571, 582, 593, 604, 615, 627, + 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, + 864, 881, 898, 915, 933, 951, 969, 988, + 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, + 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, + 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, + 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t ac_qlookup_10[QINDEX_RANGE] = { + 4, 9, 11, 13, 16, 18, 21, 24, + 27, 30, 33, 37, 40, 44, 48, 51, + 55, 59, 63, 67, 71, 75, 79, 83, + 88, 92, 96, 100, 105, 109, 114, 118, + 122, 127, 131, 136, 140, 145, 149, 154, + 158, 163, 168, 172, 177, 181, 186, 190, + 195, 199, 204, 208, 213, 217, 222, 226, + 231, 235, 240, 244, 249, 253, 258, 262, + 267, 271, 275, 280, 284, 289, 293, 297, + 302, 306, 311, 315, 319, 324, 328, 332, + 337, 341, 345, 349, 354, 358, 362, 367, + 371, 375, 379, 384, 388, 392, 396, 401, + 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, + 539, 547, 555, 563, 571, 579, 588, 596, + 604, 616, 628, 640, 652, 664, 676, 688, + 700, 713, 725, 737, 749, 761, 773, 785, + 797, 809, 825, 841, 857, 873, 889, 905, + 922, 938, 954, 970, 986, 1002, 1018, 1038, + 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, + 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, + 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, + 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, + 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, + 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, + 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, + 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, + 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, + 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, + 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, + 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, +}; + +static const int16_t ac_qlookup_12[QINDEX_RANGE] = { + 4, 13, 19, 27, 35, 44, 54, 64, + 75, 87, 99, 112, 126, 139, 154, 168, + 183, 199, 214, 230, 247, 263, 280, 297, + 314, 331, 349, 366, 384, 402, 420, 438, + 456, 475, 493, 511, 530, 548, 567, 586, + 604, 623, 642, 660, 679, 698, 716, 735, + 753, 772, 791, 809, 828, 846, 865, 884, + 902, 920, 939, 957, 976, 994, 1012, 1030, + 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, + 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, + 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, + 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, + 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, + 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, + 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, + 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, + 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, + 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, + 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, + 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, + 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, + 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, + 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, + 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, + 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, + 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, + 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, + 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, + 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, + 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, + 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, + 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, +}; +#endif + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: + return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: + return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: + return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } +#else + (void) bit_depth; + return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: + return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: + return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: + return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } +#else + (void) bit_depth; + return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex) { + if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { + const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); + const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? + data : base_qindex + data; + return clamp(seg_qindex, 0, MAXQ); + } else { + return base_qindex; + } +} + diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.h b/thirdparty/libvpx/vp9/common/vp9_quant_common.h new file mode 100644 index 00000000000..4bae4a89677 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_quant_common.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ +#define VP9_COMMON_VP9_QUANT_COMMON_H_ + +#include "vpx/vpx_codec.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MINQ 0 +#define MAXQ 255 +#define QINDEX_RANGE (MAXQ - MINQ + 1) +#define QINDEX_BITS 8 + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.c b/thirdparty/libvpx/vp9/common/vp9_reconinter.c new file mode 100644 index 00000000000..84718e97036 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconinter.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_scale_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_reconintra.h" + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *src_mv, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y, int bd) { + const int is_q4 = precision == MV_PRECISION_Q4; + const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, + is_q4 ? src_mv->col : src_mv->col * 2 }; + MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); + const int subpel_x = mv.col & SUBPEL_MASK; + const int subpel_y = mv.row & SUBPEL_MASK; + + src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); + + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, + bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *src_mv, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y) { + const int is_q4 = precision == MV_PRECISION_Q4; + const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, + is_q4 ? src_mv->col : src_mv->col * 2 }; + MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); + const int subpel_x = mv.col & SUBPEL_MASK; + const int subpel_y = mv.row & SUBPEL_MASK; + + src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); + + inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); +} + +static INLINE int round_mv_comp_q4(int value) { + return (value < 0 ? value - 2 : value + 2) / 4; +} + +static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { + MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; + return res; +} + +static INLINE int round_mv_comp_q2(int value) { + return (value < 0 ? value - 1 : value + 1) / 2; +} + +static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { + MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row + + mi->bmi[block1].as_mv[idx].as_mv.row), + round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col + + mi->bmi[block1].as_mv[idx].as_mv.col) }; + return res; +} + +// TODO(jkoleszar): yet another mv clamping function :-( +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, + int bw, int bh, int ss_x, int ss_y) { + // If the MV points so far into the UMV border that no visible pixels + // are used for reconstruction, the subpel part of the MV can be + // discarded and the MV limited to 16 pixels with equivalent results. + const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS; + const int spel_right = spel_left - SUBPEL_SHIFTS; + const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; + const int spel_bottom = spel_top - SUBPEL_SHIFTS; + MV clamped_mv = { + src_mv->row * (1 << (1 - ss_y)), + src_mv->col * (1 << (1 - ss_x)) + }; + assert(ss_x <= 1); + assert(ss_y <= 1); + + clamp_mv(&clamped_mv, + xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, + xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, + xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, + xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); + + return clamped_mv; +} + +MV average_split_mvs(const struct macroblockd_plane *pd, + const MODE_INFO *mi, int ref, int block) { + const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); + MV res = {0, 0}; + switch (ss_idx) { + case 0: + res = mi->bmi[block].as_mv[ref].as_mv; + break; + case 1: + res = mi_mv_pred_q2(mi, ref, block, block + 2); + break; + case 2: + res = mi_mv_pred_q2(mi, ref, block, block + 1); + break; + case 3: + res = mi_mv_pred_q4(mi, ref); + break; + default: + assert(ss_idx <= 3 && ss_idx >= 0); + } + return res; +} + +static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO *mi = xd->mi[0]; + const int is_compound = has_second_ref(mi); + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + struct buf_2d *const dst_buf = &pd->dst; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + const MV mv = mi->sb_type < BLOCK_8X8 + ? average_split_mvs(pd, mi, ref, block) + : mi->mv[ref].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + + uint8_t *pre; + MV32 scaled_mv; + int xs, ys, subpel_x, subpel_y; + const int is_scaled = vp9_is_scaled(sf); + + if (is_scaled) { + // Co-ordinate of containing block to pixel precision. + const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); +#endif + if (plane == 0) + pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; + else if (plane == 1) + pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; + else + pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; + + pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, + pre_buf->stride, sf); + pre = pre_buf->buf; + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + pre = pre_buf->buf + (y * pre_buf->stride + x); + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); + } else { + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH + } +} + +static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, + int plane_from, int plane_to) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + for (plane = plane_from; plane <= plane_to; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y); + } else { + build_inter_predictors(xd, plane, 0, bw, bh, + 0, 0, bw, bh, mi_x, mi_y); + } + } +} + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); +} + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); +} + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, + MAX_MB_PLANE - 1); +} + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, + MAX_MB_PLANE - 1); +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col) { + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &planes[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } +} + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *sf) { + if (src != NULL) { + int i; + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); + } + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.h b/thirdparty/libvpx/vp9/common/vp9_reconinter.h new file mode 100644 index 00000000000..07745e3aaa5 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconinter.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_RECONINTER_H_ +#define VP9_COMMON_VP9_RECONINTER_H_ + +#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE void inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys) { + sf->predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys, int bd) { + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, + int ref, int block); + +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, + int bw, int bh, int ss_x, int ss_y); + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane); + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *mv_q3, + const struct scale_factors *sf, + int w, int h, int do_avg, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *mv_q3, + const struct scale_factors *sf, + int w, int h, int do_avg, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y, int bd); +#endif + +static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, + const struct scale_factors *sf) { + const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; + const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; + return y * stride + x; +} + +static INLINE void setup_pred_plane(struct buf_2d *dst, + uint8_t *src, int stride, + int mi_row, int mi_col, + const struct scale_factors *scale, + int subsampling_x, int subsampling_y) { + const int x = (MI_SIZE * mi_col) >> subsampling_x; + const int y = (MI_SIZE * mi_row) >> subsampling_y; + dst->buf = src + scaled_buffer_offset(x, y, stride, scale); + dst->stride = stride; +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col); + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, + const struct scale_factors *sf); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.c b/thirdparty/libvpx/vp9/common/vp9_reconintra.c new file mode 100644 index 00000000000..445785835a6 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconintra.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#if CONFIG_VP9_HIGHBITDEPTH +#include "vpx_dsp/vpx_dsp_common.h" +#endif // CONFIG_VP9_HIGHBITDEPTH +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/vpx_once.h" + +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_onyxc_int.h" + +const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D207 + ADST_DCT, // D63 + ADST_ADST, // TM +}; + +enum { + NEED_LEFT = 1 << 1, + NEED_ABOVE = 1 << 2, + NEED_ABOVERIGHT = 1 << 3, +}; + +static const uint8_t extend_modes[INTRA_MODES] = { + NEED_ABOVE | NEED_LEFT, // DC + NEED_ABOVE, // V + NEED_LEFT, // H + NEED_ABOVERIGHT, // D45 + NEED_LEFT | NEED_ABOVE, // D135 + NEED_LEFT | NEED_ABOVE, // D117 + NEED_LEFT | NEED_ABOVE, // D153 + NEED_LEFT, // D207 + NEED_ABOVERIGHT, // D63 + NEED_LEFT | NEED_ABOVE, // TM +}; + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; +static intra_pred_fn dc_pred[2][2][TX_SIZES]; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd); +static intra_high_pred_fn pred_high[INTRA_MODES][4]; +static intra_high_pred_fn dc_pred_high[2][2][4]; +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void vp9_init_intra_predictors_internal(void) { +#define INIT_ALL_SIZES(p, type) \ + p[TX_4X4] = vpx_##type##_predictor_4x4; \ + p[TX_8X8] = vpx_##type##_predictor_8x8; \ + p[TX_16X16] = vpx_##type##_predictor_16x16; \ + p[TX_32X32] = vpx_##type##_predictor_32x32 + + INIT_ALL_SIZES(pred[V_PRED], v); + INIT_ALL_SIZES(pred[H_PRED], h); + INIT_ALL_SIZES(pred[D207_PRED], d207); + INIT_ALL_SIZES(pred[D45_PRED], d45); + INIT_ALL_SIZES(pred[D63_PRED], d63); + INIT_ALL_SIZES(pred[D117_PRED], d117); + INIT_ALL_SIZES(pred[D135_PRED], d135); + INIT_ALL_SIZES(pred[D153_PRED], d153); + INIT_ALL_SIZES(pred[TM_PRED], tm); + + INIT_ALL_SIZES(dc_pred[0][0], dc_128); + INIT_ALL_SIZES(dc_pred[0][1], dc_top); + INIT_ALL_SIZES(dc_pred[1][0], dc_left); + INIT_ALL_SIZES(dc_pred[1][1], dc); + +#if CONFIG_VP9_HIGHBITDEPTH + INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); + INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); + INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); + INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); + INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); + INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); + INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); + INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); + INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm); + + INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); + INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); + INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); + INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#undef intra_pred_allsizes +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void build_intra_predictors_high(const MACROBLOCKD *xd, + const uint8_t *ref8, + int ref_stride, + uint8_t *dst8, + int dst_stride, + PREDICTION_MODE mode, + TX_SIZE tx_size, + int up_available, + int left_available, + int right_available, + int x, int y, + int plane, int bd) { + int i; + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + DECLARE_ALIGNED(16, uint16_t, left_col[32]); + DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); + uint16_t *above_row = above_data + 16; + const uint16_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int need_left = extend_modes[mode] & NEED_LEFT; + const int need_above = extend_modes[mode] & NEED_ABOVE; + const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; + int base = 128 << (bd - 8); + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (need_left) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } + } else { + vpx_memset16(left_col, base + 1, bs); + } + } + + // NEED_ABOVE + if (need_above) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + vpx_memset16(above_row, base - 1, bs); + above_row[-1] = base - 1; + } + } + + // NEED_ABOVERIGHT + if (need_aboveright) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } + } + } else { + vpx_memset16(above_row, base - 1, bs * 2); + above_row[-1] = base - 1; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred_high[left_available][up_available][tx_size](dst, dst_stride, + const_above_row, + left_col, xd->bd); + } else { + pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, + xd->bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, + PREDICTION_MODE mode, TX_SIZE tx_size, + int up_available, int left_available, + int right_available, int x, int y, + int plane) { + int i; + DECLARE_ALIGNED(16, uint8_t, left_col[32]); + DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); + uint8_t *above_row = above_data + 16; + const uint8_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // .. + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (extend_modes[mode] & NEED_LEFT) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } + } else { + memset(left_col, 129, bs); + } + } + + // NEED_ABOVE + if (extend_modes[mode] & NEED_ABOVE) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + } + } + above_row[-1] = left_available ? above_ref[-1] : 129; + } else { + memset(above_row, 127, bs); + above_row[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if (extend_modes[mode] & NEED_ABOVERIGHT) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs); + else + memset(above_row + bs, above_row[bs - 1], bs); + } + } + above_row[-1] = left_available ? above_ref[-1] : 129; + } else { + memset(above_row, 127, bs * 2); + above_row[-1] = 127; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred[left_available][up_available][tx_size](dst, dst_stride, + const_above_row, left_col); + } else { + pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); + } +} + +void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, + TX_SIZE tx_size, PREDICTION_MODE mode, + const uint8_t *ref, int ref_stride, + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane) { + const int bw = (1 << bwl_in); + const int txw = (1 << tx_size); + const int have_top = loff || (xd->above_mi != NULL); + const int have_left = aoff || (xd->left_mi != NULL); + const int have_right = (aoff + txw) < bw; + const int x = aoff * 4; + const int y = loff * 4; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, + tx_size, have_top, have_left, have_right, + x, y, plane, xd->bd); + return; + } +#endif + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top, have_left, have_right, x, y, plane); +} + +void vp9_init_intra_predictors(void) { + once(vp9_init_intra_predictors_internal); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.h b/thirdparty/libvpx/vp9/common/vp9_reconintra.h new file mode 100644 index 00000000000..de453808b78 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconintra.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_RECONINTRA_H_ +#define VP9_COMMON_VP9_RECONINTRA_H_ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_init_intra_predictors(void); + +void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, + TX_SIZE tx_size, PREDICTION_MODE mode, + const uint8_t *ref, int ref_stride, + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_rtcd.c b/thirdparty/libvpx/vp9/common/vp9_rtcd.c new file mode 100644 index 00000000000..2dfa09f50e0 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_rtcd.c @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vp9_rtcd.h" +#include "vpx_ports/vpx_once.h" + +void vp9_rtcd() { + // TODO(JBB): Remove this once, by insuring that both the encoder and + // decoder setup functions are protected by once(); + once(setup_rtcd_internal); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.c b/thirdparty/libvpx/vp9/common/vp9_scale.c new file mode 100644 index 00000000000..b763b925b3e --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scale.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_scale.h" +#include "vpx_dsp/vpx_filter.h" + +static INLINE int scaled_x(int val, const struct scale_factors *sf) { + return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); +} + +static INLINE int scaled_y(int val, const struct scale_factors *sf) { + return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); +} + +static int unscaled_value(int val, const struct scale_factors *sf) { + (void) sf; + return val; +} + +static int get_fixed_point_scale_factor(int other_size, int this_size) { + // Calculate scaling factor once for each reference frame + // and use fixed point scaling factors in decoding and encoding routines. + // Hardware implementations can calculate scale factor in device driver + // and use multiplication and shifting on hardware instead of division. + return (other_size << REF_SCALE_SHIFT) / this_size; +} + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { + const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; + const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; + const MV32 res = { + scaled_y(mv->row, sf) + y_off_q4, + scaled_x(mv->col, sf) + x_off_q4 + }; + return res; +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h, + int use_highbd) { +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h) { +#endif + if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { + sf->x_scale_fp = REF_INVALID_SCALE; + sf->y_scale_fp = REF_INVALID_SCALE; + return; + } + + sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); + sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); + sf->x_step_q4 = scaled_x(16, sf); + sf->y_step_q4 = scaled_y(16, sf); + + if (vp9_is_scaled(sf)) { + sf->scale_value_x = scaled_x; + sf->scale_value_y = scaled_y; + } else { + sf->scale_value_x = unscaled_value; + sf->scale_value_y = unscaled_value; + } + + // TODO(agrange): Investigate the best choice of functions to use here + // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what + // to do at full-pel offsets. The current selection, where the filter is + // applied in one direction only, and not at all for 0,0, seems to give the + // best quality, but it may be worth trying an additional mode that does + // do the filtering on full-pel. + + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. + sf->predict[0][0][0] = vpx_convolve_copy; + sf->predict[0][0][1] = vpx_convolve_avg; + sf->predict[0][1][0] = vpx_convolve8_vert; + sf->predict[0][1][1] = vpx_convolve8_avg_vert; + sf->predict[1][0][0] = vpx_convolve8_horiz; + sf->predict[1][0][1] = vpx_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->predict[0][0][0] = vpx_scaled_vert; + sf->predict[0][0][1] = vpx_scaled_avg_vert; + sf->predict[0][1][0] = vpx_scaled_vert; + sf->predict[0][1][1] = vpx_scaled_avg_vert; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->predict[0][0][0] = vpx_scaled_horiz; + sf->predict[0][0][1] = vpx_scaled_avg_horiz; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_horiz; + sf->predict[1][0][1] = vpx_scaled_avg_horiz; + } else { + // Must always scale in both directions. + sf->predict[0][0][0] = vpx_scaled_2d; + sf->predict[0][0][1] = vpx_scaled_avg_2d; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } + + // 2D subpel motion always gets filtered in both directions + + if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { + sf->predict[1][1][0] = vpx_scaled_2d; + sf->predict[1][1][1] = vpx_scaled_avg_2d; + } else { + sf->predict[1][1][0] = vpx_convolve8; + sf->predict[1][1][1] = vpx_convolve8_avg; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (use_highbd) { + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // Must always scale in both directions. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } + // 2D subpel motion always gets filtered in both directions. + sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; + } +#endif +} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.h b/thirdparty/libvpx/vp9/common/vp9_scale.h new file mode 100644 index 00000000000..5e910410797 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scale.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_SCALE_H_ +#define VP9_COMMON_VP9_SCALE_H_ + +#include "vp9/common/vp9_mv.h" +#include "vpx_dsp/vpx_convolve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define REF_SCALE_SHIFT 14 +#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) +#define REF_INVALID_SCALE -1 + +struct scale_factors { + int x_scale_fp; // horizontal fixed point scale factor + int y_scale_fp; // vertical fixed point scale factor + int x_step_q4; + int y_step_q4; + + int (*scale_value_x)(int val, const struct scale_factors *sf); + int (*scale_value_y)(int val, const struct scale_factors *sf); + + convolve_fn_t predict[2][2][2]; // horiz, vert, avg +#if CONFIG_VP9_HIGHBITDEPTH + highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg +#endif +}; + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h, + int use_high); +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h); +#endif + +static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { + return sf->x_scale_fp != REF_INVALID_SCALE && + sf->y_scale_fp != REF_INVALID_SCALE; +} + +static INLINE int vp9_is_scaled(const struct scale_factors *sf) { + return vp9_is_valid_scale(sf) && + (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); +} + +static INLINE int valid_ref_frame_size(int ref_width, int ref_height, + int this_width, int this_height) { + return 2 * this_width >= ref_width && + 2 * this_height >= ref_height && + this_width <= 16 * ref_width && + this_height <= 16 * ref_height; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.c b/thirdparty/libvpx/vp9/common/vp9_scan.c new file mode 100644 index 00000000000..8b8b09f4a33 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scan.c @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_scan.h" + +DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { + 0, 4, 1, 5, + 8, 2, 12, 9, + 3, 6, 13, 10, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { + 0, 4, 8, 1, + 12, 5, 9, 2, + 13, 6, 10, 3, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { + 0, 1, 4, 2, + 5, 3, 6, 8, + 9, 7, 12, 10, + 13, 11, 14, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { + 0, 8, 1, 16, 9, 2, 17, 24, + 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, + 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, + 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, + 46, 39, 61, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { + 0, 8, 16, 1, 24, 9, 32, 17, + 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, + 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, + 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, + 31, 61, 39, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { + 0, 1, 2, 8, 9, 3, 16, 10, + 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, + 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, + 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, + 60, 39, 61, 47, 54, 55, 62, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, + 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, + 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, + 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, + 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, + 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, + 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, + 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, + 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, + 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, + 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, + 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, + 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, + 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, + 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, + 251, + 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, + 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, + 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, + 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, + 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, + 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, + 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, + 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, + 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, + 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, + 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, + 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, + 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, + 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, + 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, + 236, + 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, + 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, + 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, + 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, + 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, + 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, + 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, + 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, + 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, + 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, + 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, + 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, + 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, + 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, + 158, + 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, + 175, + 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, + 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, + 68, 131, 37, 100, + 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, + 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, + 102, 352, 8, 197, + 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, + 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, + 41, 417, 199, 136, + 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, + 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, + 295, 420, 106, 451, + 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, + 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, + 453, 139, 44, 234, + 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, + 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, + 486, 77, 204, 362, + 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, + 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, + 111, 238, 48, 143, + 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, + 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, + 393, 300, 269, 176, 145, + 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, + 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, + 550, 519, 488, 457, 426, 395, + 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, + 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, + 210, 179, 117, 86, 55, 738, 707, + 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, + 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, + 645, 552, 521, 428, 397, 304, + 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, + 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, + 864, 833, 802, 771, 740, 709, + 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, + 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, + 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, + 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, + 743, 619, 495, 371, 247, 123, + 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, + 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, + 898, 836, 805, 774, 712, 681, + 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, + 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, + 651, 620, 589, 558, 527, + 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, + 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, + 559, 497, 466, 435, 373, + 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, + 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, + 499, 375, 251, 127, + 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, + 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, + 685, 654, 592, 561, + 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, + 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, + 438, 407, 376, 345, + 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, + 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, + 967, 874, 843, 750, + 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, + 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, + 564, 533, 440, 409, + 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, + 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, + 752, 721, 690, 659, + 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, + 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, + 350, 319, 1002, 971, + 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, + 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, + 537, 444, 413, 972, + 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, + 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, + 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, + 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, + 1007, 883, 759, 635, 511, + 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, + 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, + 884, 853, 822, 791, + 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, + 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, + 1011, 887, 763, 639, + 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, + 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, + 702, 671, 1013, 982, + 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, + 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, + 1016, 985, 954, 923, + 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, + 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, + 990, 959, 1022, 991, 1023, +}; + +// Neighborhood 2-tuples for various scans and blocksizes, +// in {top, left} order for each position in corresponding scan order. +DECLARE_ALIGNED(16, static const int16_t, + default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, + 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, + 3, 10, 10, 7, 7, 11, 11, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, + 12, 10, 10, 13, 13, 14, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, + 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, + 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, + 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, + 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, + 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, + 47, 47, 55, 55, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, + 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, + 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, + 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, + 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, + 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, + 53, 54, 54, 61, 61, 62, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 2, 10, 17, 17, + 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, + 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, + 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, + 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, + 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, + 46, 54, 61, 47, 54, 55, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, + 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, + 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, + 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, + 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, + 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, + 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, + 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, + 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, + 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, + 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, + 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, + 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, + 56, 56, 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, + 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, + 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, + 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, + 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, + 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, + 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 43, + 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, + 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, + 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, + 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, + 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, + 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, + 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, + 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, + 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, + 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, + 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, + 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, + 223, 239, 239, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, + 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, + 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, + 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, + 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, + 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, + 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, + 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, + 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 114, 100, 100, 128, 128, + 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, + 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, + 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, + 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, + 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, + 74, 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, + 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, + 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, + 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, + 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, + 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, + 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, + 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, + 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, + 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, + 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, + 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, + 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, + 202, 202, 246, 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, + 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, + 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, + 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, + 206, 206, 236, 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, + 238, 253, 253, 254, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, + 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, + 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, + 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, + 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, + 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, + 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, + 129, 144, 23, 38, 54, 69, 100, 115, 85, 100, 115, 130, 144, 144, 130, 145, + 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, + 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, + 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, + 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, + 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, + 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, + 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, + 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, + 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, + 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, + 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, + 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, + 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, + 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 199, 214, + 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, + 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, + 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, + 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, + 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, + 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, + 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, + 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, + 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, + 238, 239, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, + 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, + 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, + 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, + 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, + 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, + 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, + 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 196, + 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, + 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 134, 353, 384, + 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, + 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, + 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, + 199, 230, 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, + 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, + 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, + 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, + 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, + 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, + 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, + 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, + 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, + 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, + 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, + 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, + 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, + 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, + 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, + 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, + 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, + 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, + 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, + 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, + 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, + 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, + 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, + 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, + 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, + 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, + 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, + 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, + 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, + 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, + 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, + 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, + 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, + 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, + 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, + 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, + 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, + 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, + 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, 834, 865, + 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, + 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, + 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, + 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, + 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, + 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, + 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, + 525, 556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, + 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, + 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, + 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, + 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, + 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, + 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, + 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, + 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, + 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, + 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, + 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, + 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, + 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, + 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, + 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, + 684, 622, 653, 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, + 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, + 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, + 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, + 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, + 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, + 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, + 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, + 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, + 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, + 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, + 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, + 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, + 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, + 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, + 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, + 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, + 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, + 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, + 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, + 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, + 846, 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, + 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, + 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, + 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, + 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, 692, 723, 661, 692, + 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, + 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, + 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, + 538, 476, 507, 445, 476, 414, 445, 383, 414, 973, 1004, 942, 973, 911, 942, + 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, + 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, + 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, + 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, + 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, + 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, + 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, + 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, + 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, + 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, + 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, + 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, + 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, + 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, + 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, + 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, + 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, + 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, + 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, + 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, + 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, + 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, + 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, + 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { + 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { + 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { + 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { + 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, + 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, + 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, + 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { + 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, + 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, + 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, + 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { + 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, + 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, + 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, + 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { + 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, + 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, + 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, + 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, + 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, + 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, + 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, + 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, + 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, + 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, + 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, + 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, + 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, + 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, + 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, + 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { + 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, + 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, + 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, + 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, + 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, + 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, + 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, + 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, + 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, + 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, + 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, + 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, + 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, + 244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, + 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, + 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, + 241, 248, 250, 254, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_16x16[256]) = { + 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, + 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, + 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, + 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, + 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, + 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, + 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 130, 151, 175, 190, 211, 225, + 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, + 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, + 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, + 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, + 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, + 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, + 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, + 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, + 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, + 249, 253, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { + 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, 170, 193, 204, + 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, + 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, + 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, + 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 224, + 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, + 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, + 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, + 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, + 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, + 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, + 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, + 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, + 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, + 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, + 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, + 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, + 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, + 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, + 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, + 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, + 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, + 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, + 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, + 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, + 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, + 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, + 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, + 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, + 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, + 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 160, 175, 188, + 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, + 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, + 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, + 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, + 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, 539, 589, 607, + 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, + 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, + 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, + 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, + 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, + 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, + 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, + 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, + 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, + 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, + 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, + 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, 409, + 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, + 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, + 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, + 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, + 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, + 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, + 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, + 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, + 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 649, 669, 682, 728, + 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, + 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, + 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, + 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, + 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, + 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, + 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, + 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, + 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, + 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, + 1023, +}; + +const scan_order vp9_default_scan_orders[TX_SIZES] = { + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, +}; + +const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES] = { + { // TX_4X4 + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, + {row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors}, + {col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors}, + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors} + }, { // TX_8X8 + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, + {row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors}, + {col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors}, + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors} + }, { // TX_16X16 + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, + {row_scan_16x16, vp9_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp9_col_iscan_16x16, col_scan_16x16_neighbors}, + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors} + }, { // TX_32X32 + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + } +}; diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.h b/thirdparty/libvpx/vp9/common/vp9_scan.h new file mode 100644 index 00000000000..4c1ee8107c2 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scan.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_SCAN_H_ +#define VP9_COMMON_VP9_SCAN_H_ + +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_NEIGHBORS 2 + +typedef struct { + const int16_t *scan; + const int16_t *iscan; + const int16_t *neighbors; +} scan_order; + +extern const scan_order vp9_default_scan_orders[TX_SIZES]; +extern const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES]; + +static INLINE int get_coef_context(const int16_t *neighbors, + const uint8_t *token_cache, int c) { + return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; +} + +static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, + PLANE_TYPE type, int block_idx) { + const MODE_INFO *const mi = xd->mi[0]; + + if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { + return &vp9_default_scan_orders[tx_size]; + } else { + const PREDICTION_MODE mode = get_y_mode(mi, block_idx); + return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SCAN_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.c b/thirdparty/libvpx/vp9/common/vp9_seg_common.c new file mode 100644 index 00000000000..7af61629a09 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_seg_common.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_quant_common.h" + +static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; + +static const int seg_feature_data_max[SEG_LVL_MAX] = { + MAXQ, MAX_LOOP_FILTER, 3, 0 }; + +// These functions provide access to new segment level features. +// Eventually these function may be "optimized out" but for the moment, +// the coding mechanism is still subject to change so these provide a +// convenient single point of change. + +void vp9_clearall_segfeatures(struct segmentation *seg) { + vp9_zero(seg->feature_data); + vp9_zero(seg->feature_mask); + seg->aq_av_offset = 0; +} + +void vp9_enable_segfeature(struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id) { + seg->feature_mask[segment_id] |= 1 << feature_id; +} + +int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) { + return seg_feature_data_max[feature_id]; +} + +int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { + return seg_feature_data_signed[feature_id]; +} + +void vp9_set_segdata(struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id, int seg_data) { + assert(seg_data <= seg_feature_data_max[feature_id]); + if (seg_data < 0) { + assert(seg_feature_data_signed[feature_id]); + assert(-seg_data <= seg_feature_data_max[feature_id]); + } + + seg->feature_data[segment_id][feature_id] = seg_data; +} + +const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = { + 2, 4, 6, 8, 10, 12, + 0, -1, -2, -3, -4, -5, -6, -7 +}; + + +// TBD? Functions to read and write segment data with range / validity checking diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.h b/thirdparty/libvpx/vp9/common/vp9_seg_common.h new file mode 100644 index 00000000000..99a9440c17e --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_seg_common.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_SEG_COMMON_H_ +#define VP9_COMMON_VP9_SEG_COMMON_H_ + +#include "vpx_dsp/prob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SEGMENT_DELTADATA 0 +#define SEGMENT_ABSDATA 1 + +#define MAX_SEGMENTS 8 +#define SEG_TREE_PROBS (MAX_SEGMENTS-1) + +#define PREDICTION_PROBS 3 + +// Segment level features. +typedef enum { + SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... + SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... + SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame + SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode + SEG_LVL_MAX = 4 // Number of features supported +} SEG_LVL_FEATURES; + + +struct segmentation { + uint8_t enabled; + uint8_t update_map; + uint8_t update_data; + uint8_t abs_delta; + uint8_t temporal_update; + + vpx_prob tree_probs[SEG_TREE_PROBS]; + vpx_prob pred_probs[PREDICTION_PROBS]; + + int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; + uint32_t feature_mask[MAX_SEGMENTS]; + int aq_av_offset; +}; + +static INLINE int segfeature_active(const struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id) { + return seg->enabled && + (seg->feature_mask[segment_id] & (1 << feature_id)); +} + +void vp9_clearall_segfeatures(struct segmentation *seg); + +void vp9_enable_segfeature(struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id); + +int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id); + +int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id); + +void vp9_set_segdata(struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id, + int seg_data); + +static INLINE int get_segdata(const struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id) { + return seg->feature_data[segment_id][feature_id]; +} + +extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SEG_COMMON_H_ + diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.c b/thirdparty/libvpx/vp9/common/vp9_thread_common.c new file mode 100644 index 00000000000..db78d6be894 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_thread_common.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_thread_common.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_loopfilter.h" + +#if CONFIG_MULTITHREAD +static INLINE void mutex_lock(pthread_mutex_t *const mutex) { + const int kMaxTryLocks = 4000; + int locked = 0; + int i; + + for (i = 0; i < kMaxTryLocks; ++i) { + if (!pthread_mutex_trylock(mutex)) { + locked = 1; + break; + } + } + + if (!locked) + pthread_mutex_lock(mutex); +} +#endif // CONFIG_MULTITHREAD + +static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { +#if CONFIG_MULTITHREAD + const int nsync = lf_sync->sync_range; + + if (r && !(c & (nsync - 1))) { + pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; + mutex_lock(mutex); + + while (c > lf_sync->cur_sb_col[r - 1] - nsync) { + pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); + } + pthread_mutex_unlock(mutex); + } +#else + (void)lf_sync; + (void)r; + (void)c; +#endif // CONFIG_MULTITHREAD +} + +static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, + const int sb_cols) { +#if CONFIG_MULTITHREAD + const int nsync = lf_sync->sync_range; + int cur; + // Only signal when there are enough filtered SB for next row to run. + int sig = 1; + + if (c < sb_cols - 1) { + cur = c; + if (c % nsync) + sig = 0; + } else { + cur = sb_cols + nsync; + } + + if (sig) { + mutex_lock(&lf_sync->mutex_[r]); + + lf_sync->cur_sb_col[r] = cur; + + pthread_cond_signal(&lf_sync->cond_[r]); + pthread_mutex_unlock(&lf_sync->mutex_[r]); + } +#else + (void)lf_sync; + (void)r; + (void)c; + (void)sb_cols; +#endif // CONFIG_MULTITHREAD +} + +// Implement row loopfiltering for each thread. +static INLINE +void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, + VP9_COMMON *const cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only, + VP9LfSync *const lf_sync) { + const int num_planes = y_only ? 1 : MAX_MB_PLANE; + const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; + int mi_row, mi_col; + enum lf_path path; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + + for (mi_row = start; mi_row < stop; + mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { + MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); + + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { + const int r = mi_row >> MI_BLOCK_SIZE_LOG2; + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + + sync_read(lf_sync, r, c); + + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + vp9_adjust_mask(cm, mi_row, mi_col, lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } + } + + sync_write(lf_sync, r, c, sb_cols); + } + } +} + +// Row-based multi-threaded loopfilter hook +static int loop_filter_row_worker(VP9LfSync *const lf_sync, + LFWorkerData *const lf_data) { + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); + return 1; +} + +static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only, + VPxWorker *workers, int nworkers, + VP9LfSync *lf_sync) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + // Number of superblock rows and cols + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + // Decoder may allocate more threads than number of tiles based on user's + // input. + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = VPXMIN(nworkers, tile_cols); + int i; + + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || + num_workers > lf_sync->num_workers) { + vp9_loop_filter_dealloc(lf_sync); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); + } + + // Initialize cur_sb_col to -1 for all SB rows. + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + // Set up loopfilter thread data. + // The decoder is capping num_workers because it has been observed that using + // more threads on the loopfilter than there are cores will hurt performance + // on Android. This is because the system will only schedule the tile decode + // workers on cores equal to the number of tile columns. Then if the decoder + // tries to use more threads for the loopfilter, it will hurt performance + // because of contention. If the multithreading code changes in the future + // then the number of workers used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { + VPxWorker *const worker = &workers[i]; + LFWorkerData *const lf_data = &lf_sync->lfdata[i]; + + worker->hook = (VPxWorkerHook)loop_filter_row_worker; + worker->data1 = lf_sync; + worker->data2 = lf_data; + + // Loopfilter data + vp9_loop_filter_data_reset(lf_data, frame, cm, planes); + lf_data->start = start + i * MI_BLOCK_SIZE; + lf_data->stop = stop; + lf_data->y_only = y_only; + + // Start loopfiltering + if (i == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + // Wait till all rows are finished + for (i = 0; i < num_workers; ++i) { + winterface->sync(&workers[i]); + } +} + +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, + int y_only, int partial_frame, + VPxWorker *workers, int num_workers, + VP9LfSync *lf_sync) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + + if (!frame_filter_level) return; + + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + vp9_loop_filter_frame_init(cm, frame_filter_level); + + loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, + y_only, workers, num_workers, lf_sync); +} + +// Set up nsync by width. +static INLINE int get_sync_range(int width) { + // nsync numbers are picked by testing. For example, for 4k + // video, using 4 gives best performance. + if (width < 640) + return 1; + else if (width <= 1280) + return 2; + else if (width <= 4096) + return 4; + else + return 8; +} + +// Allocate memory for lf row synchronization +void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, + int width, int num_workers) { + lf_sync->rows = rows; +#if CONFIG_MULTITHREAD + { + int i; + + CHECK_MEM_ERROR(cm, lf_sync->mutex_, + vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); + if (lf_sync->mutex_) { + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->mutex_[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, lf_sync->cond_, + vpx_malloc(sizeof(*lf_sync->cond_) * rows)); + if (lf_sync->cond_) { + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->cond_[i], NULL); + } + } + } +#endif // CONFIG_MULTITHREAD + + CHECK_MEM_ERROR(cm, lf_sync->lfdata, + vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); + lf_sync->num_workers = num_workers; + + CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, + vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); + + // Set up nsync. + lf_sync->sync_range = get_sync_range(width); +} + +// Deallocate lf synchronization related mutex and data +void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { + if (lf_sync != NULL) { +#if CONFIG_MULTITHREAD + int i; + + if (lf_sync->mutex_ != NULL) { + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->mutex_[i]); + } + vpx_free(lf_sync->mutex_); + } + if (lf_sync->cond_ != NULL) { + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->cond_[i]); + } + vpx_free(lf_sync->cond_); + } +#endif // CONFIG_MULTITHREAD + vpx_free(lf_sync->lfdata); + vpx_free(lf_sync->cur_sb_col); + // clear the structure as the source of this call may be a resize in which + // case this call will be followed by an _alloc() which may fail. + vp9_zero(*lf_sync); + } +} + +// Accumulate frame counts. +void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, + const FRAME_COUNTS *counts, int is_dec) { + int i, j, k, l, m; + + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + for (j = 0; j < INTRA_MODES; j++) + accum->y_mode[i][j] += counts->y_mode[i][j]; + + for (i = 0; i < INTRA_MODES; i++) + for (j = 0; j < INTRA_MODES; j++) + accum->uv_mode[i][j] += counts->uv_mode[i][j]; + + for (i = 0; i < PARTITION_CONTEXTS; i++) + for (j = 0; j < PARTITION_TYPES; j++) + accum->partition[i][j] += counts->partition[i][j]; + + if (is_dec) { + int n; + for (i = 0; i < TX_SIZES; i++) + for (j = 0; j < PLANE_TYPES; j++) + for (k = 0; k < REF_TYPES; k++) + for (l = 0; l < COEF_BANDS; l++) + for (m = 0; m < COEFF_CONTEXTS; m++) { + accum->eob_branch[i][j][k][l][m] += + counts->eob_branch[i][j][k][l][m]; + for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) + accum->coef[i][j][k][l][m][n] += + counts->coef[i][j][k][l][m][n]; + } + } else { + for (i = 0; i < TX_SIZES; i++) + for (j = 0; j < PLANE_TYPES; j++) + for (k = 0; k < REF_TYPES; k++) + for (l = 0; l < COEF_BANDS; l++) + for (m = 0; m < COEFF_CONTEXTS; m++) + accum->eob_branch[i][j][k][l][m] += + counts->eob_branch[i][j][k][l][m]; + // In the encoder, coef is only updated at frame + // level, so not need to accumulate it here. + // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) + // accum->coef[i][j][k][l][m][n] += + // counts->coef[i][j][k][l][m][n]; + } + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + for (j = 0; j < SWITCHABLE_FILTERS; j++) + accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + for (j = 0; j < INTER_MODES; j++) + accum->inter_mode[i][j] += counts->inter_mode[i][j]; + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->intra_inter[i][j] += counts->intra_inter[i][j]; + + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->comp_inter[i][j] += counts->comp_inter[i][j]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->comp_ref[i][j] += counts->comp_ref[i][j]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZES; j++) + accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; + + for (j = 0; j < TX_SIZES - 1; j++) + accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + + for (j = 0; j < TX_SIZES - 2; j++) + accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; + } + + for (i = 0; i < TX_SIZES; i++) + accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; + + for (i = 0; i < SKIP_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->skip[i][j] += counts->skip[i][j]; + + for (i = 0; i < MV_JOINTS; i++) + accum->mv.joints[i] += counts->mv.joints[i]; + + for (k = 0; k < 2; k++) { + nmv_component_counts *const comps = &accum->mv.comps[k]; + const nmv_component_counts *const comps_t = &counts->mv.comps[k]; + + for (i = 0; i < 2; i++) { + comps->sign[i] += comps_t->sign[i]; + comps->class0_hp[i] += comps_t->class0_hp[i]; + comps->hp[i] += comps_t->hp[i]; + } + + for (i = 0; i < MV_CLASSES; i++) + comps->classes[i] += comps_t->classes[i]; + + for (i = 0; i < CLASS0_SIZE; i++) { + comps->class0[i] += comps_t->class0[i]; + for (j = 0; j < MV_FP_SIZE; j++) + comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; + } + + for (i = 0; i < MV_OFFSET_BITS; i++) + for (j = 0; j < 2; j++) + comps->bits[i][j] += comps_t->bits[i][j]; + + for (i = 0; i < MV_FP_SIZE; i++) + comps->fp[i] += comps_t->fp[i]; + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.h b/thirdparty/libvpx/vp9/common/vp9_thread_common.h new file mode 100644 index 00000000000..b3b60c253f2 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_thread_common.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ +#define VP9_COMMON_VP9_THREAD_COMMON_H_ +#include "./vpx_config.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vpx_util/vpx_thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct FRAME_COUNTS; + +// Loopfilter row synchronization +typedef struct VP9LfSyncData { +#if CONFIG_MULTITHREAD + pthread_mutex_t *mutex_; + pthread_cond_t *cond_; +#endif + // Allocate memory to store the loop-filtered superblock index in each row. + int *cur_sb_col; + // The optimal sync_range for different resolution and platform should be + // determined by testing. Currently, it is chosen to be a power-of-2 number. + int sync_range; + int rows; + + // Row-based parallel loopfilter data + LFWorkerData *lfdata; + int num_workers; +} VP9LfSync; + +// Allocate memory for loopfilter row synchronization. +void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows, + int width, int num_workers); + +// Deallocate loopfilter synchronization related mutex and data. +void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); + +// Multi-threaded loopfilter that uses the tile threads. +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + struct VP9Common *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, + int y_only, int partial_frame, + VPxWorker *workers, int num_workers, + VP9LfSync *lf_sync); + +void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, + const struct FRAME_COUNTS *counts, int is_dec); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.c b/thirdparty/libvpx/vp9/common/vp9_tile_common.c new file mode 100644 index 00000000000..9fcb97c8542 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_tile_common.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_tile_common.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#define MIN_TILE_WIDTH_B64 4 +#define MAX_TILE_WIDTH_B64 64 + +static int get_tile_offset(int idx, int mis, int log2) { + const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; + const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; + return VPXMIN(offset, mis); +} + +void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { + tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); + tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); +} + +void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { + tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); + tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); +} + +void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { + vp9_tile_set_row(tile, cm, row); + vp9_tile_set_col(tile, cm, col); +} + +static int get_min_log2_tile_cols(const int sb64_cols) { + int min_log2 = 0; + while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) + ++min_log2; + return min_log2; +} + +static int get_max_log2_tile_cols(const int sb64_cols) { + int max_log2 = 1; + while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) + ++max_log2; + return max_log2 - 1; +} + +void vp9_get_tile_n_bits(int mi_cols, + int *min_log2_tile_cols, int *max_log2_tile_cols) { + const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; + *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); + *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); + assert(*min_log2_tile_cols <= *max_log2_tile_cols); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.h b/thirdparty/libvpx/vp9/common/vp9_tile_common.h new file mode 100644 index 00000000000..ae58805de1c --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_tile_common.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ +#define VP9_COMMON_VP9_TILE_COMMON_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; + +typedef struct TileInfo { + int mi_row_start, mi_row_end; + int mi_col_start, mi_col_end; +} TileInfo; + +// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on +// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' +void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, + int row, int col); + +void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row); +void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col); + +void vp9_get_tile_n_bits(int mi_cols, + int *min_log2_tile_cols, int *max_log2_tile_cols); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c new file mode 100644 index 00000000000..1c77b57ff11 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" +#include "vpx_ports/mem.h" + +void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[2]; + const __m128i zero = _mm_setzero_si128(); + const __m128i eight = _mm_set1_epi16(8); + + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8); + + switch (tx_type) { + case 0: // DCT_DCT + idct4_sse2(in); + idct4_sse2(in); + break; + case 1: // ADST_DCT + idct4_sse2(in); + iadst4_sse2(in); + break; + case 2: // DCT_ADST + iadst4_sse2(in); + idct4_sse2(in); + break; + case 3: // ADST_ADST + iadst4_sse2(in); + iadst4_sse2(in); + break; + default: + assert(0); + break; + } + + // Final round and shift + in[0] = _mm_add_epi16(in[0], eight); + in[1] = _mm_add_epi16(in[1], eight); + + in[0] = _mm_srai_epi16(in[0], 4); + in[1] = _mm_srai_epi16(in[1], 4); + + // Reconstruction and Store + { + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *)(dest + stride))); + d2 = _mm_unpacklo_epi32( + d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = _mm_add_epi16(d0, in[0]); + d2 = _mm_add_epi16(d2, in[1]); + d0 = _mm_packus_epi16(d0, d2); + // store result[0] + *(int *)dest = _mm_cvtsi128_si32(d0); + // store result[1] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store result[2] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); + // store result[3] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + } +} + +void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[8]; + const __m128i zero = _mm_setzero_si128(); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); + + // load input data + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8 * 1); + in[2] = load_input_data(input + 8 * 2); + in[3] = load_input_data(input + 8 * 3); + in[4] = load_input_data(input + 8 * 4); + in[5] = load_input_data(input + 8 * 5); + in[6] = load_input_data(input + 8 * 6); + in[7] = load_input_data(input + 8 * 7); + + switch (tx_type) { + case 0: // DCT_DCT + idct8_sse2(in); + idct8_sse2(in); + break; + case 1: // ADST_DCT + idct8_sse2(in); + iadst8_sse2(in); + break; + case 2: // DCT_ADST + iadst8_sse2(in); + idct8_sse2(in); + break; + case 3: // ADST_ADST + iadst8_sse2(in); + iadst8_sse2(in); + break; + default: + assert(0); + break; + } + + // Final rounding and shift + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 5); + in[1] = _mm_srai_epi16(in[1], 5); + in[2] = _mm_srai_epi16(in[2], 5); + in[3] = _mm_srai_epi16(in[3], 5); + in[4] = _mm_srai_epi16(in[4], 5); + in[5] = _mm_srai_epi16(in[5], 5); + in[6] = _mm_srai_epi16(in[6], 5); + in[7] = _mm_srai_epi16(in[7], 5); + + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * stride, in[2]); + RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); +} + +void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { + __m128i in0[16], in1[16]; + + load_buffer_8x16(input, in0); + input += 8; + load_buffer_8x16(input, in1); + + switch (tx_type) { + case 0: // DCT_DCT + idct16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + case 1: // ADST_DCT + idct16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + case 2: // DCT_ADST + iadst16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + case 3: // ADST_ADST + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + default: + assert(0); + break; + } + + write_buffer_8x16(dest, in0, stride); + dest += 8; + write_buffer_8x16(dest, in1, stride); +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c new file mode 100644 index 00000000000..d63912932c1 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include // qsort() + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" + +#include "vpx_dsp/bitreader_buffer.h" +#include "vpx_dsp/bitreader.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/mem_ops.h" +#include "vpx_scale/vpx_scale.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_idct.h" +#include "vp9/common/vp9_thread_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" + +#include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_detokenize.h" +#include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_dsubexp.h" + +#define MAX_VP9_HEADER_SIZE 80 + +static int is_compound_reference_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) + return 1; + + return 0; +} + +static void setup_compound_reference_mode(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + +static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { + return len != 0 && len <= (size_t)(end - start); +} + +static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) { + const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max)); + return data > max ? max : data; +} + +static TX_MODE read_tx_mode(vpx_reader *r) { + TX_MODE tx_mode = vpx_read_literal(r, 2); + if (tx_mode == ALLOW_32X32) + tx_mode += vpx_read_bit(r); + return tx_mode; +} + +static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { + int i, j; + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 3; ++j) + vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 2; ++j) + vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 1; ++j) + vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); +} + +static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) + vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); +} + +static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + for (j = 0; j < INTER_MODES - 1; ++j) + vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); +} + +static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, + vpx_reader *r) { + if (is_compound_reference_allowed(cm)) { + return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT + : COMPOUND_REFERENCE) + : SINGLE_REFERENCE; + } else { + return SINGLE_REFERENCE; + } +} + +static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) { + FRAME_CONTEXT *const fc = cm->fc; + int i; + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + for (i = 0; i < COMP_INTER_CONTEXTS; ++i) + vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); + + if (cm->reference_mode != COMPOUND_REFERENCE) + for (i = 0; i < REF_CONTEXTS; ++i) { + vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); + vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); + } + + if (cm->reference_mode != SINGLE_REFERENCE) + for (i = 0; i < REF_CONTEXTS; ++i) + vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); +} + +static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { + int i; + for (i = 0; i < n; ++i) + if (vpx_read(r, MV_UPDATE_PROB)) + p[i] = (vpx_read_literal(r, 7) << 1) | 1; +} + +static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { + int i, j; + + update_mv_probs(ctx->joints, MV_JOINTS - 1, r); + + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->sign, 1, r); + update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); + update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); + update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); + } + + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + for (j = 0; j < CLASS0_SIZE; ++j) + update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); + update_mv_probs(comp_ctx->fp, 3, r); + } + + if (allow_hp) { + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->class0_hp, 1, r); + update_mv_probs(&comp_ctx->hp, 1, r); + } + } +} + +static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + } else { + switch (tx_size) { + case TX_4X4: + vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); + } + } + } else { + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + } +} + +static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, + const TX_TYPE tx_type, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + } else { + switch (tx_size) { + case TX_4X4: + vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); + } + } + } else { + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + } +} + +static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, + vpx_reader *r, + MODE_INFO *const mi, + int plane, + int row, int col, + TX_SIZE tx_size) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + uint8_t *dst; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) + mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, + dst, pd->dst.stride, dst, pd->dst.stride, + col, row, plane); + + if (!mi->skip) { + const TX_TYPE tx_type = (plane || xd->lossless) ? + DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + const scan_order *sc = (plane || xd->lossless) ? + &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; + const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, + r, mi->segment_id); + if (eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, + dst, pd->dst.stride, eob); + } + } +} + +static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, + MODE_INFO *const mi, int plane, + int row, int col, TX_SIZE tx_size) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *sc = &vp9_default_scan_orders[tx_size]; + const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, + mi->segment_id); + + if (eob > 0) { + inverse_transform_block_inter( + xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], + pd->dst.stride, eob); + } + return eob; +} + +static void build_mc_border(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, int w, int h) { + // Get a pointer to the start of the real data for this row. + const uint8_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + memset(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy); + + if (right) + memset(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void high_build_mc_border(const uint8_t *src8, int src_stride, + uint16_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, + int w, int h) { + // Get a pointer to the start of the real data for this row. + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + const uint16_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + vpx_memset16(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); + + if (right) + vpx_memset16(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_VP9_HIGHBITDEPTH +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + MACROBLOCKD *xd, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; + } else { + build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; + } + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +} +#else +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = mc_buf + border_offset; + + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, + int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + struct buf_2d *pre_buf, + struct buf_2d *dst_buf, const MV* mv, + RefCntBuffer *ref_frame_buf, + int is_scaled, int ref) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + MV32 scaled_mv; + int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, + buf_stride, subpel_x, subpel_y; + uint8_t *ref_frame, *buf_ptr; + + // Get reference frame pointer, width and height. + if (plane == 0) { + frame_width = ref_frame_buf->buf.y_crop_width; + frame_height = ref_frame_buf->buf.y_crop_height; + ref_frame = ref_frame_buf->buf.y_buffer; + } else { + frame_width = ref_frame_buf->buf.uv_crop_width; + frame_height = ref_frame_buf->buf.uv_crop_height; + ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer + : ref_frame_buf->buf.v_buffer; + } + + if (is_scaled) { + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + // Co-ordinate of containing block to pixel precision. + int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); +#endif + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; + + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + + scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); + scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaled_mv.col >> SUBPEL_BITS; + y0 += scaled_mv.row >> SUBPEL_BITS; + x0_16 += scaled_mv.col; + y0_16 += scaled_mv.row; + + // Get reference block pointer. + buf_ptr = ref_frame + y0 * pre_buf->stride + x0; + buf_stride = pre_buf->stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. + if (is_scaled || scaled_mv.col || scaled_mv.row || + (frame_width & 0x7) || (frame_height & 0x7)) { + int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; + + // Get reference block bottom right horizontal coordinate. + int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; + int x_pad = 0, y_pad = 0; + + if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { + x0 -= VP9_INTERP_EXTEND - 1; + x1 += VP9_INTERP_EXTEND; + x_pad = 1; + } + + if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { + y0 -= VP9_INTERP_EXTEND - 1; + y1 += VP9_INTERP_EXTEND; + y_pad = 1; + } + + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (worker != NULL) + vp9_frameworker_wait(worker, ref_frame_buf, + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + + // Skip border extension if block is inside the frame. + if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || + y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { + // Extend the border. + const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; + const int b_w = x1 - x0 + 1; + const int b_h = y1 - y0 + 1; + const int border_offset = y_pad * 3 * b_w + x_pad * 3; + + extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, + dst, dst_buf->stride, + subpel_x, subpel_y, + kernel, sf, +#if CONFIG_VP9_HIGHBITDEPTH + xd, +#endif + w, h, ref, xs, ys); + return; + } + } else { + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (worker != NULL) { + const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + vp9_frameworker_wait(worker, ref_frame_buf, + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + } + } +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + const MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; + const BLOCK_SIZE sb_type = mi->sb_type; + const int is_compound = has_second_ref(mi); + int ref; + int is_scaled; + VPxWorker *const fwo = pbi->frame_parallel_decode ? + pbi->frame_worker_owner : NULL; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; + const struct scale_factors *const sf = &ref_buf->sf; + const int idx = ref_buf->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + + if (!vp9_is_valid_scale(sf)) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + + is_scaled = vp9_is_scaled(sf); + vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, + is_scaled ? sf : NULL); + xd->block_refs[ref] = ref_buf; + + if (sb_type < BLOCK_8X8) { + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; + int i = 0, x, y; + for (y = 0; y < num_4x4_h; ++y) { + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, i++); + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, + 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } + } else { + const MV mv = mi->mv[ref].as_mv; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } +} + +static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi, + int n4_wl, int n4_hl) { + // get minimum log2 num4x4s dimension + const int x = VPXMIN(n4_wl, n4_hl); + return VPXMIN(mi->tx_size, x); +} + +static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); + memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); + } +} + +static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, + int bhl) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x; + xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y; + xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; + xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y; + } +} + +static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int bw, int bh, int x_mis, int y_mis, + int bwl, int bhl) { + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + const TileInfo *const tile = &xd->tile; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of + // passing bsize from decode_partition(). + xd->mi[0]->sb_type = bsize; + for (y = 0; y < y_mis; ++y) + for (x = !y; x < x_mis; ++x) { + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; + } + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_skip_context(xd, mi_row, mi_col); + + // Distance of Mb to the various image edges. These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + return xd->mi[0]; +} + +static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, + int mi_row, int mi_col, + vpx_reader *r, BLOCK_SIZE bsize, + int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int less8x8 = bsize < BLOCK_8X8; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + + MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, + VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); + } + + vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); + + if (mi->skip) { + dec_reset_skip_context(xd); + } + + if (!is_inter_block(mi)) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + predict_and_reconstruct_intra_block(xd, r, mi, plane, + row, col, tx_size); + } + } else { + // Prediction + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + + // Reconstruction + if (!mi->skip) { + int eobtotal = 0; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col, + tx_size); + } + + if (!less8x8 && eobtotal == 0) + mi->skip = 1; // skip loopfilter + } + } + + xd->corrupted |= vpx_reader_has_error(r); + + if (cm->lf.filter_level) { + vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); + } +} + +static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + int bsl) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; + +// assert(bsl >= 0); + + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +} + +static INLINE void dec_update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + int bw) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + + // update the partition context at the end notes. set partition bits + // of block sizes larger than the current one to be one, and partition + // bits of smaller block sizes to be zero. + memset(above_ctx, partition_context_lookup[subsize].above, bw); + memset(left_ctx, partition_context_lookup[subsize].left, bw); +} + +static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, + vpx_reader *r, + int has_rows, int has_cols, int bsl) { + const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl); + const vpx_prob *const probs = get_partition_probs(xd, ctx); + FRAME_COUNTS *counts = xd->counts; + PARTITION_TYPE p; + + if (has_rows && has_cols) + p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs); + else if (!has_rows && has_cols) + p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; + else if (has_rows && !has_cols) + p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; + else + p = PARTITION_SPLIT; + + if (counts) + ++counts->partition[ctx][p]; + + return p; +} + +// TODO(slavarnway): eliminate bsize and subsize in future commits +static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, + int mi_row, int mi_col, + vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { + VP9_COMMON *const cm = &pbi->common; + const int n8x8_l2 = n4x4_l2 - 1; + const int num_8x8_wh = 1 << n8x8_l2; + const int hbs = num_8x8_wh >> 1; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols, + n8x8_l2); + subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); + if (!hbs) { + // calculate bmode block dimensions (log 2) + xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); + xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); + decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); + } else { + switch (partition) { + case PARTITION_NONE: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); + break; + case PARTITION_HORZ: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); + if (has_rows) + decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, + n8x8_l2); + break; + case PARTITION_VERT: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); + if (has_cols) + decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, + n4x4_l2); + break; + case PARTITION_SPLIT: + decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, + n8x8_l2); + break; + default: + assert(0 && "Invalid partition type"); + } + } + + // update partition context + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) + dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); +} + +static void setup_token_decoder(const uint8_t *data, + const uint8_t *data_end, + size_t read_size, + struct vpx_internal_error_info *error_info, + vpx_reader *r, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // Validate the calculated partition length. If the buffer + // described by the partition can't be fully read, then restrict + // it to the portion that can be (for EC mode) or throw an error. + if (!read_is_valid(data, read_size, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) + vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder %d", 1); +} + +static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, + vpx_reader *r) { + int i, j, k, l, m; + + if (vpx_read_bit(r)) + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) + for (m = 0; m < UNCONSTRAINED_NODES; ++m) + vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); +} + +static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, + vpx_reader *r) { + const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; + TX_SIZE tx_size; + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + read_coef_probs_common(fc->coef_probs[tx_size], r); +} + +static void setup_segmentation(struct segmentation *seg, + struct vpx_read_bit_buffer *rb) { + int i, j; + + seg->update_map = 0; + seg->update_data = 0; + + seg->enabled = vpx_rb_read_bit(rb); + if (!seg->enabled) + return; + + // Segmentation map update + seg->update_map = vpx_rb_read_bit(rb); + if (seg->update_map) { + for (i = 0; i < SEG_TREE_PROBS; i++) + seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) + : MAX_PROB; + + seg->temporal_update = vpx_rb_read_bit(rb); + if (seg->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) + seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) + : MAX_PROB; + } else { + for (i = 0; i < PREDICTION_PROBS; i++) + seg->pred_probs[i] = MAX_PROB; + } + } + + // Segmentation data update + seg->update_data = vpx_rb_read_bit(rb); + if (seg->update_data) { + seg->abs_delta = vpx_rb_read_bit(rb); + + vp9_clearall_segfeatures(seg); + + for (i = 0; i < MAX_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + int data = 0; + const int feature_enabled = vpx_rb_read_bit(rb); + if (feature_enabled) { + vp9_enable_segfeature(seg, i, j); + data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); + if (vp9_is_segfeature_signed(j)) + data = vpx_rb_read_bit(rb) ? -data : data; + } + vp9_set_segdata(seg, i, j, data); + } + } + } +} + +static void setup_loopfilter(struct loopfilter *lf, + struct vpx_read_bit_buffer *rb) { + lf->filter_level = vpx_rb_read_literal(rb, 6); + lf->sharpness_level = vpx_rb_read_literal(rb, 3); + + // Read in loop filter deltas applied at the MB level based on mode or ref + // frame. + lf->mode_ref_delta_update = 0; + + lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb); + if (lf->mode_ref_delta_enabled) { + lf->mode_ref_delta_update = vpx_rb_read_bit(rb); + if (lf->mode_ref_delta_update) { + int i; + + for (i = 0; i < MAX_REF_LF_DELTAS; i++) + if (vpx_rb_read_bit(rb)) + lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6); + + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) + if (vpx_rb_read_bit(rb)) + lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6); + } + } +} + +static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { + return vpx_rb_read_bit(rb) ? vpx_rb_read_signed_literal(rb, 4) : 0; +} + +static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, + struct vpx_read_bit_buffer *rb) { + cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS); + cm->y_dc_delta_q = read_delta_q(rb); + cm->uv_dc_delta_q = read_delta_q(rb); + cm->uv_ac_delta_q = read_delta_q(rb); + cm->dequant_bit_depth = cm->bit_depth; + xd->lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; + +#if CONFIG_VP9_HIGHBITDEPTH + xd->bd = (int)cm->bit_depth; +#endif +} + +static void setup_segmentation_dequant(VP9_COMMON *const cm) { + // Build y/uv dequant values based on segmentation. + if (cm->seg.enabled) { + int i; + for (i = 0; i < MAX_SEGMENTS; ++i) { + const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); + cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, + cm->bit_depth); + cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } + } else { + const int qindex = cm->base_qindex; + // When segmentation is disabled, only the first value is used. The + // remaining are don't cares. + cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); + cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } +} + +static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { + const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR }; + return vpx_rb_read_bit(rb) ? SWITCHABLE + : literal_to_filter[vpx_rb_read_literal(rb, 2)]; +} + +static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + cm->render_width = cm->width; + cm->render_height = cm->height; + if (vpx_rb_read_bit(rb)) + vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); +} + +static void resize_mv_buffer(VP9_COMMON *cm) { + vpx_free(cm->cur_frame->mvs); + cm->cur_frame->mi_rows = cm->mi_rows; + cm->cur_frame->mi_cols = cm->mi_cols; + CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->cur_frame->mvs))); +} + +static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { +#if CONFIG_SIZE_LIMIT + if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Dimensions of %dx%d beyond allowed size of %dx%d.", + width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); +#endif + if (cm->width != width || cm->height != height) { + const int new_mi_rows = + ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; + const int new_mi_cols = + ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; + + // Allocations in vp9_alloc_context_buffers() depend on individual + // dimensions as well as the overall size. + if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { + if (vp9_alloc_context_buffers(cm, width, height)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate context buffers"); + } else { + vp9_set_mb_mi(cm, width, height); + } + vp9_init_context_buffers(cm); + cm->width = width; + cm->height = height; + } + if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || + cm->mi_cols > cm->cur_frame->mi_cols) { + resize_mv_buffer(cm); + } +} + +static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + int width, height; + BufferPool *const pool = cm->buffer_pool; + vp9_read_frame_size(rb, &width, &height); + resize_context_buffers(cm, width, height); + setup_render_size(cm, rb); + + lock_buffer_pool(pool); + if (vpx_realloc_frame_buffer( + get_frame_new_buffer(cm), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, + cm->byte_alignment, + &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, + pool->cb_priv)) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + } + unlock_buffer_pool(pool); + + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; +} + +static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, + int ref_xss, int ref_yss, + vpx_bit_depth_t this_bit_depth, + int this_xss, int this_yss) { + return ref_bit_depth == this_bit_depth && ref_xss == this_xss && + ref_yss == this_yss; +} + +static void setup_frame_size_with_refs(VP9_COMMON *cm, + struct vpx_read_bit_buffer *rb) { + int width, height; + int found = 0, i; + int has_valid_ref_frame = 0; + BufferPool *const pool = cm->buffer_pool; + for (i = 0; i < REFS_PER_FRAME; ++i) { + if (vpx_rb_read_bit(rb)) { + if (cm->frame_refs[i].idx != INVALID_IDX) { + YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; + width = buf->y_crop_width; + height = buf->y_crop_height; + found = 1; + break; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode frame size"); + } + } + } + + if (!found) + vp9_read_frame_size(rb, &width, &height); + + if (width <= 0 || height <= 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame size"); + + // Check to make sure at least one of frames that this frame references + // has valid dimensions. + for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_frame = &cm->frame_refs[i]; + has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && + valid_ref_frame_size(ref_frame->buf->y_crop_width, + ref_frame->buf->y_crop_height, + width, height)); + } + if (!has_valid_ref_frame) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame has invalid size"); + for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_frame = &cm->frame_refs[i]; + if (ref_frame->idx == INVALID_IDX || + !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, + ref_frame->buf->subsampling_x, + ref_frame->buf->subsampling_y, + cm->bit_depth, + cm->subsampling_x, + cm->subsampling_y)) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame has incompatible color format"); + } + + resize_context_buffers(cm, width, height); + setup_render_size(cm, rb); + + lock_buffer_pool(pool); + if (vpx_realloc_frame_buffer( + get_frame_new_buffer(cm), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, + cm->byte_alignment, + &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, + pool->cb_priv)) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + } + unlock_buffer_pool(pool); + + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; +} + +static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + int min_log2_tile_cols, max_log2_tile_cols, max_ones; + vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + + // columns + max_ones = max_log2_tile_cols - min_log2_tile_cols; + cm->log2_tile_cols = min_log2_tile_cols; + while (max_ones-- && vpx_rb_read_bit(rb)) + cm->log2_tile_cols++; + + if (cm->log2_tile_cols > 6) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid number of tile columns"); + + // rows + cm->log2_tile_rows = vpx_rb_read_bit(rb); + if (cm->log2_tile_rows) + cm->log2_tile_rows += vpx_rb_read_bit(rb); +} + +// Reads the next tile returning its size and adjusting '*data' accordingly +// based on 'is_last'. +static void get_tile_buffer(const uint8_t *const data_end, + int is_last, + struct vpx_internal_error_info *error_info, + const uint8_t **data, + vpx_decrypt_cb decrypt_cb, void *decrypt_state, + TileBuffer *buf) { + size_t size; + + if (!is_last) { + if (!read_is_valid(*data, 4, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + if (decrypt_cb) { + uint8_t be_data[4]; + decrypt_cb(decrypt_state, *data, be_data, 4); + size = mem_get_be32(be_data); + } else { + size = mem_get_be32(*data); + } + *data += 4; + + if (size > (size_t)(data_end - *data)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile size"); + } else { + size = data_end - *data; + } + + buf->data = *data; + buf->size = size; + + *data += size; +} + +static void get_tile_buffers(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + int tile_cols, int tile_rows, + TileBuffer (*tile_buffers)[1 << 6]) { + int r, c; + + for (r = 0; r < tile_rows; ++r) { + for (c = 0; c < tile_cols; ++c) { + const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); + TileBuffer *const buf = &tile_buffers[r][c]; + buf->col = c; + get_tile_buffer(data_end, is_last, &pbi->common.error, &data, + pbi->decrypt_cb, pbi->decrypt_state, buf); + } + } +} + +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + TileBuffer tile_buffers[4][1 << 6]; + int tile_row, tile_col; + int mi_row, mi_col; + TileWorkerData *tile_data = NULL; + + if (cm->lf.filter_level && !cm->skip_loop_filter && + pbi->lf_worker.data1 == NULL) { + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); + pbi->lf_worker.hook = (VPxWorkerHook)vp9_loop_filter_worker; + if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Loop filter thread creation failed"); + } + } + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + // Be sure to sync as we might be resuming after a failed frame decode. + winterface->sync(&pbi->lf_worker); + vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, + pbi->mb.plane); + } + + assert(tile_rows <= 4); + assert(tile_cols <= (1 << 6)); + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); + + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); + + vp9_reset_lfm(cm); + + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); + + // Load all tile information into tile_data. + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; + tile_data->xd = pbi->mb; + tile_data->xd.corrupted = 0; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &cm->counts; + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); + setup_token_decoder(buf->data, data_end, buf->size, &cm->error, + &tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); + vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + } + } + + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + TileInfo tile; + vp9_tile_set_row(&tile, cm, tile_row); + for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; + mi_row += MI_BLOCK_SIZE) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const int col = pbi->inv_tile_order ? + tile_cols - tile_col - 1 : tile_col; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; + vp9_tile_set_col(&tile, cm, col); + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_partition(pbi, &tile_data->xd, mi_row, + mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); + } + pbi->mb.corrupted |= tile_data->xd.corrupted; + if (pbi->mb.corrupted) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + } + // Loopfilter one row. + if (cm->lf.filter_level && !cm->skip_loop_filter) { + const int lf_start = mi_row - MI_BLOCK_SIZE; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + + // delay the loopfilter by 1 macroblock row. + if (lf_start < 0) continue; + + // decoding has completed: finish up the loop filter in this thread. + if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; + + winterface->sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + if (pbi->max_threads > 1) { + winterface->launch(&pbi->lf_worker); + } else { + winterface->execute(&pbi->lf_worker); + } + } + // After loopfiltering, the last 7 row pixels in each superblock row may + // still be changed by the longest loopfilter of the next superblock + // row. + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, + mi_row << MI_BLOCK_SIZE_LOG2); + } + } + + // Loopfilter remaining rows in the frame. + if (cm->lf.filter_level && !cm->skip_loop_filter) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + winterface->sync(&pbi->lf_worker); + lf_data->start = lf_data->stop; + lf_data->stop = cm->mi_rows; + winterface->execute(&pbi->lf_worker); + } + + // Get last tile data. + tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; + + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); + return vpx_reader_find_end(&tile_data->bit_reader); +} + +// On entry 'tile_data->data_end' points to the end of the input frame, on exit +// it is updated to reflect the bitreader position of the final tile column if +// present in the tile buffer group or NULL otherwise. +static int tile_worker_hook(TileWorkerData *const tile_data, + VP9Decoder *const pbi) { + TileInfo *volatile tile = &tile_data->xd.tile; + const int final_col = (1 << pbi->common.log2_tile_cols) - 1; + const uint8_t *volatile bit_reader_end = NULL; + volatile int n = tile_data->buf_start; + tile_data->error_info.setjmp = 1; + + if (setjmp(tile_data->error_info.jmp)) { + tile_data->error_info.setjmp = 0; + tile_data->xd.corrupted = 1; + tile_data->data_end = NULL; + return 0; + } + + tile_data->xd.error_info = &tile_data->error_info; + tile_data->xd.corrupted = 0; + + do { + int mi_row, mi_col; + const TileBuffer *const buf = pbi->tile_buffers + n; + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(tile, &pbi->common, 0, buf->col); + setup_token_decoder(buf->data, tile_data->data_end, buf->size, + &tile_data->error_info, &tile_data->bit_reader, + pbi->decrypt_cb, pbi->decrypt_state); + vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); + + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_partition(pbi, &tile_data->xd, mi_row, mi_col, + &tile_data->bit_reader, BLOCK_64X64, 4); + } + } + + if (buf->col == final_col) { + bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); + } + } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); + + tile_data->data_end = bit_reader_end; + return !tile_data->xd.corrupted; +} + +// sorts in descending order +static int compare_tile_buffers(const void *a, const void *b) { + const TileBuffer *const buf1 = (const TileBuffer*)a; + const TileBuffer *const buf2 = (const TileBuffer*)b; + return (int)(buf2->size - buf1->size); +} + +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const uint8_t *bit_reader_end = NULL; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = VPXMIN(pbi->max_threads, tile_cols); + int n; + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->max_threads; + CHECK_MEM_ERROR(cm, pbi->tile_workers, + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (n = 0; n < num_threads; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + ++pbi->num_tile_workers; + + winterface->init(worker); + if (n < num_threads - 1 && !winterface->reset(worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile decoder thread creation failed"); + } + } + } + + // Reset tile decoding hook + for (n = 0; n < num_workers; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = + &pbi->tile_worker_data[n + pbi->total_tiles]; + winterface->sync(worker); + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; + worker->hook = (VPxWorkerHook)tile_worker_hook; + worker->data1 = tile_data; + worker->data2 = pbi; + } + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); + + vp9_reset_lfm(cm); + + // Load tile data into tile_buffers + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, + &pbi->tile_buffers); + + // Sort the buffers based on size in descending order. + qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), + compare_tile_buffers); + + if (num_workers == tile_cols) { + // Rearrange the tile buffers such that the largest, and + // presumably the most difficult, tile will be decoded in the main thread. + // This should help minimize the number of instances where the main thread + // is waiting for a worker to complete. + const TileBuffer largest = pbi->tile_buffers[0]; + memmove(pbi->tile_buffers, pbi->tile_buffers + 1, + (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); + pbi->tile_buffers[tile_cols - 1] = largest; + } else { + int start = 0, end = tile_cols - 2; + TileBuffer tmp; + + // Interleave the tiles to distribute the load between threads, assuming a + // larger tile implies it is more difficult to decode. + while (start < end) { + tmp = pbi->tile_buffers[start]; + pbi->tile_buffers[start] = pbi->tile_buffers[end]; + pbi->tile_buffers[end] = tmp; + start += 2; + end -= 2; + } + } + + // Initialize thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (n = 0; n < num_workers; ++n) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[n].data1; + vp9_zero(tile_data->counts); + } + } + + { + const int base = tile_cols / num_workers; + const int remain = tile_cols % num_workers; + int buf_start = 0; + + for (n = 0; n < num_workers; ++n) { + const int count = base + (remain + n) / num_workers; + VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + + tile_data->buf_start = buf_start; + tile_data->buf_end = buf_start + count - 1; + tile_data->data_end = data_end; + buf_start += count; + + worker->had_error = 0; + if (n == num_workers - 1) { + assert(tile_data->buf_end == tile_cols - 1); + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + for (; n > 0; --n) { + VPxWorker *const worker = &pbi->tile_workers[n - 1]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + // TODO(jzern): The tile may have specific error data associated with + // its vpx_internal_error_info which could be propagated to the main info + // in cm. Additionally once the threads have been synced and an error is + // detected, there's no point in continuing to decode tiles. + pbi->mb.corrupted |= !winterface->sync(worker); + if (!bit_reader_end) bit_reader_end = tile_data->data_end; + } + } + + // Accumulate thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (n = 0; n < num_workers; ++n) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[n].data1; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); + } + } + + assert(bit_reader_end || pbi->mb.corrupted); + return bit_reader_end; +} + +static void error_handler(void *data) { + VP9_COMMON *const cm = (VP9_COMMON *)data; + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); +} + +static void read_bitdepth_colorspace_sampling( + VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + if (cm->profile >= PROFILE_2) { + cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 1; +#endif + } else { + cm->bit_depth = VPX_BITS_8; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 0; +#endif + } + cm->color_space = vpx_rb_read_literal(rb, 3); + if (cm->color_space != VPX_CS_SRGB) { + cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { + cm->subsampling_x = vpx_rb_read_bit(rb); + cm->subsampling_y = vpx_rb_read_bit(rb); + if (cm->subsampling_x == 1 && cm->subsampling_y == 1) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "4:2:0 color not supported in profile 1 or 3"); + if (vpx_rb_read_bit(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Reserved bit set"); + } else { + cm->subsampling_y = cm->subsampling_x = 1; + } + } else { + cm->color_range = VPX_CR_FULL_RANGE; + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { + // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. + // 4:2:2 or 4:4:0 chroma sampling is not allowed. + cm->subsampling_y = cm->subsampling_x = 0; + if (vpx_rb_read_bit(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Reserved bit set"); + } else { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "4:4:4 color not supported in profile 0 or 2"); + } + } +} + +static size_t read_uncompressed_header(VP9Decoder *pbi, + struct vpx_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = pool->frame_bufs; + int i, mask, ref_index = 0; + size_t sz; + + cm->last_frame_type = cm->frame_type; + cm->last_intra_only = cm->intra_only; + + if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame marker"); + + cm->profile = vp9_read_profile(rb); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); +#else + if (cm->profile >= PROFILE_2) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); +#endif + + cm->show_existing_frame = vpx_rb_read_bit(rb); + if (cm->show_existing_frame) { + // Show an existing frame directly. + const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)]; + lock_buffer_pool(pool); + if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Buffer %d does not contain a decoded frame", + frame_to_show); + } + + ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); + unlock_buffer_pool(pool); + pbi->refresh_frame_flags = 0; + cm->lf.filter_level = 0; + cm->show_frame = 1; + + if (pbi->frame_parallel_decode) { + for (i = 0; i < REF_FRAMES; ++i) + cm->next_ref_frame_map[i] = cm->ref_frame_map[i]; + } + return 0; + } + + cm->frame_type = (FRAME_TYPE) vpx_rb_read_bit(rb); + cm->show_frame = vpx_rb_read_bit(rb); + cm->error_resilient_mode = vpx_rb_read_bit(rb); + + if (cm->frame_type == KEY_FRAME) { + if (!vp9_read_sync_code(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + + read_bitdepth_colorspace_sampling(cm, rb); + pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; + + for (i = 0; i < REFS_PER_FRAME; ++i) { + cm->frame_refs[i].idx = INVALID_IDX; + cm->frame_refs[i].buf = NULL; + } + + setup_frame_size(cm, rb); + if (pbi->need_resync) { + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } + } else { + cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb); + + cm->reset_frame_context = cm->error_resilient_mode ? + 0 : vpx_rb_read_literal(rb, 2); + + if (cm->intra_only) { + if (!vp9_read_sync_code(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + if (cm->profile > PROFILE_0) { + read_bitdepth_colorspace_sampling(cm, rb); + } else { + // NOTE: The intra-only frame header does not include the specification + // of either the color format or color sub-sampling in profile 0. VP9 + // specifies that the default color format should be YUV 4:2:0 in this + // case (normative). + cm->color_space = VPX_CS_BT_601; + cm->color_range = VPX_CR_STUDIO_RANGE; + cm->subsampling_y = cm->subsampling_x = 1; + cm->bit_depth = VPX_BITS_8; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 0; +#endif + } + + pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); + setup_frame_size(cm, rb); + if (pbi->need_resync) { + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } + } else if (pbi->need_resync != 1) { /* Skip if need resync */ + pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); + for (i = 0; i < REFS_PER_FRAME; ++i) { + const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2); + const int idx = cm->ref_frame_map[ref]; + RefBuffer *const ref_frame = &cm->frame_refs[i]; + ref_frame->idx = idx; + ref_frame->buf = &frame_bufs[idx].buf; + cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb); + } + + setup_frame_size_with_refs(cm, rb); + + cm->allow_high_precision_mv = vpx_rb_read_bit(rb); + cm->interp_filter = read_interp_filter(rb); + + for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_buf = &cm->frame_refs[i]; +#if CONFIG_VP9_HIGHBITDEPTH + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + ref_buf->buf->y_crop_width, + ref_buf->buf->y_crop_height, + cm->width, cm->height, + cm->use_highbitdepth); +#else + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + ref_buf->buf->y_crop_width, + ref_buf->buf->y_crop_height, + cm->width, cm->height); +#endif + } + } + } +#if CONFIG_VP9_HIGHBITDEPTH + get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; +#endif + get_frame_new_buffer(cm)->color_space = cm->color_space; + get_frame_new_buffer(cm)->color_range = cm->color_range; + get_frame_new_buffer(cm)->render_width = cm->render_width; + get_frame_new_buffer(cm)->render_height = cm->render_height; + + if (pbi->need_resync) { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Keyframe / intra-only frame required to reset decoder" + " state"); + } + + if (!cm->error_resilient_mode) { + cm->refresh_frame_context = vpx_rb_read_bit(rb); + cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); + if (!cm->frame_parallel_decoding_mode) + vp9_zero(cm->counts); + } else { + cm->refresh_frame_context = 0; + cm->frame_parallel_decoding_mode = 1; + } + + // This flag will be overridden by the call to vp9_setup_past_independence + // below, forcing the use of context 0 for those frame types. + cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); + + // Generate next_ref_frame_map. + lock_buffer_pool(pool); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; + ++frame_bufs[cm->new_fb_idx].ref_count; + } else { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + } + // Current thread holds the reference frame. + if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + ++ref_index; + } + + for (; ref_index < REF_FRAMES; ++ref_index) { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + // Current thread holds the reference frame. + if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + } + unlock_buffer_pool(pool); + pbi->hold_ref_buf = 1; + + if (frame_is_intra_only(cm) || cm->error_resilient_mode) + vp9_setup_past_independence(cm); + + setup_loopfilter(&cm->lf, rb); + setup_quantization(cm, &pbi->mb, rb); + setup_segmentation(&cm->seg, rb); + setup_segmentation_dequant(cm); + + setup_tile_info(cm, rb); + sz = vpx_rb_read_literal(rb, 16); + + if (sz == 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid header size"); + + return sz; +} + +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, + size_t partition_size) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + FRAME_CONTEXT *const fc = cm->fc; + vpx_reader r; + int k; + + if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, + pbi->decrypt_state)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder 0"); + + cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); + if (cm->tx_mode == TX_MODE_SELECT) + read_tx_mode_probs(&fc->tx_probs, &r); + read_coef_probs(fc, cm->tx_mode, &r); + + for (k = 0; k < SKIP_CONTEXTS; ++k) + vp9_diff_update_prob(&r, &fc->skip_probs[k]); + + if (!frame_is_intra_only(cm)) { + nmv_context *const nmvc = &fc->nmvc; + int i, j; + + read_inter_mode_probs(fc, &r); + + if (cm->interp_filter == SWITCHABLE) + read_switchable_interp_probs(fc, &r); + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); + + cm->reference_mode = read_frame_reference_mode(cm, &r); + if (cm->reference_mode != SINGLE_REFERENCE) + setup_compound_reference_mode(cm); + read_frame_reference_mode_probs(cm, &r); + + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) + for (i = 0; i < INTRA_MODES - 1; ++i) + vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); + + for (j = 0; j < PARTITION_CONTEXTS; ++j) + for (i = 0; i < PARTITION_TYPES - 1; ++i) + vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); + + read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); + } + + return vpx_reader_has_error(&r); +} + +static struct vpx_read_bit_buffer *init_read_bit_buffer( + VP9Decoder *pbi, + struct vpx_read_bit_buffer *rb, + const uint8_t *data, + const uint8_t *data_end, + uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { + rb->bit_offset = 0; + rb->error_handler = error_handler; + rb->error_handler_data = &pbi->common; + if (pbi->decrypt_cb) { + const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); + pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); + rb->bit_buffer = clear_data; + rb->bit_buffer_end = clear_data + n; + } else { + rb->bit_buffer = data; + rb->bit_buffer_end = data_end; + } + return rb; +} + +//------------------------------------------------------------------------------ + +int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) { + return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && + vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && + vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; +} + +void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, + int *width, int *height) { + *width = vpx_rb_read_literal(rb, 16) + 1; + *height = vpx_rb_read_literal(rb, 16) + 1; +} + +BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) { + int profile = vpx_rb_read_bit(rb); + profile |= vpx_rb_read_bit(rb) << 1; + if (profile > 2) + profile += vpx_rb_read_bit(rb); + return (BITSTREAM_PROFILE) profile; +} + +void vp9_decode_frame(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + struct vpx_read_bit_buffer rb; + int context_updated = 0; + uint8_t clear_data[MAX_VP9_HEADER_SIZE]; + const size_t first_partition_size = read_uncompressed_header(pbi, + init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); + const int tile_rows = 1 << cm->log2_tile_rows; + const int tile_cols = 1 << cm->log2_tile_cols; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + xd->cur_buf = new_fb; + + if (!first_partition_size) { + // showing a frame directly + *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2); + return; + } + + data += vpx_rb_bytes_read(&rb); + if (!read_is_valid(data, first_partition_size, data_end)) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt header length"); + + cm->use_prev_frame_mvs = !cm->error_resilient_mode && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->last_intra_only && + cm->last_show_frame && + (cm->last_frame_type != KEY_FRAME); + + vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); + + *cm->fc = cm->frame_contexts[cm->frame_context_idx]; + if (!cm->fc->initialized) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Uninitialized entropy context."); + + xd->corrupted = 0; + new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); + if (new_fb->corrupted) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data header is corrupted."); + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + vp9_loop_filter_frame_init(cm, cm->lf.filter_level); + } + + // If encoded in frame parallel mode, frame context is ready after decoding + // the frame header. + if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) { + VPxWorker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + if (cm->refresh_frame_context) { + context_updated = 1; + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; + } + vp9_frameworker_lock_stats(worker); + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + frame_worker_data->frame_context_ready = 1; + // Signal the main thread that context is ready. + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } + + if (pbi->tile_worker_data == NULL || + (tile_cols * tile_rows) != pbi->total_tiles) { + const int num_tile_workers = tile_cols * tile_rows + + ((pbi->max_threads > 1) ? pbi->max_threads : 0); + const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); + // Ensure tile data offsets will be properly aligned. This may fail on + // platforms without DECLARE_ALIGNED(). + assert((sizeof(*pbi->tile_worker_data) % 16) == 0); + vpx_free(pbi->tile_worker_data); + CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); + pbi->total_tiles = tile_rows * tile_cols; + } + + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { + // Multi-threaded tile decoder + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, + cm->lf.filter_level, 0, 0, pbi->tile_workers, + pbi->num_tile_workers, &pbi->lf_row_sync); + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); + } + } else { + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); + } + + if (!xd->corrupted) { + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { + vp9_adapt_coef_probs(cm); + + if (!frame_is_intra_only(cm)) { + vp9_adapt_mode_probs(cm); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + } + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); + } + + // Non frame parallel update frame context here. + if (cm->refresh_frame_context && !context_updated) + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h new file mode 100644 index 00000000000..ce33cbdbd91 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ +#define VP9_DECODER_VP9_DECODEFRAME_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vp9/common/vp9_enums.h" + +struct VP9Decoder; +struct vpx_read_bit_buffer; + +int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb); +void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, + int *width, int *height); +BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb); + +void vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c new file mode 100644 index 00000000000..ffc6839ad1a --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c @@ -0,0 +1,911 @@ +/* + Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_decodeframe.h" + +#include "vpx_dsp/vpx_dsp_common.h" + +static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { + return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); +} + +static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, int size_group) { + const PREDICTION_MODE y_mode = + read_intra_mode(r, cm->fc->y_mode_prob[size_group]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->y_mode[size_group][y_mode]; + return y_mode; +} + +static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, + PREDICTION_MODE y_mode) { + const PREDICTION_MODE uv_mode = read_intra_mode(r, + cm->fc->uv_mode_prob[y_mode]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->uv_mode[y_mode][uv_mode]; + return uv_mode; +} + +static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, int ctx) { + const int mode = vpx_read_tree(r, vp9_inter_mode_tree, + cm->fc->inter_mode_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->inter_mode[ctx][mode]; + + return NEARESTMV + mode; +} + +static int read_segment_id(vpx_reader *r, const struct segmentation *seg) { + return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs); +} + +static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + TX_SIZE max_tx_size, vpx_reader *r) { + FRAME_COUNTS *counts = xd->counts; + const int ctx = get_tx_size_context(xd); + const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); + int tx_size = vpx_read(r, tx_probs[0]); + if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { + tx_size += vpx_read(r, tx_probs[1]); + if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) + tx_size += vpx_read(r, tx_probs[2]); + } + + if (counts) + ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; + return (TX_SIZE)tx_size; +} + +static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + int allow_select, vpx_reader *r) { + TX_MODE tx_mode = cm->tx_mode; + BLOCK_SIZE bsize = xd->mi[0]->sb_type; + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) + return read_selected_tx_size(cm, xd, max_tx_size, r); + else + return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); +} + +static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, + int mi_offset, int x_mis, int y_mis) { + int x, y, segment_id = INT_MAX; + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + return segment_id; +} + +static void set_segment_id(VP9_COMMON *cm, int mi_offset, + int x_mis, int y_mis, int segment_id) { + int x, y; + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; +} + +static void copy_segment_id(const VP9_COMMON *cm, + const uint8_t *last_segment_ids, + uint8_t *current_segment_ids, + int mi_offset, int x_mis, int y_mis) { + int x, y; + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + current_segment_ids[mi_offset + y * cm->mi_cols + x] = last_segment_ids ? + last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0; +} + +static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, + int x_mis, int y_mis, + vpx_reader *r) { + struct segmentation *const seg = &cm->seg; + int segment_id; + + if (!seg->enabled) + return 0; // Default for disabled segmentation + + if (!seg->update_map) { + copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, + mi_offset, x_mis, y_mis); + return 0; + } + + segment_id = read_segment_id(r, seg); + set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); + return segment_id; +} + +static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + struct segmentation *const seg = &cm->seg; + MODE_INFO *const mi = xd->mi[0]; + int predicted_segment_id, segment_id; + const int mi_offset = mi_row * cm->mi_cols + mi_col; + + if (!seg->enabled) + return 0; // Default for disabled segmentation + + predicted_segment_id = cm->last_frame_seg_map ? + dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) : + 0; + + if (!seg->update_map) { + copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, + mi_offset, x_mis, y_mis); + return predicted_segment_id; + } + + if (seg->temporal_update) { + const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); + mi->seg_id_predicted = vpx_read(r, pred_prob); + segment_id = mi->seg_id_predicted ? predicted_segment_id + : read_segment_id(r, seg); + } else { + segment_id = read_segment_id(r, seg); + } + set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); + return segment_id; +} + +static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, + int segment_id, vpx_reader *r) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { + return 1; + } else { + const int ctx = vp9_get_skip_context(xd); + const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->skip[ctx][skip]; + return skip; + } +} + +static void read_intra_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + MODE_INFO *const mi = xd->mi[0]; + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; + const BLOCK_SIZE bsize = mi->sb_type; + int i; + const int mi_offset = mi_row * cm->mi_cols + mi_col; + + mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, 1, r); + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; + + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); + mi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); + break; + default: + mi->mode = read_intra_mode(r, + get_y_mode_probs(mi, above_mi, left_mi, 0)); + } + + mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]); +} + +static int read_mv_component(vpx_reader *r, + const nmv_component *mvcomp, int usehp) { + int mag, d, fr, hp; + const int sign = vpx_read(r, mvcomp->sign); + const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes); + const int class0 = mv_class == MV_CLASS_0; + + // Integer part + if (class0) { + d = vpx_read_tree(r, vp9_mv_class0_tree, mvcomp->class0); + mag = 0; + } else { + int i; + const int n = mv_class + CLASS0_BITS - 1; // number of bits + + d = 0; + for (i = 0; i < n; ++i) + d |= vpx_read(r, mvcomp->bits[i]) << i; + mag = CLASS0_SIZE << (mv_class + 2); + } + + // Fractional part + fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d] + : mvcomp->fp); + + // High precision part (if hp is not used, the default value of the hp is 1) + hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) + : 1; + + // Result + mag += ((d << 3) | (fr << 1) | hp) + 1; + return sign ? -mag : mag; +} + +static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, + const nmv_context *ctx, + nmv_context_counts *counts, int allow_hp) { + const MV_JOINT_TYPE joint_type = + (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints); + const int use_hp = allow_hp && use_mv_hp(ref); + MV diff = {0, 0}; + + if (mv_joint_vertical(joint_type)) + diff.row = read_mv_component(r, &ctx->comps[0], use_hp); + + if (mv_joint_horizontal(joint_type)) + diff.col = read_mv_component(r, &ctx->comps[1], use_hp); + + vp9_inc_mv(&diff, counts); + + mv->row = ref->row + diff.row; + mv->col = ref->col + diff.col; +} + +static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, + const MACROBLOCKD *xd, + vpx_reader *r) { + if (cm->reference_mode == REFERENCE_MODE_SELECT) { + const int ctx = vp9_get_reference_mode_context(cm, xd); + const REFERENCE_MODE mode = + (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->comp_inter[ctx][mode]; + return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE + } else { + return cm->reference_mode; + } +} + +// Read the referncence frame +static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, + vpx_reader *r, + int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { + FRAME_CONTEXT *const fc = cm->fc; + FRAME_COUNTS *counts = xd->counts; + + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { + ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); + ref_frame[1] = NONE; + } else { + const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); + // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding + if (mode == COMPOUND_REFERENCE) { + const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); + const int bit = vpx_read(r, fc->comp_ref_prob[ctx]); + if (counts) + ++counts->comp_ref[ctx][bit]; + ref_frame[idx] = cm->comp_fixed_ref; + ref_frame[!idx] = cm->comp_var_ref[bit]; + } else if (mode == SINGLE_REFERENCE) { + const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); + const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); + if (counts) + ++counts->single_ref[ctx0][0][bit0]; + if (bit0) { + const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); + const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); + if (counts) + ++counts->single_ref[ctx1][1][bit1]; + ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; + } else { + ref_frame[0] = LAST_FRAME; + } + + ref_frame[1] = NONE; + } else { + assert(0 && "Invalid prediction mode."); + } + } +} + +// TODO(slavarnway): Move this decoder version of +// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the +// encoder. +// +// Returns a context number for the given MB prediction signal +static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi ? above_mi->interp_filter + : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} + +static INLINE INTERP_FILTER read_switchable_interp_filter( + VP9_COMMON *const cm, MACROBLOCKD *const xd, + vpx_reader *r) { + const int ctx = dec_get_pred_context_switchable_interp(xd); + const INTERP_FILTER type = + (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree, + cm->fc->switchable_interp_prob[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->switchable_interp[ctx][type]; + return type; +} + +static void read_intra_block_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, MODE_INFO *mi, + vpx_reader *r) { + const BLOCK_SIZE bsize = mi->sb_type; + int i; + + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); + mi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, + r, 0); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode_y(cm, xd, r, 0); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, + r, 0); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode_y(cm, xd, r, 0); + break; + default: + mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); + } + + mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); + + // Initialize interp_filter here so we do not have to check for inter block + // modes in dec_get_pred_context_switchable_interp() + mi->interp_filter = SWITCHABLE_FILTERS; + + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; +} + +static INLINE int is_mv_valid(const MV *mv) { + return mv->row > MV_LOW && mv->row < MV_UPP && + mv->col > MV_LOW && mv->col < MV_UPP; +} + +static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +static INLINE void zero_mv_pair(int_mv *dst) { + memset(dst, 0, sizeof(*dst) * 2); +} + +static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, + PREDICTION_MODE mode, + int_mv mv[2], int_mv ref_mv[2], + int_mv near_nearest_mv[2], + int is_compound, int allow_hp, vpx_reader *r) { + int i; + int ret = 1; + + switch (mode) { + case NEWMV: { + FRAME_COUNTS *counts = xd->counts; + nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL; + for (i = 0; i < 1 + is_compound; ++i) { + read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, + allow_hp); + ret = ret && is_mv_valid(&mv[i].as_mv); + } + break; + } + case NEARMV: + case NEARESTMV: { + copy_mv_pair(mv, near_nearest_mv); + break; + } + case ZEROMV: { + zero_mv_pair(mv); + break; + } + default: { + return 0; + } + } + return ret; +} + +static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int segment_id, vpx_reader *r) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { + return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; + } else { + const int ctx = get_intra_inter_context(xd); + const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->intra_inter[ctx][is_inter]; + return is_inter; + } +} + +static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, + int refmv_count) { + int i; + + // Make sure all the candidates are properly clamped etc + for (i = 0; i < refmv_count; ++i) { + lower_mv_precision(&mvlist[i].as_mv, allow_hp); + *best_mv = mvlist[i]; + } +} + +static void fpm_sync(void *const data, int mi_row) { + VP9Decoder *const pbi = (VP9Decoder *)data; + vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, + mi_row << MI_BLOCK_SIZE_LOG2); +} + +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector or early_break +// it will also skip all additional processing and jump to Done! +#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \ + do { \ + if (refmv_count) { \ + if ((mv).as_int != (mv_ref_list)[0].as_int) { \ + (mv_ref_list)[(refmv_count)] = (mv); \ + refmv_count++; \ + goto Done; \ + } \ + } else { \ + (mv_ref_list)[(refmv_count)++] = (mv); \ + if (early_break) \ + goto Done; \ + } \ + } while (0) + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \ + refmv_count, mv_ref_list, Done) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, Done); \ + } \ + } while (0) + +// This function searches the neighborhood of a given MB/SB +// to try and find candidate reference vectors. +static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, + const POSITION *const mv_ref_search, + int_mv *mv_ref_list, + int mi_row, int mi_col, int block, int is_sub8x8, + find_mv_refs_sync sync, void *const data) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + int different_ref_found = 0; + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const TileInfo *const tile = &xd->tile; + // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop + // searching after the first mv is found. + const int early_break = (mode != NEARMV); + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + i = 0; + if (is_sub8x8) { + // If the size < 8x8 we get the mv from the bmi substructure for the + // nearest two blocks. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast + // on windows platform. The sync here is unnecessary if use_prev_frame_mvs + // is 0. But after removing it, there will be hang in the unit test on windows + // due to several threads waiting for a thread's signal. +#if defined(_WIN32) && !HAVE_PTHREAD_H + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } +#endif + + // Check the last frame's mode and mv info. + if (prev_frame_mvs) { + // Synchronize here for frame parallel decode if sync function is provided. + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } + + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. + if (prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + } + + if (mode == NEARMV) + refmv_count = MAX_MV_REF_CANDIDATES; + else + // we only care about the nearestmv for the remaining modes + refmv_count = 1; + + Done: + // Clamp vectors + for (i = 0; i < refmv_count; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); + + return refmv_count; +} + +static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + PREDICTION_MODE b_mode, int block, + int ref, int mi_row, int mi_col, + int_mv *best_sub8x8) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + int refmv_count; + + assert(MAX_MV_REF_CANDIDATES == 2); + + refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], + mv_ref_search, mv_list, mi_row, mi_col, block, + 1, NULL, NULL); + + switch (block) { + case 0: + best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; + break; + case 1: + case 2: + if (b_mode == NEARESTMV) { + best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; + } else { + best_sub8x8->as_int = 0; + for (n = 0; n < refmv_count; ++n) + if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { + best_sub8x8->as_int = mv_list[n].as_int; + break; + } + } + break; + case 3: + if (b_mode == NEARESTMV) { + best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; + } else { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + best_sub8x8->as_int = 0; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { + best_sub8x8->as_int = candidates[n].as_int; + break; + } + } + break; + default: + assert(0 && "Invalid block index."); + } +} + +static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + int mi_row, int mi_col) { + int i; + int context_counter = 0; + const TileInfo *const tile = &xd->tile; + + // Get mode count from nearest 2 blocks + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate->mode]; + } + } + + return counter_to_context[context_counter]; +} + +static void read_inter_block_mode_info(VP9Decoder *const pbi, + MACROBLOCKD *const xd, + MODE_INFO *const mi, + int mi_row, int mi_col, vpx_reader *r) { + VP9_COMMON *const cm = &pbi->common; + const BLOCK_SIZE bsize = mi->sb_type; + const int allow_hp = cm->allow_high_precision_mv; + int_mv best_ref_mvs[2]; + int ref, is_compound; + uint8_t inter_mode_ctx; + const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; + + read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame); + is_compound = has_second_ref(mi); + inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col); + + if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { + mi->mode = ZEROMV; + if (bsize < BLOCK_8X8) { + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid usage of segement feature on small blocks"); + return; + } + } else { + if (bsize >= BLOCK_8X8) + mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); + else + // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. + // Setting mode to NEARESTMV forces the search to stop after the nearestmv + // has been found. After b_modes have been read, mode will be overwritten + // by the last b_mode. + mi->mode = NEARESTMV; + + if (mi->mode != ZEROMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + int refmv_count; + + refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, + tmp_mvs, mi_row, mi_col, -1, 0, + fpm_sync, (void *)pbi); + + dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], + refmv_count); + } + } + } + + mi->interp_filter = (cm->interp_filter == SWITCHABLE) + ? read_switchable_interp_filter(cm, xd, r) + : cm->interp_filter; + + if (bsize < BLOCK_8X8) { + const int num_4x4_w = 1 << xd->bmode_blocks_wl; + const int num_4x4_h = 1 << xd->bmode_blocks_hl; + int idx, idy; + PREDICTION_MODE b_mode; + int_mv best_sub8x8[2]; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const int j = idy * 2 + idx; + b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); + + if (b_mode == NEARESTMV || b_mode == NEARMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) + append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, + mi_row, mi_col, &best_sub8x8[ref]); + } + + if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, + best_sub8x8, is_compound, allow_hp, r)) { + xd->corrupted |= 1; + break; + } + + if (num_4x4_h == 2) + mi->bmi[j + 2] = mi->bmi[j]; + if (num_4x4_w == 2) + mi->bmi[j + 1] = mi->bmi[j]; + } + } + + mi->mode = b_mode; + + copy_mv_pair(mi->mv, mi->bmi[3].as_mv); + } else { + xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, + best_ref_mvs, is_compound, allow_hp, r); + } +} + +static void read_inter_frame_mode_info(VP9Decoder *const pbi, + MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + VP9_COMMON *const cm = &pbi->common; + MODE_INFO *const mi = xd->mi[0]; + int inter_block; + + mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, + y_mis); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); + + if (inter_block) + read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); + else + read_intra_block_mode_info(cm, xd, mi, r); +} + +static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, + const MV_REFERENCE_FRAME *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + VP9_COMMON *const cm = &pbi->common; + MODE_INFO *const mi = xd->mi[0]; + MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; + + if (frame_is_intra_only(cm)) { + read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); + } else { + read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); + + for (h = 0; h < y_mis; ++h) { + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mvs + w; + copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); + copy_mv_pair(mv->mv, mi->mv); + } + frame_mvs += cm->mi_cols; + } + } +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && + !is_inter_block(mi) && need_top_left[mi->uv_mode]) + assert(0); +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h new file mode 100644 index 00000000000..45569ec81f5 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_DECODER_VP9_DECODEMV_H_ +#define VP9_DECODER_VP9_DECODEMV_H_ + +#include "vpx_dsp/bitreader.h" + +#include "vp9/decoder/vp9_decoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c new file mode 100644 index 00000000000..935c04f3aa5 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c @@ -0,0 +1,518 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/system_state.h" +#include "vpx_ports/vpx_once.h" +#include "vpx_ports/vpx_timer.h" +#include "vpx_scale/vpx_scale.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_onyxc_int.h" +#if CONFIG_VP9_POSTPROC +#include "vp9/common/vp9_postproc.h" +#endif +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_reconintra.h" + +#include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_detokenize.h" + +static void initialize_dec(void) { + static volatile int init_done = 0; + + if (!init_done) { + vp9_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); + vp9_init_intra_predictors(); + init_done = 1; + } +} + +static void vp9_dec_setup_mi(VP9_COMMON *cm) { + cm->mi = cm->mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + memset(cm->mi_grid_base, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); +} + +static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { + cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); + if (!cm->mip) + return 1; + cm->mi_alloc_size = mi_size; + cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); + if (!cm->mi_grid_base) + return 1; + return 0; +} + +static void vp9_dec_free_mi(VP9_COMMON *cm) { + vpx_free(cm->mip); + cm->mip = NULL; + vpx_free(cm->mi_grid_base); + cm->mi_grid_base = NULL; +} + +VP9Decoder *vp9_decoder_create(BufferPool *const pool) { + VP9Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi)); + VP9_COMMON *volatile const cm = pbi ? &pbi->common : NULL; + + if (!cm) + return NULL; + + vp9_zero(*pbi); + + if (setjmp(cm->error.jmp)) { + cm->error.setjmp = 0; + vp9_decoder_remove(pbi); + return NULL; + } + + cm->error.setjmp = 1; + + CHECK_MEM_ERROR(cm, cm->fc, + (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); + CHECK_MEM_ERROR(cm, cm->frame_contexts, + (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, + sizeof(*cm->frame_contexts))); + + pbi->need_resync = 1; + once(initialize_dec); + + // Initialize the references to not point to any frame buffers. + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); + + cm->current_video_frame = 0; + pbi->ready_for_new_data = 1; + pbi->common.buffer_pool = pool; + + cm->bit_depth = VPX_BITS_8; + cm->dequant_bit_depth = VPX_BITS_8; + + cm->alloc_mi = vp9_dec_alloc_mi; + cm->free_mi = vp9_dec_free_mi; + cm->setup_mi = vp9_dec_setup_mi; + + vp9_loop_filter_init(cm); + + cm->error.setjmp = 0; + + vpx_get_worker_interface()->init(&pbi->lf_worker); + + return pbi; +} + +void vp9_decoder_remove(VP9Decoder *pbi) { + int i; + + if (!pbi) + return; + + vpx_get_worker_interface()->end(&pbi->lf_worker); + vpx_free(pbi->lf_worker.data1); + + for (i = 0; i < pbi->num_tile_workers; ++i) { + VPxWorker *const worker = &pbi->tile_workers[i]; + vpx_get_worker_interface()->end(worker); + } + + vpx_free(pbi->tile_worker_data); + vpx_free(pbi->tile_workers); + + if (pbi->num_tile_workers > 0) { + vp9_loop_filter_dealloc(&pbi->lf_row_sync); + } + + vpx_free(pbi); +} + +static int equal_dimensions(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { + return a->y_height == b->y_height && a->y_width == b->y_width && + a->uv_height == b->uv_height && a->uv_width == b->uv_width; +} + +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { + VP9_COMMON *cm = &pbi->common; + + /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + * encoder is using the frame buffers for. This is just a stub to keep the + * vpxenc --test-decode functionality working, and will be replaced in a + * later commit that adds VP9-specific controls for this functionality. + */ + if (ref_frame_flag == VP9_LAST_FLAG) { + const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); + if (cfg == NULL) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "No 'last' reference frame"); + return VPX_CODEC_ERROR; + } + if (!equal_dimensions(cfg, sd)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + else + vp8_yv12_copy_frame(cfg, sd); + } else { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Invalid reference frame"); + } + + return cm->error.error_code; +} + + +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { + RefBuffer *ref_buf = NULL; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. + if (ref_frame_flag == VP9_LAST_FLAG) { + ref_buf = &cm->frame_refs[0]; + } else if (ref_frame_flag == VP9_GOLD_FLAG) { + ref_buf = &cm->frame_refs[1]; + } else if (ref_frame_flag == VP9_ALT_FLAG) { + ref_buf = &cm->frame_refs[2]; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Invalid reference frame"); + return cm->error.error_code; + } + + if (!equal_dimensions(ref_buf->buf, sd)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + } else { + int *ref_fb_ptr = &ref_buf->idx; + + // Find an empty frame buffer. + const int free_fb = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Unable to find free frame buffer"); + return cm->error.error_code; + } + + // Decrease ref_count since it will be increased again in + // ref_cnt_fb() below. + --frame_bufs[free_fb].ref_count; + + // Manage the reference counters and copy image. + ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb); + ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf; + vp8_yv12_copy_frame(sd, ref_buf->buf); + } + + return cm->error.error_code; +} + +/* If any buffer updating is signaled it should be done here. */ +static void swap_frame_buffers(VP9Decoder *pbi) { + int ref_index = 0, mask; + VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + + lock_buffer_pool(pool); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if (mask & 1) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; + } + unlock_buffer_pool(pool); + pbi->hold_ref_buf = 0; + cm->frame_to_show = get_frame_new_buffer(cm); + + if (!pbi->frame_parallel_decode || !cm->show_frame) { + lock_buffer_pool(pool); + --frame_bufs[cm->new_fb_idx].ref_count; + unlock_buffer_pool(pool); + } + + // Invalidate these references until the next frame starts. + for (ref_index = 0; ref_index < 3; ref_index++) + cm->frame_refs[ref_index].idx = -1; +} + +int vp9_receive_compressed_data(VP9Decoder *pbi, + size_t size, const uint8_t **psource) { + VP9_COMMON *volatile const cm = &pbi->common; + BufferPool *volatile const pool = cm->buffer_pool; + RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; + const uint8_t *source = *psource; + int retcode = 0; + cm->error.error_code = VPX_CODEC_OK; + + if (size == 0) { + // This is used to signal that we are missing frames. + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. + if (cm->frame_refs[0].idx > 0) { + assert(cm->frame_refs[0].buf != NULL); + cm->frame_refs[0].buf->corrupted = 1; + } + } + + pbi->ready_for_new_data = 0; + + // Check if the previous frame was a frame without any references to it. + // Release frame buffer if not decoding in frame parallel mode. + if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 + && frame_bufs[cm->new_fb_idx].ref_count == 0) + pool->release_fb_cb(pool->cb_priv, + &frame_bufs[cm->new_fb_idx].raw_frame_buffer); + // Find a free frame buffer. Return error if can not find any. + cm->new_fb_idx = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Unable to find free frame buffer"); + return cm->error.error_code; + } + + // Assign a MV array to the frame buffer. + cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; + + pbi->hold_ref_buf = 0; + if (pbi->frame_parallel_decode) { + VPxWorker *const worker = pbi->frame_worker_owner; + vp9_frameworker_lock_stats(worker); + frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; + // Reset decoding progress. + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + vp9_frameworker_unlock_stats(worker); + } else { + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + } + + + if (setjmp(cm->error.jmp)) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + int i; + + cm->error.setjmp = 0; + pbi->ready_for_new_data = 1; + + // Synchronize all threads immediately as a subsequent decode call may + // cause a resize invalidating some allocations. + winterface->sync(&pbi->lf_worker); + for (i = 0; i < pbi->num_tile_workers; ++i) { + winterface->sync(&pbi->tile_workers[i]); + } + + lock_buffer_pool(pool); + // Release all the reference buffers if worker thread is holding them. + if (pbi->hold_ref_buf == 1) { + int ref_index = 0, mask; + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if (mask & 1) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + } + pbi->hold_ref_buf = 0; + } + // Release current frame. + decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); + unlock_buffer_pool(pool); + + vpx_clear_system_state(); + return -1; + } + + cm->error.setjmp = 1; + vp9_decode_frame(pbi, source, source + size, psource); + + swap_frame_buffers(pbi); + + vpx_clear_system_state(); + + if (!cm->show_existing_frame) { + cm->last_show_frame = cm->show_frame; + cm->prev_frame = cm->cur_frame; + if (cm->seg.enabled && !pbi->frame_parallel_decode) + vp9_swap_current_and_last_seg_map(cm); + } + + // Update progress in frame parallel decode. + if (pbi->frame_parallel_decode) { + // Need to lock the mutex here as another thread may + // be accessing this buffer. + VPxWorker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + vp9_frameworker_lock_stats(worker); + + if (cm->show_frame) { + cm->current_video_frame++; + } + frame_worker_data->frame_decoded = 1; + frame_worker_data->frame_context_ready = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } else { + cm->last_width = cm->width; + cm->last_height = cm->height; + if (cm->show_frame) { + cm->current_video_frame++; + } + } + + cm->error.setjmp = 0; + return retcode; +} + +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, + vp9_ppflags_t *flags) { + VP9_COMMON *const cm = &pbi->common; + int ret = -1; +#if !CONFIG_VP9_POSTPROC + (void)*flags; +#endif + + if (pbi->ready_for_new_data == 1) + return ret; + + pbi->ready_for_new_data = 1; + + /* no raw frame to show!!! */ + if (!cm->show_frame) + return ret; + + pbi->ready_for_new_data = 1; + +#if CONFIG_VP9_POSTPROC + if (!cm->show_existing_frame) { + ret = vp9_post_proc_frame(cm, sd, flags); + } else { + *sd = *cm->frame_to_show; + ret = 0; + } +#else + *sd = *cm->frame_to_show; + ret = 0; +#endif /*!CONFIG_POSTPROC*/ + vpx_clear_system_state(); + return ret; +} + +vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + assert(data_sz); + marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); + *count = 0; + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, + data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + // Frames has a maximum of 8 and mag has a maximum of 4. + uint8_t clear_buffer[32]; + assert(sizeof(clear_buffer) >= frames * mag); + if (decrypt_cb) { + decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); + x = clear_buffer; + } + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= ((uint32_t)(*x++)) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h new file mode 100644 index 00000000000..7111a36d370 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_DECODER_VP9_DECODER_H_ +#define VP9_DECODER_VP9_DECODER_H_ + +#include "./vpx_config.h" + +#include "vpx/vpx_codec.h" +#include "vpx_dsp/bitreader.h" +#include "vpx_scale/yv12config.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_thread_common.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_ppflags.h" +#include "vp9/decoder/vp9_dthread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct TileBuffer { + const uint8_t *data; + size_t size; + int col; // only used with multi-threaded decoding +} TileBuffer; + +typedef struct TileWorkerData { + const uint8_t *data_end; + int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive + vpx_reader bit_reader; + FRAME_COUNTS counts; + DECLARE_ALIGNED(16, MACROBLOCKD, xd); + /* dqcoeff are shared by all the planes. So planes must be decoded serially */ + DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); + struct vpx_internal_error_info error_info; +} TileWorkerData; + +typedef struct VP9Decoder { + DECLARE_ALIGNED(16, MACROBLOCKD, mb); + + DECLARE_ALIGNED(16, VP9_COMMON, common); + + int ready_for_new_data; + + int refresh_frame_flags; + + int frame_parallel_decode; // frame-based threading. + + // TODO(hkuang): Combine this with cur_buf in macroblockd as they are + // the same. + RefCntBuffer *cur_buf; // Current decoding frame buffer. + + VPxWorker *frame_worker_owner; // frame_worker that owns this pbi. + VPxWorker lf_worker; + VPxWorker *tile_workers; + TileWorkerData *tile_worker_data; + TileBuffer tile_buffers[64]; + int num_tile_workers; + int total_tiles; + + VP9LfSync lf_row_sync; + + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + + int max_threads; + int inv_tile_order; + int need_resync; // wait for key/intra-only frame. + int hold_ref_buf; // hold the reference buffer. +} VP9Decoder; + +int vp9_receive_compressed_data(struct VP9Decoder *pbi, + size_t size, const uint8_t **dest); + +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, + vp9_ppflags_t *flags); + +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, + void *decrypt_state, + const uint8_t *data) { + if (decrypt_cb) { + uint8_t marker; + decrypt_cb(decrypt_state, data, &marker, 1); + return marker; + } + return *data; +} + +// This function is exposed for use in tests, as well as the inlined function +// "read_marker". +vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state); + +struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); + +void vp9_decoder_remove(struct VP9Decoder *pbi); + +static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, + BufferPool *const pool) { + if (idx >= 0 && frame_bufs[idx].ref_count > 0) { + --frame_bufs[idx].ref_count; + // A worker may only get a free framebuffer index when calling get_free_fb. + // But the private buffer is not set up until finish decoding header. + // So any error happens during decoding header, the frame_bufs will not + // have valid priv buffer. + if (frame_bufs[idx].ref_count == 0 && + frame_bufs[idx].raw_frame_buffer.priv) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); + } + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c new file mode 100644 index 00000000000..47dc107fe21 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#if CONFIG_COEFFICIENT_RANGE_CHECKING +#include "vp9/common/vp9_idct.h" +#endif + +#include "vp9/decoder/vp9_detokenize.h" + +#define EOB_CONTEXT_NODE 0 +#define ZERO_CONTEXT_NODE 1 +#define ONE_CONTEXT_NODE 2 + +#define INCREMENT_COUNT(token) \ + do { \ + if (counts) \ + ++coef_counts[band][ctx][token]; \ + } while (0) + +static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { + int i, val = 0; + for (i = 0; i < n; ++i) + val = (val << 1) | vpx_read(r, probs[i]); + return val; +} + +static int decode_coefs(const MACROBLOCKD *xd, + PLANE_TYPE type, + tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, + int ctx, const int16_t *scan, const int16_t *nb, + vpx_reader *r) { + FRAME_COUNTS *counts = xd->counts; + const int max_eob = 16 << (tx_size << 1); + const FRAME_CONTEXT *const fc = xd->fc; + const int ref = is_inter_block(xd->mi[0]); + int band, c = 0; + const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = + fc->coef_probs[tx_size][type][ref]; + const vpx_prob *prob; + unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; + unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; + uint8_t token_cache[32 * 32]; + const uint8_t *band_translate = get_band_translate(tx_size); + const int dq_shift = (tx_size == TX_32X32); + int v, token; + int16_t dqv = dq[0]; + const uint8_t *const cat6_prob = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : + (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : +#endif // CONFIG_VP9_HIGHBITDEPTH + vp9_cat6_prob; + const int cat6_bits = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? 18 : + (xd->bd == VPX_BITS_10) ? 16 : +#endif // CONFIG_VP9_HIGHBITDEPTH + 14; + + if (counts) { + coef_counts = counts->coef[tx_size][type][ref]; + eob_branch_count = counts->eob_branch[tx_size][type][ref]; + } + + while (c < max_eob) { + int val = -1; + band = *band_translate++; + prob = coef_probs[band][ctx]; + if (counts) + ++eob_branch_count[band][ctx]; + if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { + INCREMENT_COUNT(EOB_MODEL_TOKEN); + break; + } + + while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { + INCREMENT_COUNT(ZERO_TOKEN); + dqv = dq[1]; + token_cache[scan[c]] = 0; + ++c; + if (c >= max_eob) + return c; // zero tokens at the end (no eob token) + ctx = get_coef_context(nb, token_cache, c); + band = *band_translate++; + prob = coef_probs[band][ctx]; + } + + if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { + INCREMENT_COUNT(ONE_TOKEN); + token = ONE_TOKEN; + val = 1; + } else { + INCREMENT_COUNT(TWO_TOKEN); + token = vpx_read_tree(r, vp9_coef_con_tree, + vp9_pareto8_full[prob[PIVOT_NODE] - 1]); + switch (token) { + case TWO_TOKEN: + case THREE_TOKEN: + case FOUR_TOKEN: + val = token; + break; + case CATEGORY1_TOKEN: + val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); + break; + case CATEGORY2_TOKEN: + val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r); + break; + case CATEGORY3_TOKEN: + val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); + break; + case CATEGORY4_TOKEN: + val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); + break; + case CATEGORY5_TOKEN: + val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); + break; + case CATEGORY6_TOKEN: + val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r); + break; + } + } + v = (val * dqv) >> dq_shift; +#if CONFIG_COEFFICIENT_RANGE_CHECKING +#if CONFIG_VP9_HIGHBITDEPTH + dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), + xd->bd); +#else + dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); +#endif // CONFIG_VP9_HIGHBITDEPTH +#else + dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v; +#endif // CONFIG_COEFFICIENT_RANGE_CHECKING + token_cache[scan[c]] = vp9_pt_energy_class[token]; + ++c; + ctx = get_coef_context(nb, token_cache, c); + dqv = dq[1]; + } + + return c; +} + +static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, + int x, int y, unsigned int tx_size_in_blocks) { + if (xd->max_blocks_wide) { + if (tx_size_in_blocks + x > xd->max_blocks_wide) + *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; + } + if (xd->max_blocks_high) { + if (tx_size_in_blocks + y > xd->max_blocks_high) + *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; + } +} + +int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, + int x, int y, TX_SIZE tx_size, vpx_reader *r, + int seg_id) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int16_t *const dequant = pd->seg_dequant[seg_id]; + int eob; + ENTROPY_CONTEXT *a = pd->above_context + x; + ENTROPY_CONTEXT *l = pd->left_context + y; + int ctx; + int ctx_shift_a = 0; + int ctx_shift_l = 0; + + switch (tx_size) { + case TX_4X4: + ctx = a[0] != 0; + ctx += l[0] != 0; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + a[0] = l[0] = (eob > 0); + break; + case TX_8X8: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); + ctx = !!*(const uint16_t *)a; + ctx += !!*(const uint16_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; + *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; + break; + case TX_16X16: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); + ctx = !!*(const uint32_t *)a; + ctx += !!*(const uint32_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; + *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; + break; + case TX_32X32: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); + // NOTE: casting to uint64_t here is safe because the default memory + // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte + // boundaries. + ctx = !!*(const uint64_t *)a; + ctx += !!*(const uint64_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; + *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; + break; + default: + assert(0 && "Invalid transform size."); + eob = 0; + break; + } + + return eob; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h new file mode 100644 index 00000000000..d242d4466e5 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_DECODER_VP9_DETOKENIZE_H_ +#define VP9_DECODER_VP9_DETOKENIZE_H_ + +#include "vpx_dsp/bitreader.h" +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/common/vp9_scan.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int vp9_decode_block_tokens(MACROBLOCKD *xd, + int plane, const scan_order *sc, + int x, int y, + TX_SIZE tx_size, vpx_reader *r, + int seg_id); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c new file mode 100644 index 00000000000..05b38538aef --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c @@ -0,0 +1,76 @@ +/* + Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vp9/common/vp9_entropy.h" + +#include "vp9/decoder/vp9_dsubexp.h" + +static int inv_recenter_nonneg(int v, int m) { + if (v > 2 * m) + return v; + + return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1); +} + +static int decode_uniform(vpx_reader *r) { + const int l = 8; + const int m = (1 << l) - 191; + const int v = vpx_read_literal(r, l - 1); + return v < m ? v : (v << 1) - m + vpx_read_bit(r); +} + +static int inv_remap_prob(int v, int m) { + static uint8_t inv_map_table[MAX_PROB] = { + 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, + 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, + 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, + 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, + 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222, + 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, + 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253 + }; + assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0]))); + v = inv_map_table[v]; + m--; + if ((m << 1) <= MAX_PROB) { + return 1 + inv_recenter_nonneg(v, m); + } else { + return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m); + } +} + +static int decode_term_subexp(vpx_reader *r) { + if (!vpx_read_bit(r)) + return vpx_read_literal(r, 4); + if (!vpx_read_bit(r)) + return vpx_read_literal(r, 4) + 16; + if (!vpx_read_bit(r)) + return vpx_read_literal(r, 5) + 32; + return decode_uniform(r) + 64; +} + +void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p) { + if (vpx_read(r, DIFF_UPDATE_PROB)) { + const int delp = decode_term_subexp(r); + *p = (vpx_prob)inv_remap_prob(delp, *p); + } +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h new file mode 100644 index 00000000000..a8bcc70be91 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_DECODER_VP9_DSUBEXP_H_ +#define VP9_DECODER_VP9_DSUBEXP_H_ + +#include "vpx_dsp/bitreader.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c new file mode 100644 index 00000000000..14a71448fe7 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/decoder/vp9_dthread.h" +#include "vp9/decoder/vp9_decoder.h" + +// #define DEBUG_THREAD + +// TODO(hkuang): Clean up all the #ifdef in this file. +void vp9_frameworker_lock_stats(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + pthread_mutex_lock(&worker_data->stats_mutex); +#else + (void)worker; +#endif +} + +void vp9_frameworker_unlock_stats(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + pthread_mutex_unlock(&worker_data->stats_mutex); +#else + (void)worker; +#endif +} + +void vp9_frameworker_signal_stats(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + +// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper. +#if defined(_WIN32) && !HAVE_PTHREAD_H + pthread_cond_signal(&worker_data->stats_cond); +#else + pthread_cond_broadcast(&worker_data->stats_cond); +#endif + +#else + (void)worker; +#endif +} + +// This macro prevents thread_sanitizer from reporting known concurrent writes. +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#define BUILDING_WITH_TSAN +#endif +#endif + +// TODO(hkuang): Remove worker parameter as it is only used in debug code. +void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, + int row) { +#if CONFIG_MULTITHREAD + if (!ref_buf) + return; + +#ifndef BUILDING_WITH_TSAN + // The following line of code will get harmless tsan error but it is the key + // to get best performance. + if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return; +#endif + + { + // Find the worker thread that owns the reference frame. If the reference + // frame has been fully decoded, it may not have owner. + VPxWorker *const ref_worker = ref_buf->frame_worker_owner; + FrameWorkerData *const ref_worker_data = + (FrameWorkerData *)ref_worker->data1; + const VP9Decoder *const pbi = ref_worker_data->pbi; + +#ifdef DEBUG_THREAD + { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n", + worker_data->worker_id, worker, ref_worker_data->worker_id, + ref_buf->frame_worker_owner, row, ref_buf->row); + } +#endif + + vp9_frameworker_lock_stats(ref_worker); + while (ref_buf->row < row && pbi->cur_buf == ref_buf && + ref_buf->buf.corrupted != 1) { + pthread_cond_wait(&ref_worker_data->stats_cond, + &ref_worker_data->stats_mutex); + } + + if (ref_buf->buf.corrupted == 1) { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_frameworker_unlock_stats(ref_worker); + vpx_internal_error(&worker_data->pbi->common.error, + VPX_CODEC_CORRUPT_FRAME, + "Worker %p failed to decode frame", worker); + } + vp9_frameworker_unlock_stats(ref_worker); + } +#else + (void)worker; + (void)ref_buf; + (void)row; + (void)ref_buf; +#endif // CONFIG_MULTITHREAD +} + +void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) { +#if CONFIG_MULTITHREAD + VPxWorker *worker = buf->frame_worker_owner; + +#ifdef DEBUG_THREAD + { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, + buf->frame_worker_owner, row); + } +#endif + + vp9_frameworker_lock_stats(worker); + buf->row = row; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); +#else + (void)buf; + (void)row; +#endif // CONFIG_MULTITHREAD +} + +void vp9_frameworker_copy_context(VPxWorker *const dst_worker, + VPxWorker *const src_worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1; + FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1; + VP9_COMMON *const src_cm = &src_worker_data->pbi->common; + VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common; + int i; + + // Wait until source frame's context is ready. + vp9_frameworker_lock_stats(src_worker); + while (!src_worker_data->frame_context_ready) { + pthread_cond_wait(&src_worker_data->stats_cond, + &src_worker_data->stats_mutex); + } + + dst_cm->last_frame_seg_map = src_cm->seg.enabled ? + src_cm->current_frame_seg_map : src_cm->last_frame_seg_map; + dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; + vp9_frameworker_unlock_stats(src_worker); + + dst_cm->bit_depth = src_cm->bit_depth; +#if CONFIG_VP9_HIGHBITDEPTH + dst_cm->use_highbitdepth = src_cm->use_highbitdepth; +#endif + dst_cm->prev_frame = src_cm->show_existing_frame ? + src_cm->prev_frame : src_cm->cur_frame; + dst_cm->last_width = !src_cm->show_existing_frame ? + src_cm->width : src_cm->last_width; + dst_cm->last_height = !src_cm->show_existing_frame ? + src_cm->height : src_cm->last_height; + dst_cm->subsampling_x = src_cm->subsampling_x; + dst_cm->subsampling_y = src_cm->subsampling_y; + dst_cm->frame_type = src_cm->frame_type; + dst_cm->last_show_frame = !src_cm->show_existing_frame ? + src_cm->show_frame : src_cm->last_show_frame; + for (i = 0; i < REF_FRAMES; ++i) + dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; + + memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr, + (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh)); + dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level; + dst_cm->lf.filter_level = src_cm->lf.filter_level; + memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS); + memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); + dst_cm->seg = src_cm->seg; + memcpy(dst_cm->frame_contexts, src_cm->frame_contexts, + FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0])); +#else + (void) dst_worker; + (void) src_worker; +#endif // CONFIG_MULTITHREAD +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h new file mode 100644 index 00000000000..ba7c38a511f --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_DECODER_VP9_DTHREAD_H_ +#define VP9_DECODER_VP9_DTHREAD_H_ + +#include "./vpx_config.h" +#include "vpx_util/vpx_thread.h" +#include "vpx/internal/vpx_codec_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct VP9Decoder; + +// WorkerData for the FrameWorker thread. It contains all the information of +// the worker and decode structures for decoding a frame. +typedef struct FrameWorkerData { + struct VP9Decoder *pbi; + const uint8_t *data; + const uint8_t *data_end; + size_t data_size; + void *user_priv; + int result; + int worker_id; + int received_frame; + + // scratch_buffer is used in frame parallel mode only. + // It is used to make a copy of the compressed data. + uint8_t *scratch_buffer; + size_t scratch_buffer_size; + +#if CONFIG_MULTITHREAD + pthread_mutex_t stats_mutex; + pthread_cond_t stats_cond; +#endif + + int frame_context_ready; // Current frame's context is ready to read. + int frame_decoded; // Finished decoding current frame. +} FrameWorkerData; + +void vp9_frameworker_lock_stats(VPxWorker *const worker); +void vp9_frameworker_unlock_stats(VPxWorker *const worker); +void vp9_frameworker_signal_stats(VPxWorker *const worker); + +// Wait until ref_buf has been decoded to row in real pixel unit. +// Note: worker may already finish decoding ref_buf and release it in order to +// start decoding next frame. So need to check whether worker is still decoding +// ref_buf. +void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, + int row); + +// FrameWorker broadcasts its decoding progress so other workers that are +// waiting on it can resume decoding. +void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); + +// Copy necessary decoding context from src worker to dst worker. +void vp9_frameworker_copy_context(VPxWorker *const dst_worker, + VPxWorker *const src_worker); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.c b/thirdparty/libvpx/vp9/vp9_dx_iface.c new file mode 100644 index 00000000000..6531e2c618f --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_dx_iface.c @@ -0,0 +1,1093 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_config.h" +#include "./vpx_version.h" + +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" +#include "vpx_dsp/bitreader_buffer.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_frame_buffers.h" + +#include "vp9/decoder/vp9_decodeframe.h" + +#include "vp9/vp9_dx_iface.h" +#include "vp9/vp9_iface_common.h" + +#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) + +static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { + // This function only allocates space for the vpx_codec_alg_priv_t + // structure. More memory may be required at the time the stream + // information becomes known. + (void)data; + + if (!ctx->priv) { + vpx_codec_alg_priv_t *const priv = + (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); + if (priv == NULL) + return VPX_CODEC_MEM_ERROR; + + ctx->priv = (vpx_codec_priv_t *)priv; + ctx->priv->init_flags = ctx->init_flags; + priv->si.sz = sizeof(priv->si); + priv->flushed = 0; + // Only do frame parallel decode when threads > 1. + priv->frame_parallel_decode = + (ctx->config.dec && (ctx->config.dec->threads > 1) && + (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) ? 1 : 0; + if (ctx->config.dec) { + priv->cfg = *ctx->config.dec; + ctx->config.dec = &priv->cfg; + } + } + + return VPX_CODEC_OK; +} + +static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { + if (ctx->frame_workers != NULL) { + int i; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + vpx_get_worker_interface()->end(worker); + vp9_remove_common(&frame_worker_data->pbi->common); +#if CONFIG_VP9_POSTPROC + vp9_free_postproc_buffers(&frame_worker_data->pbi->common); +#endif + vp9_decoder_remove(frame_worker_data->pbi); + vpx_free(frame_worker_data->scratch_buffer); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&frame_worker_data->stats_mutex); + pthread_cond_destroy(&frame_worker_data->stats_cond); +#endif + vpx_free(frame_worker_data); + } +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex); +#endif + } + + if (ctx->buffer_pool) { + vp9_free_ref_frame_buffers(ctx->buffer_pool); + vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); + } + + vpx_free(ctx->frame_workers); + vpx_free(ctx->buffer_pool); + vpx_free(ctx); + return VPX_CODEC_OK; +} + +static int parse_bitdepth_colorspace_sampling( + BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) { + vpx_color_space_t color_space; + if (profile >= PROFILE_2) + rb->bit_offset += 1; // Bit-depth 10 or 12. + color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3); + if (color_space != VPX_CS_SRGB) { + rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range. + if (profile == PROFILE_1 || profile == PROFILE_3) { + rb->bit_offset += 2; // subsampling x/y. + rb->bit_offset += 1; // unused. + } + } else { + if (profile == PROFILE_1 || profile == PROFILE_3) { + rb->bit_offset += 1; // unused + } else { + // RGB is only available in version 1. + return 0; + } + } + return 1; +} + +static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si, + int *is_intra_only, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + int intra_only_flag = 0; + uint8_t clear_buffer[10]; + + if (data + data_sz <= data) + return VPX_CODEC_INVALID_PARAM; + + si->is_kf = 0; + si->w = si->h = 0; + + if (decrypt_cb) { + data_sz = VPXMIN(sizeof(clear_buffer), data_sz); + decrypt_cb(decrypt_state, data, clear_buffer, data_sz); + data = clear_buffer; + } + + // A maximum of 6 bits are needed to read the frame marker, profile and + // show_existing_frame. + if (data_sz < 1) + return VPX_CODEC_UNSUP_BITSTREAM; + + { + int show_frame; + int error_resilient; + struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; + const int frame_marker = vpx_rb_read_literal(&rb, 2); + const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); + + if (frame_marker != VP9_FRAME_MARKER) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (profile >= MAX_PROFILES) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (vpx_rb_read_bit(&rb)) { // show an existing frame + // If profile is > 2 and show_existing_frame is true, then at least 1 more + // byte (6+3=9 bits) is needed. + if (profile > 2 && data_sz < 2) + return VPX_CODEC_UNSUP_BITSTREAM; + vpx_rb_read_literal(&rb, 3); // Frame buffer to show. + return VPX_CODEC_OK; + } + + // For the rest of the function, a maximum of 9 more bytes are needed + // (computed by taking the maximum possible bits needed in each case). Note + // that this has to be updated if we read any more bits in this function. + if (data_sz < 10) + return VPX_CODEC_UNSUP_BITSTREAM; + + si->is_kf = !vpx_rb_read_bit(&rb); + show_frame = vpx_rb_read_bit(&rb); + error_resilient = vpx_rb_read_bit(&rb); + + if (si->is_kf) { + if (!vp9_read_sync_code(&rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (!parse_bitdepth_colorspace_sampling(profile, &rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); + } else { + intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); + + rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context + + if (intra_only_flag) { + if (!vp9_read_sync_code(&rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + if (profile > PROFILE_0) { + if (!parse_bitdepth_colorspace_sampling(profile, &rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + } + rb.bit_offset += REF_FRAMES; // refresh_frame_flags + vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); + } + } + } + if (is_intra_only != NULL) + *is_intra_only = intra_only_flag; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t decoder_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); +} + +static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) { + const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) + ? sizeof(vp9_stream_info_t) + : sizeof(vpx_codec_stream_info_t); + memcpy(si, &ctx->si, sz); + si->sz = (unsigned int)sz; + + return VPX_CODEC_OK; +} + +static void set_error_detail(vpx_codec_alg_priv_t *ctx, + const char *const error) { + ctx->base.err_detail = error; +} + +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + if (error->error_code) + set_error_detail(ctx, error->has_detail ? error->detail : NULL); + + return error->error_code; +} + +static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { + int i; + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + BufferPool *const pool = cm->buffer_pool; + + cm->new_fb_idx = INVALID_IDX; + cm->byte_alignment = ctx->byte_alignment; + cm->skip_loop_filter = ctx->skip_loop_filter; + + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + pool->get_fb_cb = ctx->get_ext_fb_cb; + pool->release_fb_cb = ctx->release_ext_fb_cb; + pool->cb_priv = ctx->ext_priv; + } else { + pool->get_fb_cb = vp9_get_frame_buffer; + pool->release_fb_cb = vp9_release_frame_buffer; + + if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); + + pool->cb_priv = &pool->int_frame_buffers; + } + } +} + +static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { + cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; + cfg->deblocking_level = 4; + cfg->noise_level = 0; +} + +static void set_ppflags(const vpx_codec_alg_priv_t *ctx, + vp9_ppflags_t *flags) { + flags->post_proc_flag = + ctx->postproc_cfg.post_proc_flag; + + flags->deblocking_level = ctx->postproc_cfg.deblocking_level; + flags->noise_level = ctx->postproc_cfg.noise_level; +} + +static int frame_worker_hook(void *arg1, void *arg2) { + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; + const uint8_t *data = frame_worker_data->data; + (void)arg2; + + frame_worker_data->result = + vp9_receive_compressed_data(frame_worker_data->pbi, + frame_worker_data->data_size, + &data); + frame_worker_data->data_end = data; + + if (frame_worker_data->pbi->frame_parallel_decode) { + // In frame parallel decoding, a worker thread must successfully decode all + // the compressed data. + if (frame_worker_data->result != 0 || + frame_worker_data->data + frame_worker_data->data_size - 1 > data) { + VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner; + BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool; + // Signal all the other threads that are waiting for this frame. + vp9_frameworker_lock_stats(worker); + frame_worker_data->frame_context_ready = 1; + lock_buffer_pool(pool); + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + unlock_buffer_pool(pool); + frame_worker_data->pbi->need_resync = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + return 0; + } + } else if (frame_worker_data->result != 0) { + // Check decode result in serial decode. + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + frame_worker_data->pbi->need_resync = 1; + } + return !frame_worker_data->result; +} + +static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { + int i; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + + ctx->last_show_frame = -1; + ctx->next_submit_worker_id = 0; + ctx->last_submit_worker_id = 0; + ctx->next_output_worker_id = 0; + ctx->frame_cache_read = 0; + ctx->frame_cache_write = 0; + ctx->num_cache_frames = 0; + ctx->need_resync = 1; + ctx->num_frame_workers = + (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1; + if (ctx->num_frame_workers > MAX_DECODE_THREADS) + ctx->num_frame_workers = MAX_DECODE_THREADS; + ctx->available_threads = ctx->num_frame_workers; + ctx->flushed = 0; + + ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); + if (ctx->buffer_pool == NULL) + return VPX_CODEC_MEM_ERROR; + +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) { + set_error_detail(ctx, "Failed to allocate buffer pool mutex"); + return VPX_CODEC_MEM_ERROR; + } +#endif + + ctx->frame_workers = (VPxWorker *) + vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); + if (ctx->frame_workers == NULL) { + set_error_detail(ctx, "Failed to allocate frame_workers"); + return VPX_CODEC_MEM_ERROR; + } + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *frame_worker_data = NULL; + winterface->init(worker); + worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); + if (worker->data1 == NULL) { + set_error_detail(ctx, "Failed to allocate frame_worker_data"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); + if (frame_worker_data->pbi == NULL) { + set_error_detail(ctx, "Failed to allocate frame_worker_data"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data->pbi->frame_worker_owner = worker; + frame_worker_data->worker_id = i; + frame_worker_data->scratch_buffer = NULL; + frame_worker_data->scratch_buffer_size = 0; + frame_worker_data->frame_context_ready = 0; + frame_worker_data->received_frame = 0; +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) { + set_error_detail(ctx, "Failed to allocate frame_worker_data mutex"); + return VPX_CODEC_MEM_ERROR; + } + + if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) { + set_error_detail(ctx, "Failed to allocate frame_worker_data cond"); + return VPX_CODEC_MEM_ERROR; + } +#endif + // If decoding in serial mode, FrameWorker thread could create tile worker + // thread or loopfilter thread. + frame_worker_data->pbi->max_threads = + (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; + + frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; + frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + frame_worker_data->pbi->common.frame_parallel_decode = + ctx->frame_parallel_decode; + worker->hook = (VPxWorkerHook)frame_worker_hook; + if (!winterface->reset(worker)) { + set_error_detail(ctx, "Frame Worker thread creation failed"); + return VPX_CODEC_MEM_ERROR; + } + } + + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) + set_default_ppflags(&ctx->postproc_cfg); + + init_buffer_callbacks(ctx); + + return VPX_CODEC_OK; +} + +static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, + const VP9Decoder *const pbi) { + // Clear resync flag if worker got a key frame or intra only frame. + if (ctx->need_resync == 1 && pbi->need_resync == 0 && + (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME)) + ctx->need_resync = 0; +} + +static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, + const uint8_t **data, unsigned int data_sz, + void *user_priv, int64_t deadline) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + (void)deadline; + + // Determine the stream parameters. Note that we rely on peek_si to + // validate that we have a buffer that does not wrap around the top + // of the heap. + if (!ctx->si.h) { + int is_intra_only = 0; + const vpx_codec_err_t res = + decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only, + ctx->decrypt_cb, ctx->decrypt_state); + if (res != VPX_CODEC_OK) + return res; + + if (!ctx->si.is_kf && !is_intra_only) + return VPX_CODEC_ERROR; + } + + if (!ctx->frame_parallel_decode) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->data = *data; + frame_worker_data->data_size = data_sz; + frame_worker_data->user_priv = user_priv; + frame_worker_data->received_frame = 1; + + // Set these even if already initialized. The caller may have changed the + // decrypt config between frames. + frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb; + frame_worker_data->pbi->decrypt_state = ctx->decrypt_state; + + worker->had_error = 0; + winterface->execute(worker); + + // Update data pointer after decode. + *data = frame_worker_data->data_end; + + if (worker->had_error) + return update_error_state(ctx, &frame_worker_data->pbi->common.error); + + check_resync(ctx, frame_worker_data->pbi); + } else { + VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + // Copy context from last worker thread to next worker thread. + if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) + vp9_frameworker_copy_context( + &ctx->frame_workers[ctx->next_submit_worker_id], + &ctx->frame_workers[ctx->last_submit_worker_id]); + + frame_worker_data->pbi->ready_for_new_data = 0; + // Copy the compressed data into worker's internal buffer. + // TODO(hkuang): Will all the workers allocate the same size + // as the size of the first intra frame be better? This will + // avoid too many deallocate and allocate. + if (frame_worker_data->scratch_buffer_size < data_sz) { + frame_worker_data->scratch_buffer = + (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz); + if (frame_worker_data->scratch_buffer == NULL) { + set_error_detail(ctx, "Failed to reallocate scratch buffer"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data->scratch_buffer_size = data_sz; + } + frame_worker_data->data_size = data_sz; + memcpy(frame_worker_data->scratch_buffer, *data, data_sz); + + frame_worker_data->frame_decoded = 0; + frame_worker_data->frame_context_ready = 0; + frame_worker_data->received_frame = 1; + frame_worker_data->data = frame_worker_data->scratch_buffer; + frame_worker_data->user_priv = user_priv; + + if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) + ctx->last_submit_worker_id = + (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers; + + ctx->next_submit_worker_id = + (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; + --ctx->available_threads; + worker->had_error = 0; + winterface->launch(worker); + } + + return VPX_CODEC_OK; +} + +static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + // TODO(hkuang): Add worker error handling here. + winterface->sync(worker); + frame_worker_data->received_frame = 0; + ++ctx->available_threads; + + check_resync(ctx, frame_worker_data->pbi); + + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; + yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd, + frame_worker_data->user_priv); + ctx->frame_cache[ctx->frame_cache_write].img.fb_priv = + frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->frame_cache_write = + (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE; + ++ctx->num_cache_frames; + } +} + +static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, unsigned int data_sz, + void *user_priv, long deadline) { + const uint8_t *data_start = data; + const uint8_t * const data_end = data + data_sz; + vpx_codec_err_t res; + uint32_t frame_sizes[8]; + int frame_count; + + if (data == NULL && data_sz == 0) { + ctx->flushed = 1; + return VPX_CODEC_OK; + } + + // Reset flushed when receiving a valid frame. + ctx->flushed = 0; + + // Initialize the decoder workers on the first frame. + if (ctx->frame_workers == NULL) { + const vpx_codec_err_t res = init_decoder(ctx); + if (res != VPX_CODEC_OK) + return res; + } + + res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, + ctx->decrypt_cb, ctx->decrypt_state); + if (res != VPX_CODEC_OK) + return res; + + if (ctx->frame_parallel_decode) { + // Decode in frame parallel mode. When decoding in this mode, the frame + // passed to the decoder must be either a normal frame or a superframe with + // superframe index so the decoder could get each frame's start position + // in the superframe. + if (frame_count > 0) { + int i; + + for (i = 0; i < frame_count; ++i) { + const uint8_t *data_start_copy = data_start; + const uint32_t frame_size = frame_sizes[i]; + if (data_start < data + || frame_size > (uint32_t) (data_end - data_start)) { + set_error_detail(ctx, "Invalid frame size in index"); + return VPX_CODEC_CORRUPT_FRAME; + } + + if (ctx->available_threads == 0) { + // No more threads for decoding. Wait until the next output worker + // finishes decoding. Then copy the decoded frame into cache. + if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { + wait_worker_and_cache_frame(ctx); + } else { + // TODO(hkuang): Add unit test to test this path. + set_error_detail(ctx, "Frame output cache is full."); + return VPX_CODEC_ERROR; + } + } + + res = decode_one(ctx, &data_start_copy, frame_size, user_priv, + deadline); + if (res != VPX_CODEC_OK) + return res; + data_start += frame_size; + } + } else { + if (ctx->available_threads == 0) { + // No more threads for decoding. Wait until the next output worker + // finishes decoding. Then copy the decoded frame into cache. + if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { + wait_worker_and_cache_frame(ctx); + } else { + // TODO(hkuang): Add unit test to test this path. + set_error_detail(ctx, "Frame output cache is full."); + return VPX_CODEC_ERROR; + } + } + + res = decode_one(ctx, &data, data_sz, user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; + } + } else { + // Decode in serial mode. + if (frame_count > 0) { + int i; + + for (i = 0; i < frame_count; ++i) { + const uint8_t *data_start_copy = data_start; + const uint32_t frame_size = frame_sizes[i]; + vpx_codec_err_t res; + if (data_start < data + || frame_size > (uint32_t) (data_end - data_start)) { + set_error_detail(ctx, "Invalid frame size in index"); + return VPX_CODEC_CORRUPT_FRAME; + } + + res = decode_one(ctx, &data_start_copy, frame_size, user_priv, + deadline); + if (res != VPX_CODEC_OK) + return res; + + data_start += frame_size; + } + } else { + while (data_start < data_end) { + const uint32_t frame_size = (uint32_t) (data_end - data_start); + const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size, + user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; + + // Account for suboptimal termination by the encoder. + while (data_start < data_end) { + const uint8_t marker = read_marker(ctx->decrypt_cb, + ctx->decrypt_state, data_start); + if (marker) + break; + ++data_start; + } + } + } + } + + return res; +} + +static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) { + RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs; + // Decrease reference count of last output frame in frame parallel mode. + if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { + BufferPool *const pool = ctx->buffer_pool; + lock_buffer_pool(pool); + decrease_ref_count(ctx->last_show_frame, frame_bufs, pool); + unlock_buffer_pool(pool); + } +} + +static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { + vpx_image_t *img = NULL; + + // Only return frame when all the cpu are busy or + // application fluhsed the decoder in frame parallel decode. + if (ctx->frame_parallel_decode && ctx->available_threads > 0 && + !ctx->flushed) { + return NULL; + } + + // Output the frames in the cache first. + if (ctx->num_cache_frames > 0) { + release_last_output_frame(ctx); + ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx; + if (ctx->need_resync) + return NULL; + img = &ctx->frame_cache[ctx->frame_cache_read].img; + ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE; + --ctx->num_cache_frames; + return img; + } + + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. + if (*iter == NULL && ctx->frame_workers != NULL) { + do { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VPxWorker *const worker = + &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); + // Wait for the frame from worker thread. + if (winterface->sync(worker)) { + // Check if worker has received any frames. + if (frame_worker_data->received_frame == 1) { + ++ctx->available_threads; + frame_worker_data->received_frame = 0; + check_resync(ctx, frame_worker_data->pbi); + } + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + release_last_output_frame(ctx); + ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; + if (ctx->need_resync) + return NULL; + yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); + ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + img = &ctx->img; + return img; + } + } else { + // Decoding failed. Release the worker thread. + frame_worker_data->received_frame = 0; + ++ctx->available_threads; + ctx->need_resync = 1; + if (ctx->flushed != 1) + return NULL; + } + } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); + } + return NULL; +} + +static vpx_codec_err_t decoder_set_fb_fn( + vpx_codec_alg_priv_t *ctx, + vpx_get_frame_buffer_cb_fn_t cb_get, + vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { + if (cb_get == NULL || cb_release == NULL) { + return VPX_CODEC_INVALID_PARAM; + } else if (ctx->frame_workers == NULL) { + // If the decoder has already been initialized, do not accept changes to + // the frame buffer functions. + ctx->get_ext_fb_cb = cb_get; + ctx->release_ext_fb_cb = cb_release; + ctx->ext_priv = cb_priv; + return VPX_CODEC_OK; + } + + return VPX_CODEC_ERROR; +} + +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; + YV12_BUFFER_CONFIG sd; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + image2yuvconfig(&frame->img, &sd); + return vp9_set_reference_dec(&frame_worker_data->pbi->common, + (VP9_REFFRAME)frame->frame_type, &sd); + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; + YV12_BUFFER_CONFIG sd; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + image2yuvconfig(&frame->img, &sd); + return vp9_copy_reference_dec(frame_worker_data->pbi, + (VP9_REFFRAME)frame->frame_type, &sd); + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + YV12_BUFFER_CONFIG* fb; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx); + if (fb == NULL) return VPX_CODEC_ERROR; + yuvconfig2image(&data->img, fb, NULL); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_VP9_POSTPROC + vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); + + if (data) { + ctx->postproc_cfg_set = 1; + ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, + va_list args) { + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +} + +static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const update_info = va_arg(args, int *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (update_info) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + *update_info = frame_worker_data->pbi->refresh_frame_flags; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *corrupted = va_arg(args, int *); + + if (corrupted) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + RefCntBuffer *const frame_bufs = + frame_worker_data->pbi->common.buffer_pool->frame_bufs; + if (frame_worker_data->pbi->common.frame_to_show == NULL) + return VPX_CODEC_ERROR; + if (ctx->last_show_frame >= 0) + *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const frame_size = va_arg(args, int *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (frame_size) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + frame_size[0] = cm->width; + frame_size[1] = cm->height; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const render_size = va_arg(args, int *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (render_size) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + render_size[0] = cm->render_width; + render_size[1] = cm->render_height; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx, + va_list args) { + unsigned int *const bit_depth = va_arg(args, unsigned int *); + VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; + + if (bit_depth) { + if (worker) { + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + *bit_depth = cm->bit_depth; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->invert_tile_order = va_arg(args, int); + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); + ctx->decrypt_cb = init ? init->decrypt_cb : NULL; + ctx->decrypt_state = init ? init->decrypt_state : NULL; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, + va_list args) { + const int legacy_byte_alignment = 0; + const int min_byte_alignment = 32; + const int max_byte_alignment = 1024; + const int byte_alignment = va_arg(args, int); + + if (byte_alignment != legacy_byte_alignment && + (byte_alignment < min_byte_alignment || + byte_alignment > max_byte_alignment || + (byte_alignment & (byte_alignment - 1)) != 0)) + return VPX_CODEC_INVALID_PARAM; + + ctx->byte_alignment = byte_alignment; + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.byte_alignment = byte_alignment; + } + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->skip_loop_filter = va_arg(args, int); + + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; + } + + return VPX_CODEC_OK; +} + +static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, + {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, + {VPXD_SET_DECRYPTOR, ctrl_set_decryptor}, + {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, + {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, + + // Getters + {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, + {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, + {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, + + { -1, NULL}, +}; + +#ifndef VERSION_STRING +#define VERSION_STRING +#endif +CODEC_INTERFACE(vpx_codec_vp9_dx) = { + "WebM Project VP9 Decoder" VERSION_STRING, + VPX_CODEC_INTERNAL_ABI_VERSION, + VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | + VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t + decoder_init, // vpx_codec_init_fn_t + decoder_destroy, // vpx_codec_destroy_fn_t + decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + { // NOLINT + decoder_peek_si, // vpx_codec_peek_si_fn_t + decoder_get_si, // vpx_codec_get_si_fn_t + decoder_decode, // vpx_codec_decode_fn_t + decoder_get_frame, // vpx_codec_frame_get_fn_t + decoder_set_fb_fn, // vpx_codec_set_fb_fn_t + }, + { // NOLINT + 0, + NULL, // vpx_codec_enc_cfg_map_t + NULL, // vpx_codec_encode_fn_t + NULL, // vpx_codec_get_cx_data_fn_t + NULL, // vpx_codec_enc_config_set_fn_t + NULL, // vpx_codec_get_global_headers_fn_t + NULL, // vpx_codec_get_preview_frame_fn_t + NULL // vpx_codec_enc_mr_get_mem_loc_fn_t + } +}; diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.h b/thirdparty/libvpx/vp9/vp9_dx_iface.h new file mode 100644 index 00000000000..e0e948e16ca --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_dx_iface.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_VP9_DX_IFACE_H_ +#define VP9_VP9_DX_IFACE_H_ + +#include "vp9/decoder/vp9_decoder.h" + +typedef vpx_codec_stream_info_t vp9_stream_info_t; + +// This limit is due to framebuffer numbers. +// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. +#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. + +typedef struct cache_frame { + int fb_idx; + vpx_image_t img; +} cache_frame; + +struct vpx_codec_alg_priv { + vpx_codec_priv_t base; + vpx_codec_dec_cfg_t cfg; + vp9_stream_info_t si; + int postproc_cfg_set; + vp8_postproc_cfg_t postproc_cfg; + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + vpx_image_t img; + int img_avail; + int flushed; + int invert_tile_order; + int last_show_frame; // Index of last output frame. + int byte_alignment; + int skip_loop_filter; + + // Frame parallel related. + int frame_parallel_decode; // frame-based threading. + VPxWorker *frame_workers; + int num_frame_workers; + int next_submit_worker_id; + int last_submit_worker_id; + int next_output_worker_id; + int available_threads; + cache_frame frame_cache[FRAME_CACHE_SIZE]; + int frame_cache_write; + int frame_cache_read; + int num_cache_frames; + int need_resync; // wait for key/intra-only frame + // BufferPool that holds all reference frames. Shared by all the FrameWorkers. + BufferPool *buffer_pool; + + // External frame buffer info to save for VP9 common. + void *ext_priv; // Private data associated with the external frame buffers. + vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; +}; + +#endif // VP9_VP9_DX_IFACE_H_ diff --git a/thirdparty/libvpx/vp9/vp9_iface_common.h b/thirdparty/libvpx/vp9/vp9_iface_common.h new file mode 100644 index 00000000000..938d4224ba3 --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_iface_common.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_VP9_IFACE_COMMON_H_ +#define VP9_VP9_IFACE_COMMON_H_ + +#include "vpx_ports/mem.h" + +static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv) { + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + int bps; + if (!yv12->subsampling_y) { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I444; + bps = 24; + } else { + img->fmt = VPX_IMG_FMT_I422; + bps = 16; + } + } else { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I440; + bps = 16; + } else { + img->fmt = VPX_IMG_FMT_I420; + bps = 12; + } + } + img->cs = yv12->color_space; + img->range = yv12->color_range; + img->bit_depth = 8; + img->w = yv12->y_stride; + img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); + img->d_w = yv12->y_crop_width; + img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; + img->x_chroma_shift = yv12->subsampling_x; + img->y_chroma_shift = yv12->subsampling_y; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; +#if CONFIG_VP9_HIGHBITDEPTH + if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { + // vpx_image_t uses byte strides and a pointer to the first byte + // of the image. + img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); + img->bit_depth = yv12->bit_depth; + img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer); + img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer); + img->planes[VPX_PLANE_V] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->v_buffer); + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; + img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + img->bps = bps; + img->user_priv = user_priv; + img->img_data = yv12->buffer_alloc; + img->img_data_owner = 0; + img->self_allocd = 0; +} + +static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12) { + yv12->y_buffer = img->planes[VPX_PLANE_Y]; + yv12->u_buffer = img->planes[VPX_PLANE_U]; + yv12->v_buffer = img->planes[VPX_PLANE_V]; + + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; + yv12->y_width = img->d_w; + yv12->y_height = img->d_h; + + yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 + : yv12->y_width; + yv12->uv_height = img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 + : yv12->y_height; + yv12->uv_crop_width = yv12->uv_width; + yv12->uv_crop_height = yv12->uv_height; + + yv12->y_stride = img->stride[VPX_PLANE_Y]; + yv12->uv_stride = img->stride[VPX_PLANE_U]; + yv12->color_space = img->cs; + yv12->color_range = img->range; + +#if CONFIG_VP9_HIGHBITDEPTH + if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + // In vpx_image_t + // planes point to uint8 address of start of data + // stride counts uint8s to reach next row + // In YV12_BUFFER_CONFIG + // y_buffer, u_buffer, v_buffer point to uint16 address of data + // stride and border counts in uint16s + // This means that all the address calculations in the main body of code + // should work correctly. + // However, before we do any pixel operations we need to cast the address + // to a uint16 ponter and double its value. + yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); + yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); + yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); + yv12->y_stride >>= 1; + yv12->uv_stride >>= 1; + yv12->flags = YV12_FLAG_HIGHBITDEPTH; + } else { + yv12->flags = 0; + } + yv12->border = (yv12->y_stride - img->w) / 2; +#else + yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; +#endif // CONFIG_VP9_HIGHBITDEPTH + yv12->subsampling_x = img->x_chroma_shift; + yv12->subsampling_y = img->y_chroma_shift; + return VPX_CODEC_OK; +} + +#endif // VP9_VP9_IFACE_COMMON_H_ diff --git a/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h b/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h new file mode 100644 index 00000000000..7380fcc7e24 --- /dev/null +++ b/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\file + * \brief Describes the decoder algorithm interface for algorithm + * implementations. + * + * This file defines the private structures and data types that are only + * relevant to implementing an algorithm, as opposed to using it. + * + * To create a decoder algorithm class, an interface structure is put + * into the global namespace: + *
+ *     my_codec.c:
+ *       vpx_codec_iface_t my_codec = {
+ *           "My Codec v1.0",
+ *           VPX_CODEC_ALG_ABI_VERSION,
+ *           ...
+ *       };
+ *     
+ * + * An application instantiates a specific decoder instance by using + * vpx_codec_init() and a pointer to the algorithm's interface structure: + *
+ *     my_app.c:
+ *       extern vpx_codec_iface_t my_codec;
+ *       {
+ *           vpx_codec_ctx_t algo;
+ *           res = vpx_codec_init(&algo, &my_codec);
+ *       }
+ *     
+ * + * Once initialized, the instance is manged using other functions from + * the vpx_codec_* family. + */ +#ifndef VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ +#define VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ +#include "../vpx_decoder.h" +#include "../vpx_encoder.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*!\brief Current ABI version number + * + * \internal + * If this file is altered in any way that changes the ABI, this value + * must be bumped. Examples include, but are not limited to, changing + * types, removing or reassigning enums, adding/removing/rearranging + * fields to structures + */ +#define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/ + +typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t; +typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t; + +/*!\brief init function pointer prototype + * + * Performs algorithm-specific initialization of the decoder context. This + * function is called by the generic vpx_codec_init() wrapper function, so + * plugins implementing this interface may trust the input parameters to be + * properly initialized. + * + * \param[in] ctx Pointer to this instance's context + * \retval #VPX_CODEC_OK + * The input stream was recognized and decoder initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory operation failed. + */ +typedef vpx_codec_err_t (*vpx_codec_init_fn_t)(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data); + +/*!\brief destroy function pointer prototype + * + * Performs algorithm-specific destruction of the decoder context. This + * function is called by the generic vpx_codec_destroy() wrapper function, + * so plugins implementing this interface may trust the input parameters + * to be properly initialized. + * + * \param[in] ctx Pointer to this instance's context + * \retval #VPX_CODEC_OK + * The input stream was recognized and decoder initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory operation failed. + */ +typedef vpx_codec_err_t (*vpx_codec_destroy_fn_t)(vpx_codec_alg_priv_t *ctx); + +/*!\brief parse stream info function pointer prototype + * + * Performs high level parsing of the bitstream. This function is called by the + * generic vpx_codec_peek_stream_info() wrapper function, so plugins + * implementing this interface may trust the input parameters to be properly + * initialized. + * + * \param[in] data Pointer to a block of data to parse + * \param[in] data_sz Size of the data buffer + * \param[in,out] si Pointer to stream info to update. The size member + * \ref MUST be properly initialized, but \ref MAY be + * clobbered by the algorithm. This parameter \ref MAY + * be NULL. + * + * \retval #VPX_CODEC_OK + * Bitstream is parsable and stream information updated + */ +typedef vpx_codec_err_t (*vpx_codec_peek_si_fn_t)(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si); + +/*!\brief Return information about the current stream. + * + * Returns information about the stream that has been parsed during decoding. + * + * \param[in] ctx Pointer to this instance's context + * \param[in,out] si Pointer to stream info to update. The size member + * \ref MUST be properly initialized, but \ref MAY be + * clobbered by the algorithm. This parameter \ref MAY + * be NULL. + * + * \retval #VPX_CODEC_OK + * Bitstream is parsable and stream information updated + */ +typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si); + +/*!\brief control function pointer prototype + * + * This function is used to exchange algorithm specific data with the decoder + * instance. This can be used to implement features specific to a particular + * algorithm. + * + * This function is called by the generic vpx_codec_control() wrapper + * function, so plugins implementing this interface may trust the input + * parameters to be properly initialized. However, this interface does not + * provide type safety for the exchanged data or assign meanings to the + * control codes. Those details should be specified in the algorithm's + * header file. In particular, the ctrl_id parameter is guaranteed to exist + * in the algorithm's control mapping table, and the data parameter may be NULL. + * + * + * \param[in] ctx Pointer to this instance's context + * \param[in] ctrl_id Algorithm specific control identifier + * \param[in,out] data Data to exchange with algorithm instance. + * + * \retval #VPX_CODEC_OK + * The internal state data was deserialized. + */ +typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, + va_list ap); + +/*!\brief control function pointer mapping + * + * This structure stores the mapping between control identifiers and + * implementing functions. Each algorithm provides a list of these + * mappings. This list is searched by the vpx_codec_control() wrapper + * function to determine which function to invoke. The special + * value {0, NULL} is used to indicate end-of-list, and must be + * present. The special value {0, } can be used as a catch-all + * mapping. This implies that ctrl_id values chosen by the algorithm + * \ref MUST be non-zero. + */ +typedef const struct vpx_codec_ctrl_fn_map { + int ctrl_id; + vpx_codec_control_fn_t fn; +} vpx_codec_ctrl_fn_map_t; + +/*!\brief decode data function pointer prototype + * + * Processes a buffer of coded data. If the processing results in a new + * decoded frame becoming available, #VPX_CODEC_CB_PUT_SLICE and + * #VPX_CODEC_CB_PUT_FRAME events are generated as appropriate. This + * function is called by the generic vpx_codec_decode() wrapper function, + * so plugins implementing this interface may trust the input parameters + * to be properly initialized. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] data Pointer to this block of new coded data. If + * NULL, a #VPX_CODEC_CB_PUT_FRAME event is posted + * for the previously decoded frame. + * \param[in] data_sz Size of the coded data, in bytes. + * + * \return Returns #VPX_CODEC_OK if the coded data was processed completely + * and future pictures can be decoded without error. Otherwise, + * see the descriptions of the other error codes in ::vpx_codec_err_t + * for recoverability capabilities. + */ +typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, + unsigned int data_sz, + void *user_priv, + long deadline); + +/*!\brief Decoded frames iterator + * + * Iterates over a list of the frames available for display. The iterator + * storage should be initialized to NULL to start the iteration. Iteration is + * complete when this function returns NULL. + * + * The list of available frames becomes valid upon completion of the + * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. + * + * \param[in] ctx Pointer to this instance's context + * \param[in out] iter Iterator storage, initialized to NULL + * + * \return Returns a pointer to an image, if one is ready for display. Frames + * produced will always be in PTS (presentation time stamp) order. + */ +typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter); + +/*!\brief Pass in external frame buffers for the decoder to use. + * + * Registers functions to be called when libvpx needs a frame buffer + * to decode the current frame and a function to be called when libvpx does + * not internally reference the frame buffer. This set function must + * be called before the first call to decode or libvpx will assume the + * default behavior of allocating frame buffers internally. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] cb_get Pointer to the get callback function + * \param[in] cb_release Pointer to the release callback function + * \param[in] cb_priv Callback's private data + * + * \retval #VPX_CODEC_OK + * External frame buffers will be used by libvpx. + * \retval #VPX_CODEC_INVALID_PARAM + * One or more of the callbacks were NULL. + * \retval #VPX_CODEC_ERROR + * Decoder context not initialized, or algorithm not capable of + * using external frame buffers. + * + * \note + * When decoding VP9, the application may be required to pass in at least + * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame + * buffers. + */ +typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)( + vpx_codec_alg_priv_t *ctx, + vpx_get_frame_buffer_cb_fn_t cb_get, + vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); + + +typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t *ctx, + const vpx_image_t *img, + vpx_codec_pts_t pts, + unsigned long duration, + vpx_enc_frame_flags_t flags, + unsigned long deadline); +typedef const vpx_codec_cx_pkt_t *(*vpx_codec_get_cx_data_fn_t)(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter); + +typedef vpx_codec_err_t +(*vpx_codec_enc_config_set_fn_t)(vpx_codec_alg_priv_t *ctx, + const vpx_codec_enc_cfg_t *cfg); +typedef vpx_fixed_buf_t * +(*vpx_codec_get_global_headers_fn_t)(vpx_codec_alg_priv_t *ctx); + +typedef vpx_image_t * +(*vpx_codec_get_preview_frame_fn_t)(vpx_codec_alg_priv_t *ctx); + +typedef vpx_codec_err_t +(*vpx_codec_enc_mr_get_mem_loc_fn_t)(const vpx_codec_enc_cfg_t *cfg, + void **mem_loc); + +/*!\brief usage configuration mapping + * + * This structure stores the mapping between usage identifiers and + * configuration structures. Each algorithm provides a list of these + * mappings. This list is searched by the vpx_codec_enc_config_default() + * wrapper function to determine which config to return. The special value + * {-1, {0}} is used to indicate end-of-list, and must be present. At least + * one mapping must be present, in addition to the end-of-list. + * + */ +typedef const struct vpx_codec_enc_cfg_map { + int usage; + vpx_codec_enc_cfg_t cfg; +} vpx_codec_enc_cfg_map_t; + +/*!\brief Decoder algorithm interface interface + * + * All decoders \ref MUST expose a variable of this type. + */ +struct vpx_codec_iface { + const char *name; /**< Identification String */ + int abi_version; /**< Implemented ABI version */ + vpx_codec_caps_t caps; /**< Decoder capabilities */ + vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */ + vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */ + vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */ + struct vpx_codec_dec_iface { + vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */ + vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_get_si_fn_t */ + vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */ + vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */ + vpx_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::vpx_codec_set_fb_fn_t */ + } dec; + struct vpx_codec_enc_iface { + int cfg_map_count; + vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */ + vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */ + vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */ + vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */ + vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */ + vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */ + vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */ + } enc; +}; + +/*!\brief Callback function pointer / user data pair storage */ +typedef struct vpx_codec_priv_cb_pair { + union { + vpx_codec_put_frame_cb_fn_t put_frame; + vpx_codec_put_slice_cb_fn_t put_slice; + } u; + void *user_priv; +} vpx_codec_priv_cb_pair_t; + + +/*!\brief Instance private storage + * + * This structure is allocated by the algorithm's init function. It can be + * extended in one of two ways. First, a second, algorithm specific structure + * can be allocated and the priv member pointed to it. Alternatively, this + * structure can be made the first member of the algorithm specific structure, + * and the pointer cast to the proper type. + */ +struct vpx_codec_priv { + const char *err_detail; + vpx_codec_flags_t init_flags; + struct { + vpx_codec_priv_cb_pair_t put_frame_cb; + vpx_codec_priv_cb_pair_t put_slice_cb; + } dec; + struct { + vpx_fixed_buf_t cx_data_dst_buf; + unsigned int cx_data_pad_before; + unsigned int cx_data_pad_after; + vpx_codec_cx_pkt_t cx_data_pkt; + unsigned int total_encoders; + } enc; +}; + +/* + * Multi-resolution encoding internal configuration + */ +struct vpx_codec_priv_enc_mr_cfg +{ + unsigned int mr_total_resolutions; + unsigned int mr_encoder_id; + struct vpx_rational mr_down_sampling_factor; + void* mr_low_res_mode_info; +}; + +#undef VPX_CTRL_USE_TYPE +#define VPX_CTRL_USE_TYPE(id, typ) \ + static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} + +#undef VPX_CTRL_USE_TYPE_DEPRECATED +#define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ + static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} + +#define CAST(id, arg) id##__value(arg) + +/* CODEC_INTERFACE convenience macro + * + * By convention, each codec interface is a struct with extern linkage, where + * the symbol is suffixed with _algo. A getter function is also defined to + * return a pointer to the struct, since in some cases it's easier to work + * with text symbols than data symbols (see issue #169). This function has + * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE + * macro is provided to define this getter function automatically. + */ +#define CODEC_INTERFACE(id)\ + vpx_codec_iface_t* id(void) { return &id##_algo; }\ + vpx_codec_iface_t id##_algo + + +/* Internal Utility Functions + * + * The following functions are intended to be used inside algorithms as + * utilities for manipulating vpx_codec_* data structures. + */ +struct vpx_codec_pkt_list { + unsigned int cnt; + unsigned int max; + struct vpx_codec_cx_pkt pkts[1]; +}; + +#define vpx_codec_pkt_list_decl(n)\ + union {struct vpx_codec_pkt_list head;\ + struct {struct vpx_codec_pkt_list head;\ + struct vpx_codec_cx_pkt pkts[n];} alloc;} + +#define vpx_codec_pkt_list_init(m)\ + (m)->alloc.head.cnt = 0,\ + (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]) + +int +vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *, + const struct vpx_codec_cx_pkt *); + +const vpx_codec_cx_pkt_t * +vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list, + vpx_codec_iter_t *iter); + + +#include +#include + +struct vpx_internal_error_info { + vpx_codec_err_t error_code; + int has_detail; + char detail[80]; + int setjmp; + jmp_buf jmp; +}; + +#define CLANG_ANALYZER_NORETURN +#if defined(__has_feature) +#if __has_feature(attribute_analyzer_noreturn) +#undef CLANG_ANALYZER_NORETURN +#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn)) +#endif +#endif + +void vpx_internal_error(struct vpx_internal_error_info *info, + vpx_codec_err_t error, + const char *fmt, + ...) CLANG_ANALYZER_NORETURN; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ diff --git a/thirdparty/libvpx/vpx/internal/vpx_psnr.h b/thirdparty/libvpx/vpx/internal/vpx_psnr.h new file mode 100644 index 00000000000..07d81bb8d90 --- /dev/null +++ b/thirdparty/libvpx/vpx/internal/vpx_psnr.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_INTERNAL_VPX_PSNR_H_ +#define VPX_INTERNAL_VPX_PSNR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t + +/*!\brief Converts SSE to PSNR + * + * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR). + * + * \param[in] samples Number of samples + * \param[in] peak Max sample value + * \param[in] sse Sum of squared errors + */ +double vpx_sse_to_psnr(double samples, double peak, double sse); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_INTERNAL_VPX_PSNR_H_ diff --git a/thirdparty/libvpx/vpx/src/vpx_codec.c b/thirdparty/libvpx/vpx/src/vpx_codec.c new file mode 100644 index 00000000000..5a495ce814b --- /dev/null +++ b/thirdparty/libvpx/vpx/src/vpx_codec.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\file + * \brief Provides the high level interface to wrap decoder algorithms. + * + */ +#include +#include +#include "vpx/vpx_integer.h" +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx_version.h" + +#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) + +int vpx_codec_version(void) { + return VERSION_PACKED; +} + + +const char *vpx_codec_version_str(void) { + return VERSION_STRING_NOSP; +} + + +const char *vpx_codec_version_extra_str(void) { + return VERSION_EXTRA; +} + + +const char *vpx_codec_iface_name(vpx_codec_iface_t *iface) { + return iface ? iface->name : ""; +} + +const char *vpx_codec_err_to_string(vpx_codec_err_t err) { + switch (err) { + case VPX_CODEC_OK: + return "Success"; + case VPX_CODEC_ERROR: + return "Unspecified internal error"; + case VPX_CODEC_MEM_ERROR: + return "Memory allocation error"; + case VPX_CODEC_ABI_MISMATCH: + return "ABI version mismatch"; + case VPX_CODEC_INCAPABLE: + return "Codec does not implement requested capability"; + case VPX_CODEC_UNSUP_BITSTREAM: + return "Bitstream not supported by this decoder"; + case VPX_CODEC_UNSUP_FEATURE: + return "Bitstream required feature not supported by this decoder"; + case VPX_CODEC_CORRUPT_FRAME: + return "Corrupt frame detected"; + case VPX_CODEC_INVALID_PARAM: + return "Invalid parameter"; + case VPX_CODEC_LIST_END: + return "End of iterated list"; + } + + return "Unrecognized error code"; +} + +const char *vpx_codec_error(vpx_codec_ctx_t *ctx) { + return (ctx) ? vpx_codec_err_to_string(ctx->err) + : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM); +} + +const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx) { + if (ctx && ctx->err) + return ctx->priv ? ctx->priv->err_detail : ctx->err_detail; + + return NULL; +} + + +vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx) { + vpx_codec_err_t res; + + if (!ctx) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv) + res = VPX_CODEC_ERROR; + else { + ctx->iface->destroy((vpx_codec_alg_priv_t *)ctx->priv); + + ctx->iface = NULL; + ctx->name = NULL; + ctx->priv = NULL; + res = VPX_CODEC_OK; + } + + return SAVE_STATUS(ctx, res); +} + + +vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface) { + return (iface) ? iface->caps : 0; +} + + +vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, + int ctrl_id, + ...) { + vpx_codec_err_t res; + + if (!ctx || !ctrl_id) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps) + res = VPX_CODEC_ERROR; + else { + vpx_codec_ctrl_fn_map_t *entry; + + res = VPX_CODEC_ERROR; + + for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) { + if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) { + va_list ap; + + va_start(ap, ctrl_id); + res = entry->fn((vpx_codec_alg_priv_t *)ctx->priv, ap); + va_end(ap); + break; + } + } + } + + return SAVE_STATUS(ctx, res); +} + +void vpx_internal_error(struct vpx_internal_error_info *info, + vpx_codec_err_t error, + const char *fmt, + ...) { + va_list ap; + + info->error_code = error; + info->has_detail = 0; + + if (fmt) { + size_t sz = sizeof(info->detail); + + info->has_detail = 1; + va_start(ap, fmt); + vsnprintf(info->detail, sz - 1, fmt, ap); + va_end(ap); + info->detail[sz - 1] = '\0'; + } + + if (info->setjmp) + longjmp(info->jmp, info->error_code); +} diff --git a/thirdparty/libvpx/vpx/src/vpx_decoder.c b/thirdparty/libvpx/vpx/src/vpx_decoder.c new file mode 100644 index 00000000000..802d8edd8a4 --- /dev/null +++ b/thirdparty/libvpx/vpx/src/vpx_decoder.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\file + * \brief Provides the high level interface to wrap decoder algorithms. + * + */ +#include +#include "vpx/internal/vpx_codec_internal.h" + +#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) + +static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { + return (vpx_codec_alg_priv_t *)ctx->priv; +} + +vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, + vpx_codec_iface_t *iface, + const vpx_codec_dec_cfg_t *cfg, + vpx_codec_flags_t flags, + int ver) { + vpx_codec_err_t res; + + if (ver != VPX_DECODER_ABI_VERSION) + res = VPX_CODEC_ABI_MISMATCH; + else if (!ctx || !iface) + res = VPX_CODEC_INVALID_PARAM; + else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) + res = VPX_CODEC_ABI_MISMATCH; + else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC)) + res = VPX_CODEC_INCAPABLE; + else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) && + !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT)) + res = VPX_CODEC_INCAPABLE; + else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) && + !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS)) + res = VPX_CODEC_INCAPABLE; + else if (!(iface->caps & VPX_CODEC_CAP_DECODER)) + res = VPX_CODEC_INCAPABLE; + else { + memset(ctx, 0, sizeof(*ctx)); + ctx->iface = iface; + ctx->name = iface->name; + ctx->priv = NULL; + ctx->init_flags = flags; + ctx->config.dec = cfg; + + res = ctx->iface->init(ctx, NULL); + if (res) { + ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; + vpx_codec_destroy(ctx); + } + } + + return SAVE_STATUS(ctx, res); +} + + +vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, + const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + vpx_codec_err_t res; + + if (!iface || !data || !data_sz || !si + || si->sz < sizeof(vpx_codec_stream_info_t)) + res = VPX_CODEC_INVALID_PARAM; + else { + /* Set default/unknown values */ + si->w = 0; + si->h = 0; + + res = iface->dec.peek_si(data, data_sz, si); + } + + return res; +} + + +vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, + vpx_codec_stream_info_t *si) { + vpx_codec_err_t res; + + if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t)) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv) + res = VPX_CODEC_ERROR; + else { + /* Set default/unknown values */ + si->w = 0; + si->h = 0; + + res = ctx->iface->dec.get_si(get_alg_priv(ctx), si); + } + + return SAVE_STATUS(ctx, res); +} + + +vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, + const uint8_t *data, + unsigned int data_sz, + void *user_priv, + long deadline) { + vpx_codec_err_t res; + + /* Sanity checks */ + /* NULL data ptr allowed if data_sz is 0 too */ + if (!ctx || (!data && data_sz) || (data && !data_sz)) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv) + res = VPX_CODEC_ERROR; + else { + res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv, + deadline); + } + + return SAVE_STATUS(ctx, res); +} + +vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, + vpx_codec_iter_t *iter) { + vpx_image_t *img; + + if (!ctx || !iter || !ctx->iface || !ctx->priv) + img = NULL; + else + img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter); + + return img; +} + + +vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, + vpx_codec_put_frame_cb_fn_t cb, + void *user_priv) { + vpx_codec_err_t res; + + if (!ctx || !cb) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv + || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME)) + res = VPX_CODEC_ERROR; + else { + ctx->priv->dec.put_frame_cb.u.put_frame = cb; + ctx->priv->dec.put_frame_cb.user_priv = user_priv; + res = VPX_CODEC_OK; + } + + return SAVE_STATUS(ctx, res); +} + + +vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, + vpx_codec_put_slice_cb_fn_t cb, + void *user_priv) { + vpx_codec_err_t res; + + if (!ctx || !cb) + res = VPX_CODEC_INVALID_PARAM; + else if (!ctx->iface || !ctx->priv + || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE)) + res = VPX_CODEC_ERROR; + else { + ctx->priv->dec.put_slice_cb.u.put_slice = cb; + ctx->priv->dec.put_slice_cb.user_priv = user_priv; + res = VPX_CODEC_OK; + } + + return SAVE_STATUS(ctx, res); +} + +vpx_codec_err_t vpx_codec_set_frame_buffer_functions( + vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, + vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { + vpx_codec_err_t res; + + if (!ctx || !cb_get || !cb_release) { + res = VPX_CODEC_INVALID_PARAM; + } else if (!ctx->iface || !ctx->priv || + !(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) { + res = VPX_CODEC_ERROR; + } else { + res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release, + cb_priv); + } + + return SAVE_STATUS(ctx, res); +} diff --git a/thirdparty/libvpx/vpx/src/vpx_image.c b/thirdparty/libvpx/vpx/src/vpx_image.c new file mode 100644 index 00000000000..9aae12c794b --- /dev/null +++ b/thirdparty/libvpx/vpx/src/vpx_image.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "vpx/vpx_image.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" + +static vpx_image_t *img_alloc_helper(vpx_image_t *img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int buf_align, + unsigned int stride_align, + unsigned char *img_data) { + unsigned int h, w, s, xcs, ycs, bps; + unsigned int stride_in_bytes; + int align; + + /* Treat align==0 like align==1 */ + if (!buf_align) + buf_align = 1; + + /* Validate alignment (must be power of 2) */ + if (buf_align & (buf_align - 1)) + goto fail; + + /* Treat align==0 like align==1 */ + if (!stride_align) + stride_align = 1; + + /* Validate alignment (must be power of 2) */ + if (stride_align & (stride_align - 1)) + goto fail; + + /* Get sample size for this format */ + switch (fmt) { + case VPX_IMG_FMT_RGB32: + case VPX_IMG_FMT_RGB32_LE: + case VPX_IMG_FMT_ARGB: + case VPX_IMG_FMT_ARGB_LE: + bps = 32; + break; + case VPX_IMG_FMT_RGB24: + case VPX_IMG_FMT_BGR24: + bps = 24; + break; + case VPX_IMG_FMT_RGB565: + case VPX_IMG_FMT_RGB565_LE: + case VPX_IMG_FMT_RGB555: + case VPX_IMG_FMT_RGB555_LE: + case VPX_IMG_FMT_UYVY: + case VPX_IMG_FMT_YUY2: + case VPX_IMG_FMT_YVYU: + bps = 16; + break; + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_VPXI420: + case VPX_IMG_FMT_VPXYV12: + bps = 12; + break; + case VPX_IMG_FMT_I422: + case VPX_IMG_FMT_I440: + bps = 16; + break; + case VPX_IMG_FMT_I444: + bps = 24; + break; + case VPX_IMG_FMT_I42016: + bps = 24; + break; + case VPX_IMG_FMT_I42216: + case VPX_IMG_FMT_I44016: + bps = 32; + break; + case VPX_IMG_FMT_I44416: + bps = 48; + break; + default: + bps = 16; + break; + } + + /* Get chroma shift values for this format */ + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_VPXI420: + case VPX_IMG_FMT_VPXYV12: + case VPX_IMG_FMT_I422: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I42216: + xcs = 1; + break; + default: + xcs = 0; + break; + } + + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_I440: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_VPXI420: + case VPX_IMG_FMT_VPXYV12: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I44016: + ycs = 1; + break; + default: + ycs = 0; + break; + } + + /* Calculate storage sizes given the chroma subsampling */ + align = (1 << xcs) - 1; + w = (d_w + align) & ~align; + align = (1 << ycs) - 1; + h = (d_h + align) & ~align; + s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8; + s = (s + stride_align - 1) & ~(stride_align - 1); + stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s; + + /* Allocate the new image */ + if (!img) { + img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t)); + + if (!img) + goto fail; + + img->self_allocd = 1; + } else { + memset(img, 0, sizeof(vpx_image_t)); + } + + img->img_data = img_data; + + if (!img_data) { + const uint64_t alloc_size = (fmt & VPX_IMG_FMT_PLANAR) ? + (uint64_t)h * s * bps / 8 : (uint64_t)h * s; + + if (alloc_size != (size_t)alloc_size) + goto fail; + + img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size); + img->img_data_owner = 1; + } + + if (!img->img_data) + goto fail; + + img->fmt = fmt; + img->bit_depth = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 16 : 8; + img->w = w; + img->h = h; + img->x_chroma_shift = xcs; + img->y_chroma_shift = ycs; + img->bps = bps; + + /* Calculate strides */ + img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes; + img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs; + + /* Default viewport to entire image */ + if (!vpx_img_set_rect(img, 0, 0, d_w, d_h)) + return img; + +fail: + vpx_img_free(img); + return NULL; +} + +vpx_image_t *vpx_img_alloc(vpx_image_t *img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int align) { + return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL); +} + +vpx_image_t *vpx_img_wrap(vpx_image_t *img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int stride_align, + unsigned char *img_data) { + /* By setting buf_align = 1, we don't change buffer alignment in this + * function. */ + return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data); +} + +int vpx_img_set_rect(vpx_image_t *img, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h) { + unsigned char *data; + + if (x + w <= img->w && y + h <= img->h) { + img->d_w = w; + img->d_h = h; + + /* Calculate plane pointers */ + if (!(img->fmt & VPX_IMG_FMT_PLANAR)) { + img->planes[VPX_PLANE_PACKED] = + img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED]; + } else { + const int bytes_per_sample = + (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; + data = img->img_data; + + if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) { + img->planes[VPX_PLANE_ALPHA] = + data + x * bytes_per_sample + y * img->stride[VPX_PLANE_ALPHA]; + data += img->h * img->stride[VPX_PLANE_ALPHA]; + } + + img->planes[VPX_PLANE_Y] = data + x * bytes_per_sample + + y * img->stride[VPX_PLANE_Y]; + data += img->h * img->stride[VPX_PLANE_Y]; + + if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) { + img->planes[VPX_PLANE_U] = + data + (x >> img->x_chroma_shift) * bytes_per_sample + + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; + data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; + img->planes[VPX_PLANE_V] = + data + (x >> img->x_chroma_shift) * bytes_per_sample + + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; + } else { + img->planes[VPX_PLANE_V] = + data + (x >> img->x_chroma_shift) * bytes_per_sample + + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; + data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; + img->planes[VPX_PLANE_U] = + data + (x >> img->x_chroma_shift) * bytes_per_sample + + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; + } + } + return 0; + } + return -1; +} + +void vpx_img_flip(vpx_image_t *img) { + /* Note: In the calculation pointer adjustment calculation, we want the + * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99 + * standard indicates that if the adjustment parameter is unsigned, the + * stride parameter will be promoted to unsigned, causing errors when + * the lhs is a larger type than the rhs. + */ + img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y]; + img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y]; + + img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) + * img->stride[VPX_PLANE_U]; + img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U]; + + img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) + * img->stride[VPX_PLANE_V]; + img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V]; + + img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA]; + img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA]; +} + +void vpx_img_free(vpx_image_t *img) { + if (img) { + if (img->img_data && img->img_data_owner) + vpx_free(img->img_data); + + if (img->self_allocd) + free(img); + } +} diff --git a/thirdparty/libvpx/vpx/src/vpx_psnr.c b/thirdparty/libvpx/vpx/src/vpx_psnr.c new file mode 100644 index 00000000000..05843acb61f --- /dev/null +++ b/thirdparty/libvpx/vpx/src/vpx_psnr.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx/internal/vpx_psnr.h" + +#define MAX_PSNR 100.0 + +double vpx_sse_to_psnr(double samples, double peak, double sse) { + if (sse > 0.0) { + const double psnr = 10.0 * log10(samples * peak * peak / sse); + return psnr > MAX_PSNR ? MAX_PSNR : psnr; + } else { + return MAX_PSNR; + } +} diff --git a/thirdparty/libvpx/vpx/vp8.h b/thirdparty/libvpx/vpx/vp8.h new file mode 100644 index 00000000000..8a035f97707 --- /dev/null +++ b/thirdparty/libvpx/vpx/vp8.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/*!\defgroup vp8 VP8 + * \ingroup codecs + * VP8 is vpx's newest video compression algorithm that uses motion + * compensated prediction, Discrete Cosine Transform (DCT) coding of the + * prediction error signal and context dependent entropy coding techniques + * based on arithmetic principles. It features: + * - YUV 4:2:0 image format + * - Macro-block based coding (16x16 luma plus two 8x8 chroma) + * - 1/4 (1/8) pixel accuracy motion compensated prediction + * - 4x4 DCT transform + * - 128 level linear quantizer + * - In loop deblocking filter + * - Context-based entropy coding + * + * @{ + */ +/*!\file + * \brief Provides controls common to both the VP8 encoder and decoder. + */ +#ifndef VPX_VP8_H_ +#define VPX_VP8_H_ + +#include "./vpx_codec.h" +#include "./vpx_image.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/*!\brief Control functions + * + * The set of macros define the control functions of VP8 interface + */ +enum vp8_com_control_id { + VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ + VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ + VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ + VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */ + VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ + VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ + VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ + + /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) + * for its control ids. These should be migrated to something like the + * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI. + */ + VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */ + VP8_COMMON_CTRL_ID_MAX, + VP8_DECODER_CTRL_ID_START = 256 +}; + +/*!\brief post process flags + * + * The set of macros define VP8 decoder post processing flags + */ +enum vp8_postproc_level { + VP8_NOFILTERING = 0, + VP8_DEBLOCK = 1 << 0, + VP8_DEMACROBLOCK = 1 << 1, + VP8_ADDNOISE = 1 << 2, + VP8_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */ + VP8_DEBUG_TXT_MBLK_MODES = 1 << 4, /**< print macro block modes over each macro block */ + VP8_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */ + VP8_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */ + VP8_MFQE = 1 << 10 +}; + +/*!\brief post process flags + * + * This define a structure that describe the post processing settings. For + * the best objective measure (using the PSNR metric) set post_proc_flag + * to VP8_DEBLOCK and deblocking_level to 1. + */ + +typedef struct vp8_postproc_cfg { + int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ + int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ + int noise_level; /**< the strength of additive noise, valid range [0, 16] */ +} vp8_postproc_cfg_t; + +/*!\brief reference frame type + * + * The set of macros define the type of VP8 reference frames + */ +typedef enum vpx_ref_frame_type { + VP8_LAST_FRAME = 1, + VP8_GOLD_FRAME = 2, + VP8_ALTR_FRAME = 4 +} vpx_ref_frame_type_t; + +/*!\brief reference frame data struct + * + * Define the data struct to access vp8 reference frames. + */ +typedef struct vpx_ref_frame { + vpx_ref_frame_type_t frame_type; /**< which reference frame */ + vpx_image_t img; /**< reference frame data in image format */ +} vpx_ref_frame_t; + +/*!\brief VP9 specific reference frame data struct + * + * Define the data struct to access vp9 reference frames. + */ +typedef struct vp9_ref_frame { + int idx; /**< frame index to get (input) */ + vpx_image_t img; /**< img structure to populate (output) */ +} vp9_ref_frame_t; + +/*!\cond */ +/*!\brief vp8 decoder control function parameter type + * + * defines the data type for each of VP8 decoder control function requires + */ +VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) +#define VPX_CTRL_VP8_SET_REFERENCE +VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) +#define VPX_CTRL_VP8_COPY_REFERENCE +VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) +#define VPX_CTRL_VP8_SET_POSTPROC +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) +#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES +VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) +#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV +VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) +#define VPX_CTRL_VP9_GET_REFERENCE + +/*!\endcond */ +/*! @} - end defgroup vp8 */ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP8_H_ diff --git a/thirdparty/libvpx/vpx/vp8dx.h b/thirdparty/libvpx/vpx/vp8dx.h new file mode 100644 index 00000000000..67c97bb6c9a --- /dev/null +++ b/thirdparty/libvpx/vpx/vp8dx.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\defgroup vp8_decoder WebM VP8/VP9 Decoder + * \ingroup vp8 + * + * @{ + */ +/*!\file + * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder + * interface. + */ +#ifndef VPX_VP8DX_H_ +#define VPX_VP8DX_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Include controls common to both the encoder and decoder */ +#include "./vp8.h" + +/*!\name Algorithm interface for VP8 + * + * This interface provides the capability to decode VP8 streams. + * @{ + */ +extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; +extern vpx_codec_iface_t *vpx_codec_vp8_dx(void); +/*!@} - end algorithm interface member group*/ + +/*!\name Algorithm interface for VP9 + * + * This interface provides the capability to decode VP9 streams. + * @{ + */ +extern vpx_codec_iface_t vpx_codec_vp9_dx_algo; +extern vpx_codec_iface_t *vpx_codec_vp9_dx(void); +/*!@} - end algorithm interface member group*/ + +/*!\enum vp8_dec_control_id + * \brief VP8 decoder control functions + * + * This set of macros define the control functions available for the VP8 + * decoder interface. + * + * \sa #vpx_codec_control + */ +enum vp8_dec_control_id { + /** control function to get info on which reference frames were updated + * by the last decode + */ + VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START, + + /** check if the indicated frame is corrupted */ + VP8D_GET_FRAME_CORRUPTED, + + /** control function to get info on which reference frames were used + * by the last decode + */ + VP8D_GET_LAST_REF_USED, + + /** decryption function to decrypt encoded buffer data immediately + * before decoding. Takes a vpx_decrypt_init, which contains + * a callback function and opaque context pointer. + */ + VPXD_SET_DECRYPTOR, + VP8D_SET_DECRYPTOR = VPXD_SET_DECRYPTOR, + + /** control function to get the dimensions that the current frame is decoded + * at. This may be different to the intended display size for the frame as + * specified in the wrapper or frame header (see VP9D_GET_DISPLAY_SIZE). */ + VP9D_GET_FRAME_SIZE, + + /** control function to get the current frame's intended display dimensions + * (as specified in the wrapper or frame header). This may be different to + * the decoded dimensions of this frame (see VP9D_GET_FRAME_SIZE). */ + VP9D_GET_DISPLAY_SIZE, + + /** control function to get the bit depth of the stream. */ + VP9D_GET_BIT_DEPTH, + + /** control function to set the byte alignment of the planes in the reference + * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets + * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly + * follows Y plane, and V plane directly follows U plane. Default value is 0. + */ + VP9_SET_BYTE_ALIGNMENT, + + /** control function to invert the decoding order to from right to left. The + * function is used in a test to confirm the decoding independence of tile + * columns. The function may be used in application where this order + * of decoding is desired. + * + * TODO(yaowu): Rework the unit test that uses this control, and in a future + * release, this test-only control shall be removed. + */ + VP9_INVERT_TILE_DECODE_ORDER, + + /** control function to set the skip loop filter flag. Valid values are + * integers. The decoder will skip the loop filter when its value is set to + * nonzero. If the loop filter is skipped the decoder may accumulate decode + * artifacts. The default value is 0. + */ + VP9_SET_SKIP_LOOP_FILTER, + + VP8_DECODER_CTRL_ID_MAX +}; + +/** Decrypt n bytes of data from input -> output, using the decrypt_state + * passed in VPXD_SET_DECRYPTOR. + */ +typedef void (*vpx_decrypt_cb)(void *decrypt_state, const unsigned char *input, + unsigned char *output, int count); + +/*!\brief Structure to hold decryption state + * + * Defines a structure to hold the decryption state and access function. + */ +typedef struct vpx_decrypt_init { + /*! Decrypt callback. */ + vpx_decrypt_cb decrypt_cb; + + /*! Decryption state. */ + void *decrypt_state; +} vpx_decrypt_init; + +/*!\brief A deprecated alias for vpx_decrypt_init. + */ +typedef vpx_decrypt_init vp8_decrypt_init; + + +/*!\cond */ +/*!\brief VP8 decoder control function parameter type + * + * Defines the data types that VP8D control functions take. Note that + * additional common controls are defined in vp8.h + * + */ + + +VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) +#define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES +VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) +#define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED +VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) +#define VPX_CTRL_VP8D_GET_LAST_REF_USED +VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *) +#define VPX_CTRL_VPXD_SET_DECRYPTOR +VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *) +#define VPX_CTRL_VP8D_SET_DECRYPTOR +VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *) +#define VPX_CTRL_VP9D_GET_DISPLAY_SIZE +VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *) +#define VPX_CTRL_VP9D_GET_BIT_DEPTH +VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *) +#define VPX_CTRL_VP9D_GET_FRAME_SIZE +VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) +#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER + +/*!\endcond */ +/*! @} - end defgroup vp8_decoder */ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP8DX_H_ diff --git a/thirdparty/libvpx/vpx/vpx_codec.h b/thirdparty/libvpx/vpx/vpx_codec.h new file mode 100644 index 00000000000..b6037bb4d7a --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_codec.h @@ -0,0 +1,479 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\defgroup codec Common Algorithm Interface + * This abstraction allows applications to easily support multiple video + * formats with minimal code duplication. This section describes the interface + * common to all codecs (both encoders and decoders). + * @{ + */ + +/*!\file + * \brief Describes the codec algorithm interface to applications. + * + * This file describes the interface between an application and a + * video codec algorithm. + * + * An application instantiates a specific codec instance by using + * vpx_codec_init() and a pointer to the algorithm's interface structure: + *
+ *     my_app.c:
+ *       extern vpx_codec_iface_t my_codec;
+ *       {
+ *           vpx_codec_ctx_t algo;
+ *           res = vpx_codec_init(&algo, &my_codec);
+ *       }
+ *     
+ * + * Once initialized, the instance is manged using other functions from + * the vpx_codec_* family. + */ +#ifndef VPX_VPX_CODEC_H_ +#define VPX_VPX_CODEC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./vpx_integer.h" +#include "./vpx_image.h" + + /*!\brief Decorator indicating a function is deprecated */ +#ifndef DEPRECATED +#if defined(__GNUC__) && __GNUC__ +#define DEPRECATED __attribute__ ((deprecated)) +#elif defined(_MSC_VER) +#define DEPRECATED +#else +#define DEPRECATED +#endif +#endif /* DEPRECATED */ + +#ifndef DECLSPEC_DEPRECATED +#if defined(__GNUC__) && __GNUC__ +#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ +#elif defined(_MSC_VER) +#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ +#else +#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ +#endif +#endif /* DECLSPEC_DEPRECATED */ + + /*!\brief Decorator indicating a function is potentially unused */ +#ifdef UNUSED +#elif defined(__GNUC__) || defined(__clang__) +#define UNUSED __attribute__ ((unused)) +#else +#define UNUSED +#endif + + /*!\brief Current ABI version number + * + * \internal + * If this file is altered in any way that changes the ABI, this value + * must be bumped. Examples include, but are not limited to, changing + * types, removing or reassigning enums, adding/removing/rearranging + * fields to structures + */ +#define VPX_CODEC_ABI_VERSION (3 + VPX_IMAGE_ABI_VERSION) /**<\hideinitializer*/ + + /*!\brief Algorithm return codes */ + typedef enum { + /*!\brief Operation completed without error */ + VPX_CODEC_OK, + + /*!\brief Unspecified error */ + VPX_CODEC_ERROR, + + /*!\brief Memory operation failed */ + VPX_CODEC_MEM_ERROR, + + /*!\brief ABI version mismatch */ + VPX_CODEC_ABI_MISMATCH, + + /*!\brief Algorithm does not have required capability */ + VPX_CODEC_INCAPABLE, + + /*!\brief The given bitstream is not supported. + * + * The bitstream was unable to be parsed at the highest level. The decoder + * is unable to proceed. This error \ref SHOULD be treated as fatal to the + * stream. */ + VPX_CODEC_UNSUP_BITSTREAM, + + /*!\brief Encoded bitstream uses an unsupported feature + * + * The decoder does not implement a feature required by the encoder. This + * return code should only be used for features that prevent future + * pictures from being properly decoded. This error \ref MAY be treated as + * fatal to the stream or \ref MAY be treated as fatal to the current GOP. + */ + VPX_CODEC_UNSUP_FEATURE, + + /*!\brief The coded data for this stream is corrupt or incomplete + * + * There was a problem decoding the current frame. This return code + * should only be used for failures that prevent future pictures from + * being properly decoded. This error \ref MAY be treated as fatal to the + * stream or \ref MAY be treated as fatal to the current GOP. If decoding + * is continued for the current GOP, artifacts may be present. + */ + VPX_CODEC_CORRUPT_FRAME, + + /*!\brief An application-supplied parameter is not valid. + * + */ + VPX_CODEC_INVALID_PARAM, + + /*!\brief An iterator reached the end of list. + * + */ + VPX_CODEC_LIST_END + + } + vpx_codec_err_t; + + + /*! \brief Codec capabilities bitfield + * + * Each codec advertises the capabilities it supports as part of its + * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces + * or functionality, and are not required to be supported. + * + * The available flags are specified by VPX_CODEC_CAP_* defines. + */ + typedef long vpx_codec_caps_t; +#define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */ +#define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */ + + + /*! \brief Initialization-time Feature Enabling + * + * Certain codec features must be known at initialization time, to allow for + * proper memory allocation. + * + * The available flags are specified by VPX_CODEC_USE_* defines. + */ + typedef long vpx_codec_flags_t; + + + /*!\brief Codec interface structure. + * + * Contains function pointers and other data private to the codec + * implementation. This structure is opaque to the application. + */ + typedef const struct vpx_codec_iface vpx_codec_iface_t; + + + /*!\brief Codec private data structure. + * + * Contains data private to the codec implementation. This structure is opaque + * to the application. + */ + typedef struct vpx_codec_priv vpx_codec_priv_t; + + + /*!\brief Iterator + * + * Opaque storage used for iterating over lists. + */ + typedef const void *vpx_codec_iter_t; + + + /*!\brief Codec context structure + * + * All codecs \ref MUST support this context structure fully. In general, + * this data should be considered private to the codec algorithm, and + * not be manipulated or examined by the calling application. Applications + * may reference the 'name' member to get a printable description of the + * algorithm. + */ + typedef struct vpx_codec_ctx { + const char *name; /**< Printable interface name */ + vpx_codec_iface_t *iface; /**< Interface pointers */ + vpx_codec_err_t err; /**< Last returned error */ + const char *err_detail; /**< Detailed info, if available */ + vpx_codec_flags_t init_flags; /**< Flags passed at init time */ + union { + /**< Decoder Configuration Pointer */ + const struct vpx_codec_dec_cfg *dec; + /**< Encoder Configuration Pointer */ + const struct vpx_codec_enc_cfg *enc; + const void *raw; + } config; /**< Configuration pointer aliasing union */ + vpx_codec_priv_t *priv; /**< Algorithm private storage */ + } vpx_codec_ctx_t; + + /*!\brief Bit depth for codec + * * + * This enumeration determines the bit depth of the codec. + */ + typedef enum vpx_bit_depth { + VPX_BITS_8 = 8, /**< 8 bits */ + VPX_BITS_10 = 10, /**< 10 bits */ + VPX_BITS_12 = 12, /**< 12 bits */ + } vpx_bit_depth_t; + + /* + * Library Version Number Interface + * + * For example, see the following sample return values: + * vpx_codec_version() (1<<16 | 2<<8 | 3) + * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba" + * vpx_codec_version_extra_str() "rc1-16-gec6a1ba" + */ + + /*!\brief Return the version information (as an integer) + * + * Returns a packed encoding of the library version number. This will only include + * the major.minor.patch component of the version number. Note that this encoded + * value should be accessed through the macros provided, as the encoding may change + * in the future. + * + */ + int vpx_codec_version(void); +#define VPX_VERSION_MAJOR(v) ((v>>16)&0xff) /**< extract major from packed version */ +#define VPX_VERSION_MINOR(v) ((v>>8)&0xff) /**< extract minor from packed version */ +#define VPX_VERSION_PATCH(v) ((v>>0)&0xff) /**< extract patch from packed version */ + + /*!\brief Return the version major number */ +#define vpx_codec_version_major() ((vpx_codec_version()>>16)&0xff) + + /*!\brief Return the version minor number */ +#define vpx_codec_version_minor() ((vpx_codec_version()>>8)&0xff) + + /*!\brief Return the version patch number */ +#define vpx_codec_version_patch() ((vpx_codec_version()>>0)&0xff) + + + /*!\brief Return the version information (as a string) + * + * Returns a printable string containing the full library version number. This may + * contain additional text following the three digit version number, as to indicate + * release candidates, prerelease versions, etc. + * + */ + const char *vpx_codec_version_str(void); + + + /*!\brief Return the version information (as a string) + * + * Returns a printable "extra string". This is the component of the string returned + * by vpx_codec_version_str() following the three digit version number. + * + */ + const char *vpx_codec_version_extra_str(void); + + + /*!\brief Return the build configuration + * + * Returns a printable string containing an encoded version of the build + * configuration. This may be useful to vpx support. + * + */ + const char *vpx_codec_build_config(void); + + + /*!\brief Return the name for a given interface + * + * Returns a human readable string for name of the given codec interface. + * + * \param[in] iface Interface pointer + * + */ + const char *vpx_codec_iface_name(vpx_codec_iface_t *iface); + + + /*!\brief Convert error number to printable string + * + * Returns a human readable string for the last error returned by the + * algorithm. The returned error will be one line and will not contain + * any newline characters. + * + * + * \param[in] err Error number. + * + */ + const char *vpx_codec_err_to_string(vpx_codec_err_t err); + + + /*!\brief Retrieve error synopsis for codec context + * + * Returns a human readable string for the last error returned by the + * algorithm. The returned error will be one line and will not contain + * any newline characters. + * + * + * \param[in] ctx Pointer to this instance's context. + * + */ + const char *vpx_codec_error(vpx_codec_ctx_t *ctx); + + + /*!\brief Retrieve detailed error information for codec context + * + * Returns a human readable string providing detailed information about + * the last error. + * + * \param[in] ctx Pointer to this instance's context. + * + * \retval NULL + * No detailed information is available. + */ + const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx); + + + /* REQUIRED FUNCTIONS + * + * The following functions are required to be implemented for all codecs. + * They represent the base case functionality expected of all codecs. + */ + + /*!\brief Destroy a codec instance + * + * Destroys a codec context, freeing any associated memory buffers. + * + * \param[in] ctx Pointer to this instance's context + * + * \retval #VPX_CODEC_OK + * The codec algorithm initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory allocation failed. + */ + vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx); + + + /*!\brief Get the capabilities of an algorithm. + * + * Retrieves the capabilities bitfield from the algorithm's interface. + * + * \param[in] iface Pointer to the algorithm interface + * + */ + vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface); + + + /*!\brief Control algorithm + * + * This function is used to exchange algorithm specific data with the codec + * instance. This can be used to implement features specific to a particular + * algorithm. + * + * This wrapper function dispatches the request to the helper function + * associated with the given ctrl_id. It tries to call this function + * transparently, but will return #VPX_CODEC_ERROR if the request could not + * be dispatched. + * + * Note that this function should not be used directly. Call the + * #vpx_codec_control wrapper macro instead. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] ctrl_id Algorithm specific control identifier + * + * \retval #VPX_CODEC_OK + * The control request was processed. + * \retval #VPX_CODEC_ERROR + * The control request was not processed. + * \retval #VPX_CODEC_INVALID_PARAM + * The data was not valid. + */ + vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, + int ctrl_id, + ...); +#if defined(VPX_DISABLE_CTRL_TYPECHECKS) && VPX_DISABLE_CTRL_TYPECHECKS +# define vpx_codec_control(ctx,id,data) vpx_codec_control_(ctx,id,data) +# define VPX_CTRL_USE_TYPE(id, typ) +# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) +# define VPX_CTRL_VOID(id, typ) + +#else + /*!\brief vpx_codec_control wrapper macro + * + * This macro allows for type safe conversions across the variadic parameter + * to vpx_codec_control_(). + * + * \internal + * It works by dispatching the call to the control function through a wrapper + * function named with the id parameter. + */ +# define vpx_codec_control(ctx,id,data) vpx_codec_control_##id(ctx,id,data)\ + /**<\hideinitializer*/ + + + /*!\brief vpx_codec_control type definition macro + * + * This macro allows for type safe conversions across the variadic parameter + * to vpx_codec_control_(). It defines the type of the argument for a given + * control identifier. + * + * \internal + * It defines a static function with + * the correctly typed arguments as a wrapper to the type-unsafe internal + * function. + */ +# define VPX_CTRL_USE_TYPE(id, typ) \ + static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) UNUSED;\ + \ + static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ + return vpx_codec_control_(ctx, ctrl_id, data);\ + } /**<\hideinitializer*/ + + + /*!\brief vpx_codec_control deprecated type definition macro + * + * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is + * deprecated and should not be used. Consult the documentation for your + * codec for more information. + * + * \internal + * It defines a static function with the correctly typed arguments as a + * wrapper to the type-unsafe internal function. + */ +# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ + DECLSPEC_DEPRECATED static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) DEPRECATED UNUSED;\ + \ + DECLSPEC_DEPRECATED static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ + return vpx_codec_control_(ctx, ctrl_id, data);\ + } /**<\hideinitializer*/ + + + /*!\brief vpx_codec_control void type definition macro + * + * This macro allows for type safe conversions across the variadic parameter + * to vpx_codec_control_(). It indicates that a given control identifier takes + * no argument. + * + * \internal + * It defines a static function without a data argument as a wrapper to the + * type-unsafe internal function. + */ +# define VPX_CTRL_VOID(id) \ + static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t*, int) UNUSED;\ + \ + static vpx_codec_err_t \ + vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id) {\ + return vpx_codec_control_(ctx, ctrl_id);\ + } /**<\hideinitializer*/ + + +#endif + + /*!@} - end defgroup codec*/ +#ifdef __cplusplus +} +#endif +#endif // VPX_VPX_CODEC_H_ + diff --git a/thirdparty/libvpx/vpx/vpx_decoder.h b/thirdparty/libvpx/vpx/vpx_decoder.h new file mode 100644 index 00000000000..62fd9197564 --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_decoder.h @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VPX_VPX_DECODER_H_ +#define VPX_VPX_DECODER_H_ + +/*!\defgroup decoder Decoder Algorithm Interface + * \ingroup codec + * This abstraction allows applications using this decoder to easily support + * multiple video formats with minimal code duplication. This section describes + * the interface common to all decoders. + * @{ + */ + +/*!\file + * \brief Describes the decoder algorithm interface to applications. + * + * This file describes the interface between an application and a + * video decoder algorithm. + * + */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "./vpx_codec.h" +#include "./vpx_frame_buffer.h" + + /*!\brief Current ABI version number + * + * \internal + * If this file is altered in any way that changes the ABI, this value + * must be bumped. Examples include, but are not limited to, changing + * types, removing or reassigning enums, adding/removing/rearranging + * fields to structures + */ +#define VPX_DECODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ + + /*! \brief Decoder capabilities bitfield + * + * Each decoder advertises the capabilities it supports as part of its + * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces + * or functionality, and are not required to be supported by a decoder. + * + * The available flags are specified by VPX_CODEC_CAP_* defines. + */ +#define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */ +#define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */ +#define VPX_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */ +#define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 /**< Can conceal errors due to + packet loss */ +#define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 /**< Can receive encoded frames + one fragment at a time */ + + /*! \brief Initialization-time Feature Enabling + * + * Certain codec features must be known at initialization time, to allow for + * proper memory allocation. + * + * The available flags are specified by VPX_CODEC_USE_* defines. + */ +#define VPX_CODEC_CAP_FRAME_THREADING 0x200000 /**< Can support frame-based + multi-threading */ +#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 /**< Can support external + frame buffers */ + +#define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ +#define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded + frames */ +#define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /**< The input frame should be + passed to the decoder one + fragment at a time */ +#define VPX_CODEC_USE_FRAME_THREADING 0x80000 /**< Enable frame-based + multi-threading */ + + /*!\brief Stream properties + * + * This structure is used to query or set properties of the decoded + * stream. Algorithms may extend this structure with data specific + * to their bitstream by setting the sz member appropriately. + */ + typedef struct vpx_codec_stream_info { + unsigned int sz; /**< Size of this structure */ + unsigned int w; /**< Width (or 0 for unknown/default) */ + unsigned int h; /**< Height (or 0 for unknown/default) */ + unsigned int is_kf; /**< Current frame is a keyframe */ + } vpx_codec_stream_info_t; + + /* REQUIRED FUNCTIONS + * + * The following functions are required to be implemented for all decoders. + * They represent the base case functionality expected of all decoders. + */ + + + /*!\brief Initialization Configurations + * + * This structure is used to pass init time configuration options to the + * decoder. + */ + typedef struct vpx_codec_dec_cfg { + unsigned int threads; /**< Maximum number of threads to use, default 1 */ + unsigned int w; /**< Width */ + unsigned int h; /**< Height */ + } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */ + + + /*!\brief Initialize a decoder instance + * + * Initializes a decoder context using the given interface. Applications + * should call the vpx_codec_dec_init convenience macro instead of this + * function directly, to ensure that the ABI version number parameter + * is properly initialized. + * + * If the library was configured with --disable-multithread, this call + * is not thread safe and should be guarded with a lock if being used + * in a multithreaded context. + * + * \param[in] ctx Pointer to this instance's context. + * \param[in] iface Pointer to the algorithm interface to use. + * \param[in] cfg Configuration to use, if known. May be NULL. + * \param[in] flags Bitfield of VPX_CODEC_USE_* flags + * \param[in] ver ABI version number. Must be set to + * VPX_DECODER_ABI_VERSION + * \retval #VPX_CODEC_OK + * The decoder algorithm initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory allocation failed. + */ + vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, + vpx_codec_iface_t *iface, + const vpx_codec_dec_cfg_t *cfg, + vpx_codec_flags_t flags, + int ver); + + /*!\brief Convenience macro for vpx_codec_dec_init_ver() + * + * Ensures the ABI version parameter is properly set. + */ +#define vpx_codec_dec_init(ctx, iface, cfg, flags) \ + vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION) + + + /*!\brief Parse stream info from a buffer + * + * Performs high level parsing of the bitstream. Construction of a decoder + * context is not necessary. Can be used to determine if the bitstream is + * of the proper format, and to extract information from the stream. + * + * \param[in] iface Pointer to the algorithm interface + * \param[in] data Pointer to a block of data to parse + * \param[in] data_sz Size of the data buffer + * \param[in,out] si Pointer to stream info to update. The size member + * \ref MUST be properly initialized, but \ref MAY be + * clobbered by the algorithm. This parameter \ref MAY + * be NULL. + * + * \retval #VPX_CODEC_OK + * Bitstream is parsable and stream information updated + */ + vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, + const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si); + + + /*!\brief Return information about the current stream. + * + * Returns information about the stream that has been parsed during decoding. + * + * \param[in] ctx Pointer to this instance's context + * \param[in,out] si Pointer to stream info to update. The size member + * \ref MUST be properly initialized, but \ref MAY be + * clobbered by the algorithm. This parameter \ref MAY + * be NULL. + * + * \retval #VPX_CODEC_OK + * Bitstream is parsable and stream information updated + */ + vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, + vpx_codec_stream_info_t *si); + + + /*!\brief Decode data + * + * Processes a buffer of coded data. If the processing results in a new + * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be + * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode + * time stamp) order. Frames produced will always be in PTS (presentation + * time stamp) order. + * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled, + * data and data_sz can contain a fragment of the encoded frame. Fragment + * \#n must contain at least partition \#n, but can also contain subsequent + * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must + * be empty. When no more data is available, this function should be called + * with NULL as data and 0 as data_sz. The memory passed to this function + * must be available until the frame has been decoded. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] data Pointer to this block of new coded data. If + * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted + * for the previously decoded frame. + * \param[in] data_sz Size of the coded data, in bytes. + * \param[in] user_priv Application specific data to associate with + * this frame. + * \param[in] deadline Soft deadline the decoder should attempt to meet, + * in us. Set to zero for unlimited. + * + * \return Returns #VPX_CODEC_OK if the coded data was processed completely + * and future pictures can be decoded without error. Otherwise, + * see the descriptions of the other error codes in ::vpx_codec_err_t + * for recoverability capabilities. + */ + vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, + const uint8_t *data, + unsigned int data_sz, + void *user_priv, + long deadline); + + + /*!\brief Decoded frames iterator + * + * Iterates over a list of the frames available for display. The iterator + * storage should be initialized to NULL to start the iteration. Iteration is + * complete when this function returns NULL. + * + * The list of available frames becomes valid upon completion of the + * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. + * + * \param[in] ctx Pointer to this instance's context + * \param[in,out] iter Iterator storage, initialized to NULL + * + * \return Returns a pointer to an image, if one is ready for display. Frames + * produced will always be in PTS (presentation time stamp) order. + */ + vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, + vpx_codec_iter_t *iter); + + + /*!\defgroup cap_put_frame Frame-Based Decoding Functions + * + * The following functions are required to be implemented for all decoders + * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these functions + * for codecs that don't advertise this capability will result in an error + * code being returned, usually VPX_CODEC_ERROR + * @{ + */ + + /*!\brief put frame callback prototype + * + * This callback is invoked by the decoder to notify the application of + * the availability of decoded image data. + */ + typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv, + const vpx_image_t *img); + + + /*!\brief Register for notification of frame completion. + * + * Registers a given function to be called when a decoded frame is + * available. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] cb Pointer to the callback function + * \param[in] user_priv User's private data + * + * \retval #VPX_CODEC_OK + * Callback successfully registered. + * \retval #VPX_CODEC_ERROR + * Decoder context not initialized, or algorithm not capable of + * posting slice completion. + */ + vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, + vpx_codec_put_frame_cb_fn_t cb, + void *user_priv); + + + /*!@} - end defgroup cap_put_frame */ + + /*!\defgroup cap_put_slice Slice-Based Decoding Functions + * + * The following functions are required to be implemented for all decoders + * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these functions + * for codecs that don't advertise this capability will result in an error + * code being returned, usually VPX_CODEC_ERROR + * @{ + */ + + /*!\brief put slice callback prototype + * + * This callback is invoked by the decoder to notify the application of + * the availability of partially decoded image data. The + */ + typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv, + const vpx_image_t *img, + const vpx_image_rect_t *valid, + const vpx_image_rect_t *update); + + + /*!\brief Register for notification of slice completion. + * + * Registers a given function to be called when a decoded slice is + * available. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] cb Pointer to the callback function + * \param[in] user_priv User's private data + * + * \retval #VPX_CODEC_OK + * Callback successfully registered. + * \retval #VPX_CODEC_ERROR + * Decoder context not initialized, or algorithm not capable of + * posting slice completion. + */ + vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, + vpx_codec_put_slice_cb_fn_t cb, + void *user_priv); + + + /*!@} - end defgroup cap_put_slice*/ + + /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions + * + * The following section is required to be implemented for all decoders + * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability. + * Calling this function for codecs that don't advertise this capability + * will result in an error code being returned, usually VPX_CODEC_ERROR. + * + * \note + * Currently this only works with VP9. + * @{ + */ + + /*!\brief Pass in external frame buffers for the decoder to use. + * + * Registers functions to be called when libvpx needs a frame buffer + * to decode the current frame and a function to be called when libvpx does + * not internally reference the frame buffer. This set function must + * be called before the first call to decode or libvpx will assume the + * default behavior of allocating frame buffers internally. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] cb_get Pointer to the get callback function + * \param[in] cb_release Pointer to the release callback function + * \param[in] cb_priv Callback's private data + * + * \retval #VPX_CODEC_OK + * External frame buffers will be used by libvpx. + * \retval #VPX_CODEC_INVALID_PARAM + * One or more of the callbacks were NULL. + * \retval #VPX_CODEC_ERROR + * Decoder context not initialized, or algorithm not capable of + * using external frame buffers. + * + * \note + * When decoding VP9, the application may be required to pass in at least + * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame + * buffers. + */ + vpx_codec_err_t vpx_codec_set_frame_buffer_functions( + vpx_codec_ctx_t *ctx, + vpx_get_frame_buffer_cb_fn_t cb_get, + vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); + + /*!@} - end defgroup cap_external_frame_buffer */ + + /*!@} - end defgroup decoder*/ +#ifdef __cplusplus +} +#endif +#endif // VPX_VPX_DECODER_H_ + diff --git a/thirdparty/libvpx/vpx/vpx_encoder.h b/thirdparty/libvpx/vpx/vpx_encoder.h new file mode 100644 index 00000000000..955e8735193 --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_encoder.h @@ -0,0 +1,1043 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VPX_VPX_ENCODER_H_ +#define VPX_VPX_ENCODER_H_ + +/*!\defgroup encoder Encoder Algorithm Interface + * \ingroup codec + * This abstraction allows applications using this encoder to easily support + * multiple video formats with minimal code duplication. This section describes + * the interface common to all encoders. + * @{ + */ + +/*!\file + * \brief Describes the encoder algorithm interface to applications. + * + * This file describes the interface between an application and a + * video encoder algorithm. + * + */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "./vpx_codec.h" + + /*! Temporal Scalability: Maximum length of the sequence defining frame + * layer membership + */ +#define VPX_TS_MAX_PERIODICITY 16 + + /*! Temporal Scalability: Maximum number of coding layers */ +#define VPX_TS_MAX_LAYERS 5 + + /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ +#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY + +/*! Temporal+Spatial Scalability: Maximum number of coding layers */ +#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. + +/*!\deprecated Use #VPX_MAX_LAYERS instead. */ +#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. + +/*! Spatial Scalability: Maximum number of coding layers */ +#define VPX_SS_MAX_LAYERS 5 + +/*! Spatial Scalability: Default number of coding layers */ +#define VPX_SS_DEFAULT_LAYERS 1 + + /*!\brief Current ABI version number + * + * \internal + * If this file is altered in any way that changes the ABI, this value + * must be bumped. Examples include, but are not limited to, changing + * types, removing or reassigning enums, adding/removing/rearranging + * fields to structures + */ +#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ + + + /*! \brief Encoder capabilities bitfield + * + * Each encoder advertises the capabilities it supports as part of its + * ::vpx_codec_iface_t interface structure. Capabilities are extra + * interfaces or functionality, and are not required to be supported + * by an encoder. + * + * The available flags are specified by VPX_CODEC_CAP_* defines. + */ +#define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ + + /*! Can output one partition at a time. Each partition is returned in its + * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for + * every partition but the last. In this mode all frames are always + * returned partition by partition. + */ +#define VPX_CODEC_CAP_OUTPUT_PARTITION 0x20000 + +/*! Can support input images at greater than 8 bitdepth. + */ +#define VPX_CODEC_CAP_HIGHBITDEPTH 0x40000 + + /*! \brief Initialization-time Feature Enabling + * + * Certain codec features must be known at initialization time, to allow + * for proper memory allocation. + * + * The available flags are specified by VPX_CODEC_USE_* defines. + */ +#define VPX_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */ +#define VPX_CODEC_USE_OUTPUT_PARTITION 0x20000 /**< Make the encoder output one + partition at a time. */ +#define VPX_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */ + + + /*!\brief Generic fixed size buffer structure + * + * This structure is able to hold a reference to any fixed size buffer. + */ + typedef struct vpx_fixed_buf { + void *buf; /**< Pointer to the data */ + size_t sz; /**< Length of the buffer, in chars */ + } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */ + + + /*!\brief Time Stamp Type + * + * An integer, which when multiplied by the stream's time base, provides + * the absolute time of a sample. + */ + typedef int64_t vpx_codec_pts_t; + + + /*!\brief Compressed Frame Flags + * + * This type represents a bitfield containing information about a compressed + * frame that may be useful to an application. The most significant 16 bits + * can be used by an algorithm to provide additional detail, for example to + * support frame types that are codec specific (MPEG-1 D-frames for example) + */ + typedef uint32_t vpx_codec_frame_flags_t; +#define VPX_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */ +#define VPX_FRAME_IS_DROPPABLE 0x2 /**< frame can be dropped without affecting + the stream (no future frame depends on + this one) */ +#define VPX_FRAME_IS_INVISIBLE 0x4 /**< frame should be decoded but will not + be shown */ +#define VPX_FRAME_IS_FRAGMENT 0x8 /**< this is a fragment of the encoded + frame */ + + /*!\brief Error Resilient flags + * + * These flags define which error resilient features to enable in the + * encoder. The flags are specified through the + * vpx_codec_enc_cfg::g_error_resilient variable. + */ + typedef uint32_t vpx_codec_er_flags_t; +#define VPX_ERROR_RESILIENT_DEFAULT 0x1 /**< Improve resiliency against + losses of whole frames */ +#define VPX_ERROR_RESILIENT_PARTITIONS 0x2 /**< The frame partitions are + independently decodable by the + bool decoder, meaning that + partitions can be decoded even + though earlier partitions have + been lost. Note that intra + prediction is still done over + the partition boundary. */ + + /*!\brief Encoder output packet variants + * + * This enumeration lists the different kinds of data packets that can be + * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY + * extend this list to provide additional functionality. + */ + enum vpx_codec_cx_pkt_kind { + VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ + VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ + VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ + VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ + // Spatial SVC is still experimental and may be removed before the next ABI + // bump. +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) + VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ + VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ +#endif + VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ + }; + + + /*!\brief Encoder output packet + * + * This structure contains the different kinds of output data the encoder + * may produce while compressing a frame. + */ + typedef struct vpx_codec_cx_pkt { + enum vpx_codec_cx_pkt_kind kind; /**< packet variant */ + union { + struct { + void *buf; /**< compressed data buffer */ + size_t sz; /**< length of compressed data */ + vpx_codec_pts_t pts; /**< time stamp to show frame + (in timebase units) */ + unsigned long duration; /**< duration to show frame + (in timebase units) */ + vpx_codec_frame_flags_t flags; /**< flags for this frame */ + int partition_id; /**< the partition id + defines the decoding order + of the partitions. Only + applicable when "output partition" + mode is enabled. First partition + has id 0.*/ + + } frame; /**< data for compressed frame packet */ + vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ + vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ + struct vpx_psnr_pkt { + unsigned int samples[4]; /**< Number of samples, total/y/u/v */ + uint64_t sse[4]; /**< sum squared error, total/y/u/v */ + double psnr[4]; /**< PSNR, total/y/u/v */ + } psnr; /**< data for PSNR packet */ + vpx_fixed_buf_t raw; /**< data for arbitrary packets */ + // Spatial SVC is still experimental and may be removed before the next + // ABI bump. +#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) + size_t layer_sizes[VPX_SS_MAX_LAYERS]; + struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; +#endif + + /* This packet size is fixed to allow codecs to extend this + * interface without having to manage storage for raw packets, + * i.e., if it's smaller than 128 bytes, you can store in the + * packet list directly. + */ + char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */ + } data; /**< packet data */ + } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */ + + + /*!\brief Encoder return output buffer callback + * + * This callback function, when registered, returns with packets when each + * spatial layer is encoded. + */ + // putting the definitions here for now. (agrange: find if there + // is a better place for this) + typedef void (* vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt, + void *user_data); + + /*!\brief Callback function pointer / user data pair storage */ + typedef struct vpx_codec_enc_output_cx_cb_pair { + vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt; /**< Callback function */ + void *user_priv; /**< Pointer to private data */ + } vpx_codec_priv_output_cx_pkt_cb_pair_t; + + /*!\brief Rational Number + * + * This structure holds a fractional value. + */ + typedef struct vpx_rational { + int num; /**< fraction numerator */ + int den; /**< fraction denominator */ + } vpx_rational_t; /**< alias for struct vpx_rational */ + + + /*!\brief Multi-pass Encoding Pass */ + enum vpx_enc_pass { + VPX_RC_ONE_PASS, /**< Single pass mode */ + VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ + VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ + }; + + + /*!\brief Rate control mode */ + enum vpx_rc_mode { + VPX_VBR, /**< Variable Bit Rate (VBR) mode */ + VPX_CBR, /**< Constant Bit Rate (CBR) mode */ + VPX_CQ, /**< Constrained Quality (CQ) mode */ + VPX_Q, /**< Constant Quality (Q) mode */ + }; + + + /*!\brief Keyframe placement mode. + * + * This enumeration determines whether keyframes are placed automatically by + * the encoder or whether this behavior is disabled. Older releases of this + * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled. + * This name is confusing for this behavior, so the new symbols to be used + * are VPX_KF_AUTO and VPX_KF_DISABLED. + */ + enum vpx_kf_mode { + VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */ + VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */ + VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */ + }; + + + /*!\brief Encoded Frame Flags + * + * This type indicates a bitfield to be passed to vpx_codec_encode(), defining + * per-frame boolean values. By convention, bits common to all codecs will be + * named VPX_EFLAG_*, and bits specific to an algorithm will be named + * /algo/_eflag_*. The lower order 16 bits are reserved for common use. + */ + typedef long vpx_enc_frame_flags_t; +#define VPX_EFLAG_FORCE_KF (1<<0) /**< Force this frame to be a keyframe */ + + + /*!\brief Encoder configuration structure + * + * This structure contains the encoder settings that have common representations + * across all codecs. This doesn't imply that all codecs support all features, + * however. + */ + typedef struct vpx_codec_enc_cfg { + /* + * generic settings (g) + */ + + /*!\brief Algorithm specific "usage" value + * + * Algorithms may define multiple values for usage, which may convey the + * intent of how the application intends to use the stream. If this value + * is non-zero, consult the documentation for the codec to determine its + * meaning. + */ + unsigned int g_usage; + + + /*!\brief Maximum number of threads to use + * + * For multi-threaded implementations, use no more than this number of + * threads. The codec may use fewer threads than allowed. The value + * 0 is equivalent to the value 1. + */ + unsigned int g_threads; + + + /*!\brief Bitstream profile to use + * + * Some codecs support a notion of multiple bitstream profiles. Typically + * this maps to a set of features that are turned on or off. Often the + * profile to use is determined by the features of the intended decoder. + * Consult the documentation for the codec to determine the valid values + * for this parameter, or set to zero for a sane default. + */ + unsigned int g_profile; /**< profile of bitstream to use */ + + + + /*!\brief Width of the frame + * + * This value identifies the presentation resolution of the frame, + * in pixels. Note that the frames passed as input to the encoder must + * have this resolution. Frames will be presented by the decoder in this + * resolution, independent of any spatial resampling the encoder may do. + */ + unsigned int g_w; + + + /*!\brief Height of the frame + * + * This value identifies the presentation resolution of the frame, + * in pixels. Note that the frames passed as input to the encoder must + * have this resolution. Frames will be presented by the decoder in this + * resolution, independent of any spatial resampling the encoder may do. + */ + unsigned int g_h; + + /*!\brief Bit-depth of the codec + * + * This value identifies the bit_depth of the codec, + * Only certain bit-depths are supported as identified in the + * vpx_bit_depth_t enum. + */ + vpx_bit_depth_t g_bit_depth; + + /*!\brief Bit-depth of the input frames + * + * This value identifies the bit_depth of the input frames in bits. + * Note that the frames passed as input to the encoder must have + * this bit-depth. + */ + unsigned int g_input_bit_depth; + + /*!\brief Stream timebase units + * + * Indicates the smallest interval of time, in seconds, used by the stream. + * For fixed frame rate material, or variable frame rate material where + * frames are timed at a multiple of a given clock (ex: video capture), + * the \ref RECOMMENDED method is to set the timebase to the reciprocal + * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the + * pts to correspond to the frame number, which can be handy. For + * re-encoding video from containers with absolute time timestamps, the + * \ref RECOMMENDED method is to set the timebase to that of the parent + * container or multimedia framework (ex: 1/1000 for ms, as in FLV). + */ + struct vpx_rational g_timebase; + + + /*!\brief Enable error resilient modes. + * + * The error resilient bitfield indicates to the encoder which features + * it should enable to take measures for streaming over lossy or noisy + * links. + */ + vpx_codec_er_flags_t g_error_resilient; + + + /*!\brief Multi-pass Encoding Mode + * + * This value should be set to the current phase for multi-pass encoding. + * For single pass, set to #VPX_RC_ONE_PASS. + */ + enum vpx_enc_pass g_pass; + + + /*!\brief Allow lagged encoding + * + * If set, this value allows the encoder to consume a number of input + * frames before producing output frames. This allows the encoder to + * base decisions for the current frame on future frames. This does + * increase the latency of the encoding pipeline, so it is not appropriate + * in all situations (ex: realtime encoding). + * + * Note that this is a maximum value -- the encoder may produce frames + * sooner than the given limit. Set this value to 0 to disable this + * feature. + */ + unsigned int g_lag_in_frames; + + + /* + * rate control settings (rc) + */ + + /*!\brief Temporal resampling configuration, if supported by the codec. + * + * Temporal resampling allows the codec to "drop" frames as a strategy to + * meet its target data rate. This can cause temporal discontinuities in + * the encoded video, which may appear as stuttering during playback. This + * trade-off is often acceptable, but for many applications is not. It can + * be disabled in these cases. + * + * Note that not all codecs support this feature. All vpx VPx codecs do. + * For other codecs, consult the documentation for that algorithm. + * + * This threshold is described as a percentage of the target data buffer. + * When the data buffer falls below this percentage of fullness, a + * dropped frame is indicated. Set the threshold to zero (0) to disable + * this feature. + */ + unsigned int rc_dropframe_thresh; + + + /*!\brief Enable/disable spatial resampling, if supported by the codec. + * + * Spatial resampling allows the codec to compress a lower resolution + * version of the frame, which is then upscaled by the encoder to the + * correct presentation resolution. This increases visual quality at + * low data rates, at the expense of CPU time on the encoder/decoder. + */ + unsigned int rc_resize_allowed; + + /*!\brief Internal coded frame width. + * + * If spatial resampling is enabled this specifies the width of the + * encoded frame. + */ + unsigned int rc_scaled_width; + + /*!\brief Internal coded frame height. + * + * If spatial resampling is enabled this specifies the height of the + * encoded frame. + */ + unsigned int rc_scaled_height; + + /*!\brief Spatial resampling up watermark. + * + * This threshold is described as a percentage of the target data buffer. + * When the data buffer rises above this percentage of fullness, the + * encoder will step up to a higher resolution version of the frame. + */ + unsigned int rc_resize_up_thresh; + + + /*!\brief Spatial resampling down watermark. + * + * This threshold is described as a percentage of the target data buffer. + * When the data buffer falls below this percentage of fullness, the + * encoder will step down to a lower resolution version of the frame. + */ + unsigned int rc_resize_down_thresh; + + + /*!\brief Rate control algorithm to use. + * + * Indicates whether the end usage of this stream is to be streamed over + * a bandwidth constrained link, indicating that Constant Bit Rate (CBR) + * mode should be used, or whether it will be played back on a high + * bandwidth link, as from a local disk, where higher variations in + * bitrate are acceptable. + */ + enum vpx_rc_mode rc_end_usage; + + + /*!\brief Two-pass stats buffer. + * + * A buffer containing all of the stats packets produced in the first + * pass, concatenated. + */ + vpx_fixed_buf_t rc_twopass_stats_in; + + /*!\brief first pass mb stats buffer. + * + * A buffer containing all of the first pass mb stats packets produced + * in the first pass, concatenated. + */ + vpx_fixed_buf_t rc_firstpass_mb_stats_in; + + /*!\brief Target data rate + * + * Target bandwidth to use for this stream, in kilobits per second. + */ + unsigned int rc_target_bitrate; + + + /* + * quantizer settings + */ + + + /*!\brief Minimum (Best Quality) Quantizer + * + * The quantizer is the most direct control over the quality of the + * encoded image. The range of valid values for the quantizer is codec + * specific. Consult the documentation for the codec to determine the + * values to use. To determine the range programmatically, call + * vpx_codec_enc_config_default() with a usage value of 0. + */ + unsigned int rc_min_quantizer; + + + /*!\brief Maximum (Worst Quality) Quantizer + * + * The quantizer is the most direct control over the quality of the + * encoded image. The range of valid values for the quantizer is codec + * specific. Consult the documentation for the codec to determine the + * values to use. To determine the range programmatically, call + * vpx_codec_enc_config_default() with a usage value of 0. + */ + unsigned int rc_max_quantizer; + + + /* + * bitrate tolerance + */ + + + /*!\brief Rate control adaptation undershoot control + * + * This value, expressed as a percentage of the target bitrate, + * controls the maximum allowed adaptation speed of the codec. + * This factor controls the maximum amount of bits that can + * be subtracted from the target bitrate in order to compensate + * for prior overshoot. + * + * Valid values in the range 0-1000. + */ + unsigned int rc_undershoot_pct; + + + /*!\brief Rate control adaptation overshoot control + * + * This value, expressed as a percentage of the target bitrate, + * controls the maximum allowed adaptation speed of the codec. + * This factor controls the maximum amount of bits that can + * be added to the target bitrate in order to compensate for + * prior undershoot. + * + * Valid values in the range 0-1000. + */ + unsigned int rc_overshoot_pct; + + + /* + * decoder buffer model parameters + */ + + + /*!\brief Decoder Buffer Size + * + * This value indicates the amount of data that may be buffered by the + * decoding application. Note that this value is expressed in units of + * time (milliseconds). For example, a value of 5000 indicates that the + * client will buffer (at least) 5000ms worth of encoded data. Use the + * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if + * necessary. + */ + unsigned int rc_buf_sz; + + + /*!\brief Decoder Buffer Initial Size + * + * This value indicates the amount of data that will be buffered by the + * decoding application prior to beginning playback. This value is + * expressed in units of time (milliseconds). Use the target bitrate + * (#rc_target_bitrate) to convert to bits/bytes, if necessary. + */ + unsigned int rc_buf_initial_sz; + + + /*!\brief Decoder Buffer Optimal Size + * + * This value indicates the amount of data that the encoder should try + * to maintain in the decoder's buffer. This value is expressed in units + * of time (milliseconds). Use the target bitrate (#rc_target_bitrate) + * to convert to bits/bytes, if necessary. + */ + unsigned int rc_buf_optimal_sz; + + + /* + * 2 pass rate control parameters + */ + + + /*!\brief Two-pass mode CBR/VBR bias + * + * Bias, expressed on a scale of 0 to 100, for determining target size + * for the current frame. The value 0 indicates the optimal CBR mode + * value should be used. The value 100 indicates the optimal VBR mode + * value should be used. Values in between indicate which way the + * encoder should "lean." + */ + unsigned int rc_2pass_vbr_bias_pct; /**< RC mode bias between CBR and VBR(0-100: 0->CBR, 100->VBR) */ + + + /*!\brief Two-pass mode per-GOP minimum bitrate + * + * This value, expressed as a percentage of the target bitrate, indicates + * the minimum bitrate to be used for a single GOP (aka "section") + */ + unsigned int rc_2pass_vbr_minsection_pct; + + + /*!\brief Two-pass mode per-GOP maximum bitrate + * + * This value, expressed as a percentage of the target bitrate, indicates + * the maximum bitrate to be used for a single GOP (aka "section") + */ + unsigned int rc_2pass_vbr_maxsection_pct; + + + /* + * keyframing settings (kf) + */ + + /*!\brief Keyframe placement mode + * + * This value indicates whether the encoder should place keyframes at a + * fixed interval, or determine the optimal placement automatically + * (as governed by the #kf_min_dist and #kf_max_dist parameters) + */ + enum vpx_kf_mode kf_mode; + + + /*!\brief Keyframe minimum interval + * + * This value, expressed as a number of frames, prevents the encoder from + * placing a keyframe nearer than kf_min_dist to the previous keyframe. At + * least kf_min_dist frames non-keyframes will be coded before the next + * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval. + */ + unsigned int kf_min_dist; + + + /*!\brief Keyframe maximum interval + * + * This value, expressed as a number of frames, forces the encoder to code + * a keyframe if one has not been coded in the last kf_max_dist frames. + * A value of 0 implies all frames will be keyframes. Set kf_min_dist + * equal to kf_max_dist for a fixed interval. + */ + unsigned int kf_max_dist; + + /* + * Spatial scalability settings (ss) + */ + + /*!\brief Number of spatial coding layers. + * + * This value specifies the number of spatial coding layers to be used. + */ + unsigned int ss_number_layers; + + /*!\brief Enable auto alt reference flags for each spatial layer. + * + * These values specify if auto alt reference frame is enabled for each + * spatial layer. + */ + int ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; + + /*!\brief Target bitrate for each spatial layer. + * + * These values specify the target coding bitrate to be used for each + * spatial layer. + */ + unsigned int ss_target_bitrate[VPX_SS_MAX_LAYERS]; + + /*!\brief Number of temporal coding layers. + * + * This value specifies the number of temporal layers to be used. + */ + unsigned int ts_number_layers; + + /*!\brief Target bitrate for each temporal layer. + * + * These values specify the target coding bitrate to be used for each + * temporal layer. + */ + unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS]; + + /*!\brief Frame rate decimation factor for each temporal layer. + * + * These values specify the frame rate decimation factors to apply + * to each temporal layer. + */ + unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS]; + + /*!\brief Length of the sequence defining frame temporal layer membership. + * + * This value specifies the length of the sequence that defines the + * membership of frames to temporal layers. For example, if the + * ts_periodicity = 8, then the frames are assigned to coding layers with a + * repeated sequence of length 8. + */ + unsigned int ts_periodicity; + + /*!\brief Template defining the membership of frames to temporal layers. + * + * This array defines the membership of frames to temporal coding layers. + * For a 2-layer encoding that assigns even numbered frames to one temporal + * layer (0) and odd numbered frames to a second temporal layer (1) with + * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). + */ + unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; + + /*!\brief Target bitrate for each spatial/temporal layer. + * + * These values specify the target coding bitrate to be used for each + * spatial/temporal layer. + * + */ + unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; + + /*!\brief Temporal layering mode indicating which temporal layering scheme to use. + * + * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the + * temporal layering mode to use. + * + */ + int temporal_layering_mode; + } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ + + /*!\brief vp9 svc extra configure parameters + * + * This defines max/min quantizers and scale factors for each layer + * + */ + typedef struct vpx_svc_parameters { + int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ + int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ + int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ + int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ + int temporal_layering_mode; /**< Temporal layering mode */ + } vpx_svc_extra_cfg_t; + + + /*!\brief Initialize an encoder instance + * + * Initializes a encoder context using the given interface. Applications + * should call the vpx_codec_enc_init convenience macro instead of this + * function directly, to ensure that the ABI version number parameter + * is properly initialized. + * + * If the library was configured with --disable-multithread, this call + * is not thread safe and should be guarded with a lock if being used + * in a multithreaded context. + * + * \param[in] ctx Pointer to this instance's context. + * \param[in] iface Pointer to the algorithm interface to use. + * \param[in] cfg Configuration to use, if known. May be NULL. + * \param[in] flags Bitfield of VPX_CODEC_USE_* flags + * \param[in] ver ABI version number. Must be set to + * VPX_ENCODER_ABI_VERSION + * \retval #VPX_CODEC_OK + * The decoder algorithm initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory allocation failed. + */ + vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, + vpx_codec_iface_t *iface, + const vpx_codec_enc_cfg_t *cfg, + vpx_codec_flags_t flags, + int ver); + + + /*!\brief Convenience macro for vpx_codec_enc_init_ver() + * + * Ensures the ABI version parameter is properly set. + */ +#define vpx_codec_enc_init(ctx, iface, cfg, flags) \ + vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION) + + + /*!\brief Initialize multi-encoder instance + * + * Initializes multi-encoder context using the given interface. + * Applications should call the vpx_codec_enc_init_multi convenience macro + * instead of this function directly, to ensure that the ABI version number + * parameter is properly initialized. + * + * \param[in] ctx Pointer to this instance's context. + * \param[in] iface Pointer to the algorithm interface to use. + * \param[in] cfg Configuration to use, if known. May be NULL. + * \param[in] num_enc Total number of encoders. + * \param[in] flags Bitfield of VPX_CODEC_USE_* flags + * \param[in] dsf Pointer to down-sampling factors. + * \param[in] ver ABI version number. Must be set to + * VPX_ENCODER_ABI_VERSION + * \retval #VPX_CODEC_OK + * The decoder algorithm initialized. + * \retval #VPX_CODEC_MEM_ERROR + * Memory allocation failed. + */ + vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, + vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *cfg, + int num_enc, + vpx_codec_flags_t flags, + vpx_rational_t *dsf, + int ver); + + + /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver() + * + * Ensures the ABI version parameter is properly set. + */ +#define vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \ + vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \ + VPX_ENCODER_ABI_VERSION) + + + /*!\brief Get a default configuration + * + * Initializes a encoder configuration structure with default values. Supports + * the notion of "usages" so that an algorithm may offer different default + * settings depending on the user's intended goal. This function \ref SHOULD + * be called by all applications to initialize the configuration structure + * before specializing the configuration with application specific values. + * + * \param[in] iface Pointer to the algorithm interface to use. + * \param[out] cfg Configuration buffer to populate. + * \param[in] reserved Must set to 0 for VP8 and VP9. + * + * \retval #VPX_CODEC_OK + * The configuration was populated. + * \retval #VPX_CODEC_INCAPABLE + * Interface is not an encoder interface. + * \retval #VPX_CODEC_INVALID_PARAM + * A parameter was NULL, or the usage value was not recognized. + */ + vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *cfg, + unsigned int reserved); + + + /*!\brief Set or change configuration + * + * Reconfigures an encoder instance according to the given configuration. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] cfg Configuration buffer to use + * + * \retval #VPX_CODEC_OK + * The configuration was populated. + * \retval #VPX_CODEC_INCAPABLE + * Interface is not an encoder interface. + * \retval #VPX_CODEC_INVALID_PARAM + * A parameter was NULL, or the usage value was not recognized. + */ + vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx, + const vpx_codec_enc_cfg_t *cfg); + + + /*!\brief Get global stream headers + * + * Retrieves a stream level global header packet, if supported by the codec. + * + * \param[in] ctx Pointer to this instance's context + * + * \retval NULL + * Encoder does not support global header + * \retval Non-NULL + * Pointer to buffer containing global header packet + */ + vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx); + + +#define VPX_DL_REALTIME (1) /**< deadline parameter analogous to + * VPx REALTIME mode. */ +#define VPX_DL_GOOD_QUALITY (1000000) /**< deadline parameter analogous to + * VPx GOOD QUALITY mode. */ +#define VPX_DL_BEST_QUALITY (0) /**< deadline parameter analogous to + * VPx BEST QUALITY mode. */ + /*!\brief Encode a frame + * + * Encodes a video frame at the given "presentation time." The presentation + * time stamp (PTS) \ref MUST be strictly increasing. + * + * The encoder supports the notion of a soft real-time deadline. Given a + * non-zero value to the deadline parameter, the encoder will make a "best + * effort" guarantee to return before the given time slice expires. It is + * implicit that limiting the available time to encode will degrade the + * output quality. The encoder can be given an unlimited time to produce the + * best possible frame by specifying a deadline of '0'. This deadline + * supercedes the VPx notion of "best quality, good quality, realtime". + * Applications that wish to map these former settings to the new deadline + * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY, + * and #VPX_DL_BEST_QUALITY. + * + * When the last frame has been passed to the encoder, this function should + * continue to be called, with the img parameter set to NULL. This will + * signal the end-of-stream condition to the encoder and allow it to encode + * any held buffers. Encoding is complete when vpx_codec_encode() is called + * and vpx_codec_get_cx_data() returns no data. + * + * \param[in] ctx Pointer to this instance's context + * \param[in] img Image data to encode, NULL to flush. + * \param[in] pts Presentation time stamp, in timebase units. + * \param[in] duration Duration to show frame, in timebase units. + * \param[in] flags Flags to use for encoding this frame. + * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite) + * + * \retval #VPX_CODEC_OK + * The configuration was populated. + * \retval #VPX_CODEC_INCAPABLE + * Interface is not an encoder interface. + * \retval #VPX_CODEC_INVALID_PARAM + * A parameter was NULL, the image format is unsupported, etc. + */ + vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, + const vpx_image_t *img, + vpx_codec_pts_t pts, + unsigned long duration, + vpx_enc_frame_flags_t flags, + unsigned long deadline); + + /*!\brief Set compressed data output buffer + * + * Sets the buffer that the codec should output the compressed data + * into. This call effectively sets the buffer pointer returned in the + * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be + * appended into this buffer. The buffer is preserved across frames, + * so applications must periodically call this function after flushing + * the accumulated compressed data to disk or to the network to reset + * the pointer to the buffer's head. + * + * `pad_before` bytes will be skipped before writing the compressed + * data, and `pad_after` bytes will be appended to the packet. The size + * of the packet will be the sum of the size of the actual compressed + * data, pad_before, and pad_after. The padding bytes will be preserved + * (not overwritten). + * + * Note that calling this function does not guarantee that the returned + * compressed data will be placed into the specified buffer. In the + * event that the encoded data will not fit into the buffer provided, + * the returned packet \ref MAY point to an internal buffer, as it would + * if this call were never used. In this event, the output packet will + * NOT have any padding, and the application must free space and copy it + * to the proper place. This is of particular note in configurations + * that may output multiple packets for a single encoded frame (e.g., lagged + * encoding) or if the application does not reset the buffer periodically. + * + * Applications may restore the default behavior of the codec providing + * the compressed data buffer by calling this function with a NULL + * buffer. + * + * Applications \ref MUSTNOT call this function during iteration of + * vpx_codec_get_cx_data(). + * + * \param[in] ctx Pointer to this instance's context + * \param[in] buf Buffer to store compressed data into + * \param[in] pad_before Bytes to skip before writing compressed data + * \param[in] pad_after Bytes to skip after writing compressed data + * + * \retval #VPX_CODEC_OK + * The buffer was set successfully. + * \retval #VPX_CODEC_INVALID_PARAM + * A parameter was NULL, the image format is unsupported, etc. + */ + vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx, + const vpx_fixed_buf_t *buf, + unsigned int pad_before, + unsigned int pad_after); + + + /*!\brief Encoded data iterator + * + * Iterates over a list of data packets to be passed from the encoder to the + * application. The different kinds of packets available are enumerated in + * #vpx_codec_cx_pkt_kind. + * + * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's + * muxer. Multiple compressed frames may be in the list. + * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer. + * + * The application \ref MUST silently ignore any packet kinds that it does + * not recognize or support. + * + * The data buffers returned from this function are only guaranteed to be + * valid until the application makes another call to any vpx_codec_* function. + * + * \param[in] ctx Pointer to this instance's context + * \param[in,out] iter Iterator storage, initialized to NULL + * + * \return Returns a pointer to an output data packet (compressed frame data, + * two-pass statistics, etc.) or NULL to signal end-of-list. + * + */ + const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, + vpx_codec_iter_t *iter); + + + /*!\brief Get Preview Frame + * + * Returns an image that can be used as a preview. Shows the image as it would + * exist at the decompressor. The application \ref MUST NOT write into this + * image buffer. + * + * \param[in] ctx Pointer to this instance's context + * + * \return Returns a pointer to a preview image, or NULL if no image is + * available. + * + */ + const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx); + + + /*!@} - end defgroup encoder*/ +#ifdef __cplusplus +} +#endif +#endif // VPX_VPX_ENCODER_H_ + diff --git a/thirdparty/libvpx/vpx/vpx_frame_buffer.h b/thirdparty/libvpx/vpx/vpx_frame_buffer.h new file mode 100644 index 00000000000..9036459af0a --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_frame_buffer.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_FRAME_BUFFER_H_ +#define VPX_VPX_FRAME_BUFFER_H_ + +/*!\file + * \brief Describes the decoder external frame buffer interface. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./vpx_integer.h" + +/*!\brief The maximum number of work buffers used by libvpx. + * Support maximum 4 threads to decode video in parallel. + * Each thread will use one work buffer. + * TODO(hkuang): Add support to set number of worker threads dynamically. + */ +#define VPX_MAXIMUM_WORK_BUFFERS 8 + +/*!\brief The maximum number of reference buffers that a VP9 encoder may use. + */ +#define VP9_MAXIMUM_REF_BUFFERS 8 + +/*!\brief External frame buffer + * + * This structure holds allocated frame buffers used by the decoder. + */ +typedef struct vpx_codec_frame_buffer { + uint8_t *data; /**< Pointer to the data buffer */ + size_t size; /**< Size of data in bytes */ + void *priv; /**< Frame's private data */ +} vpx_codec_frame_buffer_t; + +/*!\brief get frame buffer callback prototype + * + * This callback is invoked by the decoder to retrieve data for the frame + * buffer in order for the decode call to complete. The callback must + * allocate at least min_size in bytes and assign it to fb->data. The callback + * must zero out all the data allocated. Then the callback must set fb->size + * to the allocated size. The application does not need to align the allocated + * data. The callback is triggered when the decoder needs a frame buffer to + * decode a compressed image into. This function may be called more than once + * for every call to vpx_codec_decode. The application may set fb->priv to + * some data which will be passed back in the ximage and the release function + * call. |fb| is guaranteed to not be NULL. On success the callback must + * return 0. Any failure the callback must return a value less than 0. + * + * \param[in] priv Callback's private data + * \param[in] new_size Size in bytes needed by the buffer + * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t + */ +typedef int (*vpx_get_frame_buffer_cb_fn_t)( + void *priv, size_t min_size, vpx_codec_frame_buffer_t *fb); + +/*!\brief release frame buffer callback prototype + * + * This callback is invoked by the decoder when the frame buffer is not + * referenced by any other buffers. |fb| is guaranteed to not be NULL. On + * success the callback must return 0. Any failure the callback must return + * a value less than 0. + * + * \param[in] priv Callback's private data + * \param[in] fb Pointer to vpx_codec_frame_buffer_t + */ +typedef int (*vpx_release_frame_buffer_cb_fn_t)( + void *priv, vpx_codec_frame_buffer_t *fb); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_FRAME_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx/vpx_image.h b/thirdparty/libvpx/vpx/vpx_image.h new file mode 100644 index 00000000000..7958c69806e --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_image.h @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/*!\file + * \brief Describes the vpx image descriptor and associated operations + * + */ +#ifndef VPX_VPX_IMAGE_H_ +#define VPX_VPX_IMAGE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + + /*!\brief Current ABI version number + * + * \internal + * If this file is altered in any way that changes the ABI, this value + * must be bumped. Examples include, but are not limited to, changing + * types, removing or reassigning enums, adding/removing/rearranging + * fields to structures + */ +#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ + + +#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ +#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */ +#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */ +#define VPX_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */ + + /*!\brief List of supported image formats */ + typedef enum vpx_img_fmt { + VPX_IMG_FMT_NONE, + VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */ + VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */ + VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */ + VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */ + VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */ + VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */ + VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */ + VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */ + VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */ + VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */ + VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */ + VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */ + VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */ + VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ + VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, + VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */ + VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, + VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, + VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, + VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, + VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6, + VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, + VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, + VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, + VPX_IMG_FMT_I44016 = VPX_IMG_FMT_I440 | VPX_IMG_FMT_HIGHBITDEPTH + } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ + + /*!\brief List of supported color spaces */ + typedef enum vpx_color_space { + VPX_CS_UNKNOWN = 0, /**< Unknown */ + VPX_CS_BT_601 = 1, /**< BT.601 */ + VPX_CS_BT_709 = 2, /**< BT.709 */ + VPX_CS_SMPTE_170 = 3, /**< SMPTE.170 */ + VPX_CS_SMPTE_240 = 4, /**< SMPTE.240 */ + VPX_CS_BT_2020 = 5, /**< BT.2020 */ + VPX_CS_RESERVED = 6, /**< Reserved */ + VPX_CS_SRGB = 7 /**< sRGB */ + } vpx_color_space_t; /**< alias for enum vpx_color_space */ + + /*!\brief List of supported color range */ + typedef enum vpx_color_range { + VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */ + VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */ + } vpx_color_range_t; /**< alias for enum vpx_color_range */ + + /**\brief Image Descriptor */ + typedef struct vpx_image { + vpx_img_fmt_t fmt; /**< Image Format */ + vpx_color_space_t cs; /**< Color Space */ + vpx_color_range_t range; /**< Color Range */ + + /* Image storage dimensions */ + unsigned int w; /**< Stored image width */ + unsigned int h; /**< Stored image height */ + unsigned int bit_depth; /**< Stored image bit-depth */ + + /* Image display dimensions */ + unsigned int d_w; /**< Displayed image width */ + unsigned int d_h; /**< Displayed image height */ + + /* Image intended rendering dimensions */ + unsigned int r_w; /**< Intended rendering image width */ + unsigned int r_h; /**< Intended rendering image height */ + + /* Chroma subsampling info */ + unsigned int x_chroma_shift; /**< subsampling order, X */ + unsigned int y_chroma_shift; /**< subsampling order, Y */ + + /* Image data pointers. */ +#define VPX_PLANE_PACKED 0 /**< To be used for all packed formats */ +#define VPX_PLANE_Y 0 /**< Y (Luminance) plane */ +#define VPX_PLANE_U 1 /**< U (Chroma) plane */ +#define VPX_PLANE_V 2 /**< V (Chroma) plane */ +#define VPX_PLANE_ALPHA 3 /**< A (Transparency) plane */ + unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */ + int stride[4]; /**< stride between rows for each plane */ + + int bps; /**< bits per sample (for packed formats) */ + + /* The following member may be set by the application to associate data + * with this image. + */ + void *user_priv; /**< may be set by the application to associate data + * with this image. */ + + /* The following members should be treated as private. */ + unsigned char *img_data; /**< private */ + int img_data_owner; /**< private */ + int self_allocd; /**< private */ + + void *fb_priv; /**< Frame buffer data associated with the image. */ + } vpx_image_t; /**< alias for struct vpx_image */ + + /**\brief Representation of a rectangle on a surface */ + typedef struct vpx_image_rect { + unsigned int x; /**< leftmost column */ + unsigned int y; /**< topmost row */ + unsigned int w; /**< width */ + unsigned int h; /**< height */ + } vpx_image_rect_t; /**< alias for struct vpx_image_rect */ + + /*!\brief Open a descriptor, allocating storage for the underlying image + * + * Returns a descriptor for storing an image of the given format. The + * storage for the descriptor is allocated on the heap. + * + * \param[in] img Pointer to storage for descriptor. If this parameter + * is NULL, the storage for the descriptor will be + * allocated on the heap. + * \param[in] fmt Format for the image + * \param[in] d_w Width of the image + * \param[in] d_h Height of the image + * \param[in] align Alignment, in bytes, of the image buffer and + * each row in the image(stride). + * + * \return Returns a pointer to the initialized image descriptor. If the img + * parameter is non-null, the value of the img parameter will be + * returned. + */ + vpx_image_t *vpx_img_alloc(vpx_image_t *img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int align); + + /*!\brief Open a descriptor, using existing storage for the underlying image + * + * Returns a descriptor for storing an image of the given format. The + * storage for descriptor has been allocated elsewhere, and a descriptor is + * desired to "wrap" that storage. + * + * \param[in] img Pointer to storage for descriptor. If this parameter + * is NULL, the storage for the descriptor will be + * allocated on the heap. + * \param[in] fmt Format for the image + * \param[in] d_w Width of the image + * \param[in] d_h Height of the image + * \param[in] align Alignment, in bytes, of each row in the image. + * \param[in] img_data Storage to use for the image + * + * \return Returns a pointer to the initialized image descriptor. If the img + * parameter is non-null, the value of the img parameter will be + * returned. + */ + vpx_image_t *vpx_img_wrap(vpx_image_t *img, + vpx_img_fmt_t fmt, + unsigned int d_w, + unsigned int d_h, + unsigned int align, + unsigned char *img_data); + + + /*!\brief Set the rectangle identifying the displayed portion of the image + * + * Updates the displayed rectangle (aka viewport) on the image surface to + * match the specified coordinates and size. + * + * \param[in] img Image descriptor + * \param[in] x leftmost column + * \param[in] y topmost row + * \param[in] w width + * \param[in] h height + * + * \return 0 if the requested rectangle is valid, nonzero otherwise. + */ + int vpx_img_set_rect(vpx_image_t *img, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h); + + + /*!\brief Flip the image vertically (top for bottom) + * + * Adjusts the image descriptor's pointers and strides to make the image + * be referenced upside-down. + * + * \param[in] img Image descriptor + */ + void vpx_img_flip(vpx_image_t *img); + + /*!\brief Close an image descriptor + * + * Frees all allocated storage associated with an image descriptor. + * + * \param[in] img Image descriptor + */ + void vpx_img_free(vpx_image_t *img); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_IMAGE_H_ diff --git a/thirdparty/libvpx/vpx/vpx_integer.h b/thirdparty/libvpx/vpx/vpx_integer.h new file mode 100644 index 00000000000..829c9d132c8 --- /dev/null +++ b/thirdparty/libvpx/vpx/vpx_integer.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_VPX_INTEGER_H_ +#define VPX_VPX_INTEGER_H_ + +/* get ptrdiff_t, size_t, wchar_t, NULL */ +#include + +#if defined(_MSC_VER) +#define VPX_FORCE_INLINE __forceinline +#define VPX_INLINE __inline +#else +#define VPX_FORCE_INLINE __inline__ __attribute__(always_inline) +// TODO(jbb): Allow a way to force inline off for older compilers. +#define VPX_INLINE inline +#endif + +#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES) +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +#if (defined(_MSC_VER) && (_MSC_VER < 1600)) +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; +#define INT64_MAX _I64_MAX +#define INT32_MAX _I32_MAX +#define INT32_MIN _I32_MIN +#define INT16_MAX _I16_MAX +#define INT16_MIN _I16_MIN +#endif + +#ifndef _UINTPTR_T_DEFINED +typedef size_t uintptr_t; +#endif + +#else + +/* Most platforms have the C99 standard integer types. */ + +#if defined(__cplusplus) +# if !defined(__STDC_FORMAT_MACROS) +# define __STDC_FORMAT_MACROS +# endif +# if !defined(__STDC_LIMIT_MACROS) +# define __STDC_LIMIT_MACROS +# endif +#endif // __cplusplus + +#include + +#endif + +/* VS2010 defines stdint.h, but not inttypes.h */ +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define PRId64 "I64d" +#else +#include +#endif + +#endif // VPX_VPX_INTEGER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c new file mode 100644 index 00000000000..f734e480279 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vpx_idct16x16_1_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8x8_t d2u8, d3u8, d30u8, d31u8; + uint64x1_t d2u64, d3u64, d4u64, d5u64; + uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; + int16x8_t q0s16; + uint8_t *d1, *d2; + int16_t i, j, a1, cospi_16_64 = 11585; + int16_t out = dct_const_round_shift(input[0] * cospi_16_64); + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 6); + + q0s16 = vdupq_n_s16(a1); + q0u16 = vreinterpretq_u16_s16(q0s16); + + for (d1 = d2 = dest, i = 0; i < 4; i++) { + for (j = 0; j < 2; j++) { + d2u64 = vld1_u64((const uint64_t *)d1); + d3u64 = vld1_u64((const uint64_t *)(d1 + 8)); + d1 += dest_stride; + d4u64 = vld1_u64((const uint64_t *)d1); + d5u64 = vld1_u64((const uint64_t *)(d1 + 8)); + d1 += dest_stride; + + q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); + q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); + q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); + q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); + + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); + vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8)); + d2 += dest_stride; + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c new file mode 100644 index 00000000000..651ebb21f99 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c @@ -0,0 +1,1317 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" +#include "vpx_dsp/txfm_common.h" + +static INLINE void TRANSPOSE8X8( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + *q12s16 = vcombine_s16(d17s16, d25s16); + *q13s16 = vcombine_s16(d19s16, d27s16); + *q14s16 = vcombine_s16(d21s16, d29s16); + *q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), + vreinterpretq_s32_s16(*q10s16)); + q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), + vreinterpretq_s32_s16(*q11s16)); + q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), + vreinterpretq_s32_s16(*q14s16)); + q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), + vreinterpretq_s32_s16(*q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + *q8s16 = q0x2s16.val[0]; + *q9s16 = q0x2s16.val[1]; + *q10s16 = q1x2s16.val[0]; + *q11s16 = q1x2s16.val[1]; + *q12s16 = q2x2s16.val[0]; + *q13s16 = q2x2s16.val[1]; + *q14s16 = q3x2s16.val[0]; + *q15s16 = q3x2s16.val[1]; + return; +} + +void vpx_idct16x16_256_add_neon_pass1( + int16_t *in, + int16_t *out, + int output_stride) { + int16x4_t d0s16, d1s16, d2s16, d3s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + uint64x1_t d16u64, d17u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; + uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q5s32, q6s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; + int16x8x2_t q0x2s16; + + q0x2s16 = vld2q_s16(in); + q8s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q9s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q10s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q11s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q12s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q13s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q14s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q15s16 = q0x2s16.val[0]; + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + d16s16 = vget_low_s16(q8s16); + d17s16 = vget_high_s16(q8s16); + d18s16 = vget_low_s16(q9s16); + d19s16 = vget_high_s16(q9s16); + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d22s16 = vget_low_s16(q11s16); + d23s16 = vget_high_s16(q11s16); + d24s16 = vget_low_s16(q12s16); + d25s16 = vget_high_s16(q12s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + d30s16 = vget_low_s16(q15s16); + d31s16 = vget_high_s16(q15s16); + + // stage 3 + d0s16 = vdup_n_s16(cospi_28_64); + d1s16 = vdup_n_s16(cospi_4_64); + + q2s32 = vmull_s16(d18s16, d0s16); + q3s32 = vmull_s16(d19s16, d0s16); + q5s32 = vmull_s16(d18s16, d1s16); + q6s32 = vmull_s16(d19s16, d1s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); + q5s32 = vmlal_s16(q5s32, d30s16, d0s16); + q6s32 = vmlal_s16(q6s32, d31s16, d0s16); + + d2s16 = vdup_n_s16(cospi_12_64); + d3s16 = vdup_n_s16(cospi_20_64); + + d8s16 = vqrshrn_n_s32(q2s32, 14); + d9s16 = vqrshrn_n_s32(q3s32, 14); + d14s16 = vqrshrn_n_s32(q5s32, 14); + d15s16 = vqrshrn_n_s32(q6s32, 14); + q4s16 = vcombine_s16(d8s16, d9s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + q2s32 = vmull_s16(d26s16, d2s16); + q3s32 = vmull_s16(d27s16, d2s16); + q9s32 = vmull_s16(d26s16, d3s16); + q15s32 = vmull_s16(d27s16, d3s16); + + q2s32 = vmlsl_s16(q2s32, d22s16, d3s16); + q3s32 = vmlsl_s16(q3s32, d23s16, d3s16); + q9s32 = vmlal_s16(q9s32, d22s16, d2s16); + q15s32 = vmlal_s16(q15s32, d23s16, d2s16); + + d10s16 = vqrshrn_n_s32(q2s32, 14); + d11s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q15s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + // stage 4 + d30s16 = vdup_n_s16(cospi_16_64); + + q2s32 = vmull_s16(d16s16, d30s16); + q11s32 = vmull_s16(d17s16, d30s16); + q0s32 = vmull_s16(d24s16, d30s16); + q1s32 = vmull_s16(d25s16, d30s16); + + d30s16 = vdup_n_s16(cospi_24_64); + d31s16 = vdup_n_s16(cospi_8_64); + + q3s32 = vaddq_s32(q2s32, q0s32); + q12s32 = vaddq_s32(q11s32, q1s32); + q13s32 = vsubq_s32(q2s32, q0s32); + q1s32 = vsubq_s32(q11s32, q1s32); + + d16s16 = vqrshrn_n_s32(q3s32, 14); + d17s16 = vqrshrn_n_s32(q12s32, 14); + d18s16 = vqrshrn_n_s32(q13s32, 14); + d19s16 = vqrshrn_n_s32(q1s32, 14); + q8s16 = vcombine_s16(d16s16, d17s16); + q9s16 = vcombine_s16(d18s16, d19s16); + + q0s32 = vmull_s16(d20s16, d31s16); + q1s32 = vmull_s16(d21s16, d31s16); + q12s32 = vmull_s16(d20s16, d30s16); + q13s32 = vmull_s16(d21s16, d30s16); + + q0s32 = vmlal_s16(q0s32, d28s16, d30s16); + q1s32 = vmlal_s16(q1s32, d29s16, d30s16); + q12s32 = vmlsl_s16(q12s32, d28s16, d31s16); + q13s32 = vmlsl_s16(q13s32, d29s16, d31s16); + + d22s16 = vqrshrn_n_s32(q0s32, 14); + d23s16 = vqrshrn_n_s32(q1s32, 14); + d20s16 = vqrshrn_n_s32(q12s32, 14); + d21s16 = vqrshrn_n_s32(q13s32, 14); + q10s16 = vcombine_s16(d20s16, d21s16); + q11s16 = vcombine_s16(d22s16, d23s16); + + q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + q14s16 = vsubq_s16(q7s16, q6s16); + q15s16 = vaddq_s16(q6s16, q7s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + + // stage 5 + q0s16 = vaddq_s16(q8s16, q11s16); + q1s16 = vaddq_s16(q9s16, q10s16); + q2s16 = vsubq_s16(q9s16, q10s16); + q3s16 = vsubq_s16(q8s16, q11s16); + + d16s16 = vdup_n_s16(cospi_16_64); + + q11s32 = vmull_s16(d26s16, d16s16); + q12s32 = vmull_s16(d27s16, d16s16); + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + + q6s32 = vsubq_s32(q9s32, q11s32); + q13s32 = vsubq_s32(q10s32, q12s32); + q9s32 = vaddq_s32(q9s32, q11s32); + q10s32 = vaddq_s32(q10s32, q12s32); + + d10s16 = vqrshrn_n_s32(q6s32, 14); + d11s16 = vqrshrn_n_s32(q13s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q10s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + // stage 6 + q8s16 = vaddq_s16(q0s16, q15s16); + q9s16 = vaddq_s16(q1s16, q6s16); + q10s16 = vaddq_s16(q2s16, q5s16); + q11s16 = vaddq_s16(q3s16, q4s16); + q12s16 = vsubq_s16(q3s16, q4s16); + q13s16 = vsubq_s16(q2s16, q5s16); + q14s16 = vsubq_s16(q1s16, q6s16); + q15s16 = vsubq_s16(q0s16, q15s16); + + d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); + d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); + d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); + d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); + d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); + d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); + d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); + d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); + d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); + d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); + d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); + + // store the data + output_stride >>= 1; // output_stride / 2, out is int16_t + vst1_u64((uint64_t *)out, d16u64); + out += output_stride; + vst1_u64((uint64_t *)out, d17u64); + out += output_stride; + vst1_u64((uint64_t *)out, d18u64); + out += output_stride; + vst1_u64((uint64_t *)out, d19u64); + out += output_stride; + vst1_u64((uint64_t *)out, d20u64); + out += output_stride; + vst1_u64((uint64_t *)out, d21u64); + out += output_stride; + vst1_u64((uint64_t *)out, d22u64); + out += output_stride; + vst1_u64((uint64_t *)out, d23u64); + out += output_stride; + vst1_u64((uint64_t *)out, d24u64); + out += output_stride; + vst1_u64((uint64_t *)out, d25u64); + out += output_stride; + vst1_u64((uint64_t *)out, d26u64); + out += output_stride; + vst1_u64((uint64_t *)out, d27u64); + out += output_stride; + vst1_u64((uint64_t *)out, d28u64); + out += output_stride; + vst1_u64((uint64_t *)out, d29u64); + out += output_stride; + vst1_u64((uint64_t *)out, d30u64); + out += output_stride; + vst1_u64((uint64_t *)out, d31u64); + return; +} + +void vpx_idct16x16_256_add_neon_pass2( + int16_t *src, + int16_t *out, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride) { + uint8_t *d; + uint8x8_t d12u8, d13u8; + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + uint64x1_t d24u64, d25u64, d26u64, d27u64; + int64x1_t d12s64, d13s64; + uint16x8_t q2u16, q3u16, q4u16, q5u16, q8u16; + uint16x8_t q9u16, q12u16, q13u16, q14u16, q15u16; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32; + int16x8x2_t q0x2s16; + + q0x2s16 = vld2q_s16(src); + q8s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q9s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q10s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q11s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q12s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q13s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q14s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q15s16 = q0x2s16.val[0]; + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + d16s16 = vget_low_s16(q8s16); + d17s16 = vget_high_s16(q8s16); + d18s16 = vget_low_s16(q9s16); + d19s16 = vget_high_s16(q9s16); + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d22s16 = vget_low_s16(q11s16); + d23s16 = vget_high_s16(q11s16); + d24s16 = vget_low_s16(q12s16); + d25s16 = vget_high_s16(q12s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + d30s16 = vget_low_s16(q15s16); + d31s16 = vget_high_s16(q15s16); + + // stage 3 + d12s16 = vdup_n_s16(cospi_30_64); + d13s16 = vdup_n_s16(cospi_2_64); + + q2s32 = vmull_s16(d16s16, d12s16); + q3s32 = vmull_s16(d17s16, d12s16); + q1s32 = vmull_s16(d16s16, d13s16); + q4s32 = vmull_s16(d17s16, d13s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d13s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d13s16); + q1s32 = vmlal_s16(q1s32, d30s16, d12s16); + q4s32 = vmlal_s16(q4s32, d31s16, d12s16); + + d0s16 = vqrshrn_n_s32(q2s32, 14); + d1s16 = vqrshrn_n_s32(q3s32, 14); + d14s16 = vqrshrn_n_s32(q1s32, 14); + d15s16 = vqrshrn_n_s32(q4s32, 14); + q0s16 = vcombine_s16(d0s16, d1s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + d30s16 = vdup_n_s16(cospi_14_64); + d31s16 = vdup_n_s16(cospi_18_64); + + q2s32 = vmull_s16(d24s16, d30s16); + q3s32 = vmull_s16(d25s16, d30s16); + q4s32 = vmull_s16(d24s16, d31s16); + q5s32 = vmull_s16(d25s16, d31s16); + + q2s32 = vmlsl_s16(q2s32, d22s16, d31s16); + q3s32 = vmlsl_s16(q3s32, d23s16, d31s16); + q4s32 = vmlal_s16(q4s32, d22s16, d30s16); + q5s32 = vmlal_s16(q5s32, d23s16, d30s16); + + d2s16 = vqrshrn_n_s32(q2s32, 14); + d3s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q4s32, 14); + d13s16 = vqrshrn_n_s32(q5s32, 14); + q1s16 = vcombine_s16(d2s16, d3s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + d30s16 = vdup_n_s16(cospi_22_64); + d31s16 = vdup_n_s16(cospi_10_64); + + q11s32 = vmull_s16(d20s16, d30s16); + q12s32 = vmull_s16(d21s16, d30s16); + q4s32 = vmull_s16(d20s16, d31s16); + q5s32 = vmull_s16(d21s16, d31s16); + + q11s32 = vmlsl_s16(q11s32, d26s16, d31s16); + q12s32 = vmlsl_s16(q12s32, d27s16, d31s16); + q4s32 = vmlal_s16(q4s32, d26s16, d30s16); + q5s32 = vmlal_s16(q5s32, d27s16, d30s16); + + d4s16 = vqrshrn_n_s32(q11s32, 14); + d5s16 = vqrshrn_n_s32(q12s32, 14); + d11s16 = vqrshrn_n_s32(q5s32, 14); + d10s16 = vqrshrn_n_s32(q4s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + d30s16 = vdup_n_s16(cospi_6_64); + d31s16 = vdup_n_s16(cospi_26_64); + + q10s32 = vmull_s16(d28s16, d30s16); + q11s32 = vmull_s16(d29s16, d30s16); + q12s32 = vmull_s16(d28s16, d31s16); + q13s32 = vmull_s16(d29s16, d31s16); + + q10s32 = vmlsl_s16(q10s32, d18s16, d31s16); + q11s32 = vmlsl_s16(q11s32, d19s16, d31s16); + q12s32 = vmlal_s16(q12s32, d18s16, d30s16); + q13s32 = vmlal_s16(q13s32, d19s16, d30s16); + + d6s16 = vqrshrn_n_s32(q10s32, 14); + d7s16 = vqrshrn_n_s32(q11s32, 14); + d8s16 = vqrshrn_n_s32(q12s32, 14); + d9s16 = vqrshrn_n_s32(q13s32, 14); + q3s16 = vcombine_s16(d6s16, d7s16); + q4s16 = vcombine_s16(d8s16, d9s16); + + // stage 3 + q9s16 = vsubq_s16(q0s16, q1s16); + q0s16 = vaddq_s16(q0s16, q1s16); + q10s16 = vsubq_s16(q3s16, q2s16); + q11s16 = vaddq_s16(q2s16, q3s16); + q12s16 = vaddq_s16(q4s16, q5s16); + q13s16 = vsubq_s16(q4s16, q5s16); + q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q6s16, q7s16); + + // stage 4 + d18s16 = vget_low_s16(q9s16); + d19s16 = vget_high_s16(q9s16); + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + + d30s16 = vdup_n_s16(cospi_8_64); + d31s16 = vdup_n_s16(cospi_24_64); + + q2s32 = vmull_s16(d18s16, d31s16); + q3s32 = vmull_s16(d19s16, d31s16); + q4s32 = vmull_s16(d28s16, d31s16); + q5s32 = vmull_s16(d29s16, d31s16); + + q2s32 = vmlal_s16(q2s32, d28s16, d30s16); + q3s32 = vmlal_s16(q3s32, d29s16, d30s16); + q4s32 = vmlsl_s16(q4s32, d18s16, d30s16); + q5s32 = vmlsl_s16(q5s32, d19s16, d30s16); + + d12s16 = vqrshrn_n_s32(q2s32, 14); + d13s16 = vqrshrn_n_s32(q3s32, 14); + d2s16 = vqrshrn_n_s32(q4s32, 14); + d3s16 = vqrshrn_n_s32(q5s32, 14); + q1s16 = vcombine_s16(d2s16, d3s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + q3s16 = q11s16; + q4s16 = q12s16; + + d30s16 = vdup_n_s16(-cospi_8_64); + q11s32 = vmull_s16(d26s16, d30s16); + q12s32 = vmull_s16(d27s16, d30s16); + q8s32 = vmull_s16(d20s16, d30s16); + q9s32 = vmull_s16(d21s16, d30s16); + + q11s32 = vmlsl_s16(q11s32, d20s16, d31s16); + q12s32 = vmlsl_s16(q12s32, d21s16, d31s16); + q8s32 = vmlal_s16(q8s32, d26s16, d31s16); + q9s32 = vmlal_s16(q9s32, d27s16, d31s16); + + d4s16 = vqrshrn_n_s32(q11s32, 14); + d5s16 = vqrshrn_n_s32(q12s32, 14); + d10s16 = vqrshrn_n_s32(q8s32, 14); + d11s16 = vqrshrn_n_s32(q9s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + // stage 5 + q8s16 = vaddq_s16(q0s16, q3s16); + q9s16 = vaddq_s16(q1s16, q2s16); + q10s16 = vsubq_s16(q1s16, q2s16); + q11s16 = vsubq_s16(q0s16, q3s16); + q12s16 = vsubq_s16(q7s16, q4s16); + q13s16 = vsubq_s16(q6s16, q5s16); + q14s16 = vaddq_s16(q6s16, q5s16); + q15s16 = vaddq_s16(q7s16, q4s16); + + // stage 6 + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d22s16 = vget_low_s16(q11s16); + d23s16 = vget_high_s16(q11s16); + d24s16 = vget_low_s16(q12s16); + d25s16 = vget_high_s16(q12s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + + d14s16 = vdup_n_s16(cospi_16_64); + + q3s32 = vmull_s16(d26s16, d14s16); + q4s32 = vmull_s16(d27s16, d14s16); + q0s32 = vmull_s16(d20s16, d14s16); + q1s32 = vmull_s16(d21s16, d14s16); + + q5s32 = vsubq_s32(q3s32, q0s32); + q6s32 = vsubq_s32(q4s32, q1s32); + q10s32 = vaddq_s32(q3s32, q0s32); + q4s32 = vaddq_s32(q4s32, q1s32); + + d4s16 = vqrshrn_n_s32(q5s32, 14); + d5s16 = vqrshrn_n_s32(q6s32, 14); + d10s16 = vqrshrn_n_s32(q10s32, 14); + d11s16 = vqrshrn_n_s32(q4s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q0s32 = vmull_s16(d22s16, d14s16); + q1s32 = vmull_s16(d23s16, d14s16); + q13s32 = vmull_s16(d24s16, d14s16); + q6s32 = vmull_s16(d25s16, d14s16); + + q10s32 = vsubq_s32(q13s32, q0s32); + q4s32 = vsubq_s32(q6s32, q1s32); + q13s32 = vaddq_s32(q13s32, q0s32); + q6s32 = vaddq_s32(q6s32, q1s32); + + d6s16 = vqrshrn_n_s32(q10s32, 14); + d7s16 = vqrshrn_n_s32(q4s32, 14); + d8s16 = vqrshrn_n_s32(q13s32, 14); + d9s16 = vqrshrn_n_s32(q6s32, 14); + q3s16 = vcombine_s16(d6s16, d7s16); + q4s16 = vcombine_s16(d8s16, d9s16); + + // stage 7 + if (skip_adding != 0) { + d = dest; + // load the data in pass1 + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + d13s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + + q12s16 = vaddq_s16(q0s16, q15s16); + q13s16 = vaddq_s16(q1s16, q14s16); + q12s16 = vrshrq_n_s16(q12s16, 6); + q13s16 = vrshrq_n_s16(q13s16, 6); + q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), + vreinterpret_u8_s64(d12s64)); + q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), + vreinterpret_u8_s64(d13s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); + d += dest_stride; + q14s16 = vsubq_s16(q1s16, q14s16); + q15s16 = vsubq_s16(q0s16, q15s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + pass1Output += 8; + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + d13s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q12s16 = vaddq_s16(q10s16, q5s16); + q13s16 = vaddq_s16(q11s16, q4s16); + q12s16 = vrshrq_n_s16(q12s16, 6); + q13s16 = vrshrq_n_s16(q13s16, 6); + q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), + vreinterpret_u8_s64(d12s64)); + q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), + vreinterpret_u8_s64(d13s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); + d += dest_stride; + q4s16 = vsubq_s16(q11s16, q4s16); + q5s16 = vsubq_s16(q10s16, q5s16); + + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + d13s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q12s16 = vaddq_s16(q0s16, q3s16); + q13s16 = vaddq_s16(q1s16, q2s16); + q12s16 = vrshrq_n_s16(q12s16, 6); + q13s16 = vrshrq_n_s16(q13s16, 6); + q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), + vreinterpret_u8_s64(d12s64)); + q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), + vreinterpret_u8_s64(d13s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); + d += dest_stride; + q2s16 = vsubq_s16(q1s16, q2s16); + q3s16 = vsubq_s16(q0s16, q3s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + d13s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q12s16 = vaddq_s16(q10s16, q9s16); + q13s16 = vaddq_s16(q11s16, q8s16); + q12s16 = vrshrq_n_s16(q12s16, 6); + q13s16 = vrshrq_n_s16(q13s16, 6); + q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), + vreinterpret_u8_s64(d12s64)); + q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), + vreinterpret_u8_s64(d13s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); + d += dest_stride; + q8s16 = vsubq_s16(q11s16, q8s16); + q9s16 = vsubq_s16(q10s16, q9s16); + + // store the data out 8,9,10,11,12,13,14,15 + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q8s16 = vrshrq_n_s16(q8s16, 6); + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q9s16 = vrshrq_n_s16(q9s16, 6); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q2s16 = vrshrq_n_s16(q2s16, 6); + q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q3s16 = vrshrq_n_s16(q3s16, 6); + q3u16 = vaddw_u8(vreinterpretq_u16_s16(q3s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q3u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q4s16 = vrshrq_n_s16(q4s16, 6); + q4u16 = vaddw_u8(vreinterpretq_u16_s16(q4s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q4u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q5s16 = vrshrq_n_s16(q5s16, 6); + q5u16 = vaddw_u8(vreinterpretq_u16_s16(q5s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q5u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + dest += dest_stride; + q14s16 = vrshrq_n_s16(q14s16, 6); + q14u16 = vaddw_u8(vreinterpretq_u16_s16(q14s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q14u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + d += dest_stride; + + d12s64 = vld1_s64((int64_t *)dest); + q15s16 = vrshrq_n_s16(q15s16, 6); + q15u16 = vaddw_u8(vreinterpretq_u16_s16(q15s16), + vreinterpret_u8_s64(d12s64)); + d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q15u16)); + vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); + } else { // skip_adding_dest + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q0s16, q15s16); + q13s16 = vaddq_s16(q1s16, q14s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q14s16 = vsubq_s16(q1s16, q14s16); + q15s16 = vsubq_s16(q0s16, q15s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q10s16, q5s16); + q13s16 = vaddq_s16(q11s16, q4s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q4s16 = vsubq_s16(q11s16, q4s16); + q5s16 = vsubq_s16(q10s16, q5s16); + + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q0s16, q3s16); + q13s16 = vaddq_s16(q1s16, q2s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q2s16 = vsubq_s16(q1s16, q2s16); + q3s16 = vsubq_s16(q0s16, q3s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q10s16, q9s16); + q13s16 = vaddq_s16(q11s16, q8s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q8s16 = vsubq_s16(q11s16, q8s16); + q9s16 = vsubq_s16(q10s16, q9s16); + + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q8s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q8s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q9s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q9s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q2s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q2s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q3s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q3s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q4s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q4s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q5s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q5s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q14s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q14s16))); + out += 12; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q15s16))); + out += 4; + vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q15s16))); + } + return; +} + +void vpx_idct16x16_10_add_neon_pass1( + int16_t *in, + int16_t *out, + int output_stride) { + int16x4_t d4s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + uint64x1_t d4u64, d5u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; + uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; + int16x8_t q0s16, q1s16, q2s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int32x4_t q6s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q15s32; + int16x8x2_t q0x2s16; + + q0x2s16 = vld2q_s16(in); + q8s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q9s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q10s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q11s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q12s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q13s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q14s16 = q0x2s16.val[0]; + in += 16; + q0x2s16 = vld2q_s16(in); + q15s16 = q0x2s16.val[0]; + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // stage 3 + q0s16 = vdupq_n_s16(cospi_28_64 * 2); + q1s16 = vdupq_n_s16(cospi_4_64 * 2); + + q4s16 = vqrdmulhq_s16(q9s16, q0s16); + q7s16 = vqrdmulhq_s16(q9s16, q1s16); + + // stage 4 + q1s16 = vdupq_n_s16(cospi_16_64 * 2); + d4s16 = vdup_n_s16(cospi_16_64); + + q8s16 = vqrdmulhq_s16(q8s16, q1s16); + + d8s16 = vget_low_s16(q4s16); + d9s16 = vget_high_s16(q4s16); + d14s16 = vget_low_s16(q7s16); + d15s16 = vget_high_s16(q7s16); + q9s32 = vmull_s16(d14s16, d4s16); + q10s32 = vmull_s16(d15s16, d4s16); + q12s32 = vmull_s16(d9s16, d4s16); + q11s32 = vmull_s16(d8s16, d4s16); + + q15s32 = vsubq_s32(q10s32, q12s32); + q6s32 = vsubq_s32(q9s32, q11s32); + q9s32 = vaddq_s32(q9s32, q11s32); + q10s32 = vaddq_s32(q10s32, q12s32); + + d11s16 = vqrshrn_n_s32(q15s32, 14); + d10s16 = vqrshrn_n_s32(q6s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q10s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + // stage 6 + q2s16 = vaddq_s16(q8s16, q7s16); + q9s16 = vaddq_s16(q8s16, q6s16); + q10s16 = vaddq_s16(q8s16, q5s16); + q11s16 = vaddq_s16(q8s16, q4s16); + q12s16 = vsubq_s16(q8s16, q4s16); + q13s16 = vsubq_s16(q8s16, q5s16); + q14s16 = vsubq_s16(q8s16, q6s16); + q15s16 = vsubq_s16(q8s16, q7s16); + + d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); + d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); + d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); + d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); + d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); + d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); + d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); + d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); + d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); + d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); + d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); + + // store the data + output_stride >>= 1; // output_stride / 2, out is int16_t + vst1_u64((uint64_t *)out, d4u64); + out += output_stride; + vst1_u64((uint64_t *)out, d5u64); + out += output_stride; + vst1_u64((uint64_t *)out, d18u64); + out += output_stride; + vst1_u64((uint64_t *)out, d19u64); + out += output_stride; + vst1_u64((uint64_t *)out, d20u64); + out += output_stride; + vst1_u64((uint64_t *)out, d21u64); + out += output_stride; + vst1_u64((uint64_t *)out, d22u64); + out += output_stride; + vst1_u64((uint64_t *)out, d23u64); + out += output_stride; + vst1_u64((uint64_t *)out, d24u64); + out += output_stride; + vst1_u64((uint64_t *)out, d25u64); + out += output_stride; + vst1_u64((uint64_t *)out, d26u64); + out += output_stride; + vst1_u64((uint64_t *)out, d27u64); + out += output_stride; + vst1_u64((uint64_t *)out, d28u64); + out += output_stride; + vst1_u64((uint64_t *)out, d29u64); + out += output_stride; + vst1_u64((uint64_t *)out, d30u64); + out += output_stride; + vst1_u64((uint64_t *)out, d31u64); + return; +} + +void vpx_idct16x16_10_add_neon_pass2( + int16_t *src, + int16_t *out, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride) { + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d30s16, d31s16; + uint64x1_t d4u64, d5u64, d6u64, d7u64, d8u64, d9u64, d10u64, d11u64; + uint64x1_t d16u64, d17u64, d18u64, d19u64; + uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32; + int16x8x2_t q0x2s16; + (void)skip_adding; + (void)dest; + (void)dest_stride; + + q0x2s16 = vld2q_s16(src); + q8s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q9s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q10s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q11s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q12s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q13s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q14s16 = q0x2s16.val[0]; + src += 16; + q0x2s16 = vld2q_s16(src); + q15s16 = q0x2s16.val[0]; + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // stage 3 + q6s16 = vdupq_n_s16(cospi_30_64 * 2); + q0s16 = vqrdmulhq_s16(q8s16, q6s16); + q6s16 = vdupq_n_s16(cospi_2_64 * 2); + q7s16 = vqrdmulhq_s16(q8s16, q6s16); + + q15s16 = vdupq_n_s16(-cospi_26_64 * 2); + q14s16 = vdupq_n_s16(cospi_6_64 * 2); + q3s16 = vqrdmulhq_s16(q9s16, q15s16); + q4s16 = vqrdmulhq_s16(q9s16, q14s16); + + // stage 4 + d0s16 = vget_low_s16(q0s16); + d1s16 = vget_high_s16(q0s16); + d6s16 = vget_low_s16(q3s16); + d7s16 = vget_high_s16(q3s16); + d8s16 = vget_low_s16(q4s16); + d9s16 = vget_high_s16(q4s16); + d14s16 = vget_low_s16(q7s16); + d15s16 = vget_high_s16(q7s16); + + d30s16 = vdup_n_s16(cospi_8_64); + d31s16 = vdup_n_s16(cospi_24_64); + + q12s32 = vmull_s16(d14s16, d31s16); + q5s32 = vmull_s16(d15s16, d31s16); + q2s32 = vmull_s16(d0s16, d31s16); + q11s32 = vmull_s16(d1s16, d31s16); + + q12s32 = vmlsl_s16(q12s32, d0s16, d30s16); + q5s32 = vmlsl_s16(q5s32, d1s16, d30s16); + q2s32 = vmlal_s16(q2s32, d14s16, d30s16); + q11s32 = vmlal_s16(q11s32, d15s16, d30s16); + + d2s16 = vqrshrn_n_s32(q12s32, 14); + d3s16 = vqrshrn_n_s32(q5s32, 14); + d12s16 = vqrshrn_n_s32(q2s32, 14); + d13s16 = vqrshrn_n_s32(q11s32, 14); + q1s16 = vcombine_s16(d2s16, d3s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + d30s16 = vdup_n_s16(-cospi_8_64); + q10s32 = vmull_s16(d8s16, d30s16); + q13s32 = vmull_s16(d9s16, d30s16); + q8s32 = vmull_s16(d6s16, d30s16); + q9s32 = vmull_s16(d7s16, d30s16); + + q10s32 = vmlsl_s16(q10s32, d6s16, d31s16); + q13s32 = vmlsl_s16(q13s32, d7s16, d31s16); + q8s32 = vmlal_s16(q8s32, d8s16, d31s16); + q9s32 = vmlal_s16(q9s32, d9s16, d31s16); + + d4s16 = vqrshrn_n_s32(q10s32, 14); + d5s16 = vqrshrn_n_s32(q13s32, 14); + d10s16 = vqrshrn_n_s32(q8s32, 14); + d11s16 = vqrshrn_n_s32(q9s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + // stage 5 + q8s16 = vaddq_s16(q0s16, q3s16); + q9s16 = vaddq_s16(q1s16, q2s16); + q10s16 = vsubq_s16(q1s16, q2s16); + q11s16 = vsubq_s16(q0s16, q3s16); + q12s16 = vsubq_s16(q7s16, q4s16); + q13s16 = vsubq_s16(q6s16, q5s16); + q14s16 = vaddq_s16(q6s16, q5s16); + q15s16 = vaddq_s16(q7s16, q4s16); + + // stage 6 + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d22s16 = vget_low_s16(q11s16); + d23s16 = vget_high_s16(q11s16); + d24s16 = vget_low_s16(q12s16); + d25s16 = vget_high_s16(q12s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + + d14s16 = vdup_n_s16(cospi_16_64); + q3s32 = vmull_s16(d26s16, d14s16); + q4s32 = vmull_s16(d27s16, d14s16); + q0s32 = vmull_s16(d20s16, d14s16); + q1s32 = vmull_s16(d21s16, d14s16); + + q5s32 = vsubq_s32(q3s32, q0s32); + q6s32 = vsubq_s32(q4s32, q1s32); + q0s32 = vaddq_s32(q3s32, q0s32); + q4s32 = vaddq_s32(q4s32, q1s32); + + d4s16 = vqrshrn_n_s32(q5s32, 14); + d5s16 = vqrshrn_n_s32(q6s32, 14); + d10s16 = vqrshrn_n_s32(q0s32, 14); + d11s16 = vqrshrn_n_s32(q4s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q0s32 = vmull_s16(d22s16, d14s16); + q1s32 = vmull_s16(d23s16, d14s16); + q13s32 = vmull_s16(d24s16, d14s16); + q6s32 = vmull_s16(d25s16, d14s16); + + q10s32 = vsubq_s32(q13s32, q0s32); + q4s32 = vsubq_s32(q6s32, q1s32); + q13s32 = vaddq_s32(q13s32, q0s32); + q6s32 = vaddq_s32(q6s32, q1s32); + + d6s16 = vqrshrn_n_s32(q10s32, 14); + d7s16 = vqrshrn_n_s32(q4s32, 14); + d8s16 = vqrshrn_n_s32(q13s32, 14); + d9s16 = vqrshrn_n_s32(q6s32, 14); + q3s16 = vcombine_s16(d6s16, d7s16); + q4s16 = vcombine_s16(d8s16, d9s16); + + // stage 7 + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q0s16, q15s16); + q13s16 = vaddq_s16(q1s16, q14s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q14s16 = vsubq_s16(q1s16, q14s16); + q15s16 = vsubq_s16(q0s16, q15s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q10s16, q5s16); + q13s16 = vaddq_s16(q11s16, q4s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q4s16 = vsubq_s16(q11s16, q4s16); + q5s16 = vsubq_s16(q10s16, q5s16); + + q0s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q1s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q12s16 = vaddq_s16(q0s16, q3s16); + q13s16 = vaddq_s16(q1s16, q2s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q2s16 = vsubq_s16(q1s16, q2s16); + q3s16 = vsubq_s16(q0s16, q3s16); + + q10s16 = vld1q_s16(pass1Output); + pass1Output += 8; + q11s16 = vld1q_s16(pass1Output); + q12s16 = vaddq_s16(q10s16, q9s16); + q13s16 = vaddq_s16(q11s16, q8s16); + d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); + d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); + d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); + d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); + vst1_u64((uint64_t *)out, d24u64); + out += 4; + vst1_u64((uint64_t *)out, d25u64); + out += 12; + vst1_u64((uint64_t *)out, d26u64); + out += 4; + vst1_u64((uint64_t *)out, d27u64); + out += 12; + q8s16 = vsubq_s16(q11s16, q8s16); + q9s16 = vsubq_s16(q10s16, q9s16); + + d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); + d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); + d6u64 = vreinterpret_u64_s16(vget_low_s16(q3s16)); + d7u64 = vreinterpret_u64_s16(vget_high_s16(q3s16)); + d8u64 = vreinterpret_u64_s16(vget_low_s16(q4s16)); + d9u64 = vreinterpret_u64_s16(vget_high_s16(q4s16)); + d10u64 = vreinterpret_u64_s16(vget_low_s16(q5s16)); + d11u64 = vreinterpret_u64_s16(vget_high_s16(q5s16)); + d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); + d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); + d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); + d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); + d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); + d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); + d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); + d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); + + vst1_u64((uint64_t *)out, d16u64); + out += 4; + vst1_u64((uint64_t *)out, d17u64); + out += 12; + vst1_u64((uint64_t *)out, d18u64); + out += 4; + vst1_u64((uint64_t *)out, d19u64); + out += 12; + vst1_u64((uint64_t *)out, d4u64); + out += 4; + vst1_u64((uint64_t *)out, d5u64); + out += 12; + vst1_u64((uint64_t *)out, d6u64); + out += 4; + vst1_u64((uint64_t *)out, d7u64); + out += 12; + vst1_u64((uint64_t *)out, d8u64); + out += 4; + vst1_u64((uint64_t *)out, d9u64); + out += 12; + vst1_u64((uint64_t *)out, d10u64); + out += 4; + vst1_u64((uint64_t *)out, d11u64); + out += 12; + vst1_u64((uint64_t *)out, d28u64); + out += 4; + vst1_u64((uint64_t *)out, d29u64); + out += 12; + vst1_u64((uint64_t *)out, d30u64); + out += 4; + vst1_u64((uint64_t *)out, d31u64); + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c new file mode 100644 index 00000000000..352979aa16f --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_dsp/vpx_dsp_common.h" + +void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); +void vpx_idct16x16_10_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vpx_idct16x16_10_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); + +#if HAVE_NEON_ASM +/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ +extern void vpx_push_neon(int64_t *store); +extern void vpx_pop_neon(int64_t *store); +#endif // HAVE_NEON_ASM + +void vpx_idct16x16_256_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { +#if HAVE_NEON_ASM + int64_t store_reg[8]; +#endif + int16_t pass1_output[16*16] = {0}; + int16_t row_idct_output[16*16] = {0}; + +#if HAVE_NEON_ASM + // save d8-d15 register values. + vpx_push_neon(store_reg); +#endif + + /* Parallel idct on the upper 8 rows */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7 + // which will be saved into row_idct_output. + vpx_idct16x16_256_add_neon_pass2(input+1, + row_idct_output, + pass1_output, + 0, + dest, + dest_stride); + + /* Parallel idct on the lower 8 rows */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7 + // which will be saved into row_idct_output. + vpx_idct16x16_256_add_neon_pass2(input+8*16+1, + row_idct_output+8, + pass1_output, + 0, + dest, + dest_stride); + + /* Parallel idct on the left 8 columns */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7. + // Then add the result to the destination data. + vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, + row_idct_output, + pass1_output, + 1, + dest, + dest_stride); + + /* Parallel idct on the right 8 columns */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7. + // Then add the result to the destination data. + vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, + row_idct_output+8, + pass1_output, + 1, + dest+8, + dest_stride); + +#if HAVE_NEON_ASM + // restore d8-d15 register values. + vpx_pop_neon(store_reg); +#endif + + return; +} + +void vpx_idct16x16_10_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { +#if HAVE_NEON_ASM + int64_t store_reg[8]; +#endif + int16_t pass1_output[16*16] = {0}; + int16_t row_idct_output[16*16] = {0}; + +#if HAVE_NEON_ASM + // save d8-d15 register values. + vpx_push_neon(store_reg); +#endif + + /* Parallel idct on the upper 8 rows */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7 + // which will be saved into row_idct_output. + vpx_idct16x16_10_add_neon_pass2(input+1, + row_idct_output, + pass1_output, + 0, + dest, + dest_stride); + + /* Skip Parallel idct on the lower 8 rows as they are all 0s */ + + /* Parallel idct on the left 8 columns */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7. + // Then add the result to the destination data. + vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, + row_idct_output, + pass1_output, + 1, + dest, + dest_stride); + + /* Parallel idct on the right 8 columns */ + // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the + // stage 6 result in pass1_output. + vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + + // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines + // with result in pass1(pass1_output) to calculate final result in stage 7. + // Then add the result to the destination data. + vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, + row_idct_output+8, + pass1_output, + 1, + dest+8, + dest_stride); + +#if HAVE_NEON_ASM + // restore d8-d15 register values. + vpx_pop_neon(store_reg); +#endif + + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c new file mode 100644 index 00000000000..c25c0c4a5c2 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" + +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +static INLINE void LD_16x8( + uint8_t *d, + int d_stride, + uint8x16_t *q8u8, + uint8x16_t *q9u8, + uint8x16_t *q10u8, + uint8x16_t *q11u8, + uint8x16_t *q12u8, + uint8x16_t *q13u8, + uint8x16_t *q14u8, + uint8x16_t *q15u8) { + *q8u8 = vld1q_u8(d); + d += d_stride; + *q9u8 = vld1q_u8(d); + d += d_stride; + *q10u8 = vld1q_u8(d); + d += d_stride; + *q11u8 = vld1q_u8(d); + d += d_stride; + *q12u8 = vld1q_u8(d); + d += d_stride; + *q13u8 = vld1q_u8(d); + d += d_stride; + *q14u8 = vld1q_u8(d); + d += d_stride; + *q15u8 = vld1q_u8(d); + return; +} + +static INLINE void ADD_DIFF_16x8( + uint8x16_t qdiffu8, + uint8x16_t *q8u8, + uint8x16_t *q9u8, + uint8x16_t *q10u8, + uint8x16_t *q11u8, + uint8x16_t *q12u8, + uint8x16_t *q13u8, + uint8x16_t *q14u8, + uint8x16_t *q15u8) { + *q8u8 = vqaddq_u8(*q8u8, qdiffu8); + *q9u8 = vqaddq_u8(*q9u8, qdiffu8); + *q10u8 = vqaddq_u8(*q10u8, qdiffu8); + *q11u8 = vqaddq_u8(*q11u8, qdiffu8); + *q12u8 = vqaddq_u8(*q12u8, qdiffu8); + *q13u8 = vqaddq_u8(*q13u8, qdiffu8); + *q14u8 = vqaddq_u8(*q14u8, qdiffu8); + *q15u8 = vqaddq_u8(*q15u8, qdiffu8); + return; +} + +static INLINE void SUB_DIFF_16x8( + uint8x16_t qdiffu8, + uint8x16_t *q8u8, + uint8x16_t *q9u8, + uint8x16_t *q10u8, + uint8x16_t *q11u8, + uint8x16_t *q12u8, + uint8x16_t *q13u8, + uint8x16_t *q14u8, + uint8x16_t *q15u8) { + *q8u8 = vqsubq_u8(*q8u8, qdiffu8); + *q9u8 = vqsubq_u8(*q9u8, qdiffu8); + *q10u8 = vqsubq_u8(*q10u8, qdiffu8); + *q11u8 = vqsubq_u8(*q11u8, qdiffu8); + *q12u8 = vqsubq_u8(*q12u8, qdiffu8); + *q13u8 = vqsubq_u8(*q13u8, qdiffu8); + *q14u8 = vqsubq_u8(*q14u8, qdiffu8); + *q15u8 = vqsubq_u8(*q15u8, qdiffu8); + return; +} + +static INLINE void ST_16x8( + uint8_t *d, + int d_stride, + uint8x16_t *q8u8, + uint8x16_t *q9u8, + uint8x16_t *q10u8, + uint8x16_t *q11u8, + uint8x16_t *q12u8, + uint8x16_t *q13u8, + uint8x16_t *q14u8, + uint8x16_t *q15u8) { + vst1q_u8(d, *q8u8); + d += d_stride; + vst1q_u8(d, *q9u8); + d += d_stride; + vst1q_u8(d, *q10u8); + d += d_stride; + vst1q_u8(d, *q11u8); + d += d_stride; + vst1q_u8(d, *q12u8); + d += d_stride; + vst1q_u8(d, *q13u8); + d += d_stride; + vst1q_u8(d, *q14u8); + d += d_stride; + vst1q_u8(d, *q15u8); + return; +} + +void vpx_idct32x32_1_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; + int i, j, dest_stride8; + uint8_t *d; + int16_t a1, cospi_16_64 = 11585; + int16_t out = dct_const_round_shift(input[0] * cospi_16_64); + + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 6); + + dest_stride8 = dest_stride * 8; + if (a1 >= 0) { // diff_positive_32_32 + a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; + q0u8 = vdupq_n_u8(a1); + for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop + d = dest; + for (j = 0; j < 4; j++) { + LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + d += dest_stride8; + } + } + } else { // diff_negative_32_32 + a1 = -a1; + a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; + q0u8 = vdupq_n_u8(a1); + for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop + d = dest; + for (j = 0; j < 4; j++) { + LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, + &q12u8, &q13u8, &q14u8, &q15u8); + d += dest_stride8; + } + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c new file mode 100644 index 00000000000..025437eb963 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" +#include "vpx_dsp/txfm_common.h" + +#define LOAD_FROM_TRANSPOSED(prev, first, second) \ + q14s16 = vld1q_s16(trans_buf + first * 8); \ + q13s16 = vld1q_s16(trans_buf + second * 8); + +#define LOAD_FROM_OUTPUT(prev, first, second, qA, qB) \ + qA = vld1q_s16(out + first * 32); \ + qB = vld1q_s16(out + second * 32); + +#define STORE_IN_OUTPUT(prev, first, second, qA, qB) \ + vst1q_s16(out + first * 32, qA); \ + vst1q_s16(out + second * 32, qB); + +#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \ + __STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, \ + q6s16, q7s16, q8s16, q9s16); +static INLINE void __STORE_COMBINE_CENTER_RESULTS( + uint8_t *p1, + uint8_t *p2, + int stride, + int16x8_t q6s16, + int16x8_t q7s16, + int16x8_t q8s16, + int16x8_t q9s16) { + int16x4_t d8s16, d9s16, d10s16, d11s16; + + d8s16 = vld1_s16((int16_t *)p1); + p1 += stride; + d11s16 = vld1_s16((int16_t *)p2); + p2 -= stride; + d9s16 = vld1_s16((int16_t *)p1); + d10s16 = vld1_s16((int16_t *)p2); + + q7s16 = vrshrq_n_s16(q7s16, 6); + q8s16 = vrshrq_n_s16(q8s16, 6); + q9s16 = vrshrq_n_s16(q9s16, 6); + q6s16 = vrshrq_n_s16(q6s16, 6); + + q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), + vreinterpret_u8_s16(d9s16))); + q8s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_s16(d10s16))); + q9s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_s16(d11s16))); + q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), + vreinterpret_u8_s16(d8s16))); + + d9s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); + d10s16 = vreinterpret_s16_u8(vqmovun_s16(q8s16)); + d11s16 = vreinterpret_s16_u8(vqmovun_s16(q9s16)); + d8s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); + + vst1_s16((int16_t *)p1, d9s16); + p1 -= stride; + vst1_s16((int16_t *)p2, d10s16); + p2 += stride; + vst1_s16((int16_t *)p1, d8s16); + vst1_s16((int16_t *)p2, d11s16); + return; +} + +#define STORE_COMBINE_EXTREME_RESULTS(r7, r6); \ + __STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, \ + q4s16, q5s16, q6s16, q7s16); +static INLINE void __STORE_COMBINE_EXTREME_RESULTS( + uint8_t *p1, + uint8_t *p2, + int stride, + int16x8_t q4s16, + int16x8_t q5s16, + int16x8_t q6s16, + int16x8_t q7s16) { + int16x4_t d4s16, d5s16, d6s16, d7s16; + + d4s16 = vld1_s16((int16_t *)p1); + p1 += stride; + d7s16 = vld1_s16((int16_t *)p2); + p2 -= stride; + d5s16 = vld1_s16((int16_t *)p1); + d6s16 = vld1_s16((int16_t *)p2); + + q5s16 = vrshrq_n_s16(q5s16, 6); + q6s16 = vrshrq_n_s16(q6s16, 6); + q7s16 = vrshrq_n_s16(q7s16, 6); + q4s16 = vrshrq_n_s16(q4s16, 6); + + q5s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q5s16), + vreinterpret_u8_s16(d5s16))); + q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), + vreinterpret_u8_s16(d6s16))); + q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), + vreinterpret_u8_s16(d7s16))); + q4s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q4s16), + vreinterpret_u8_s16(d4s16))); + + d5s16 = vreinterpret_s16_u8(vqmovun_s16(q5s16)); + d6s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); + d7s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); + d4s16 = vreinterpret_s16_u8(vqmovun_s16(q4s16)); + + vst1_s16((int16_t *)p1, d5s16); + p1 -= stride; + vst1_s16((int16_t *)p2, d6s16); + p2 += stride; + vst1_s16((int16_t *)p2, d7s16); + vst1_s16((int16_t *)p1, d4s16); + return; +} + +#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \ + DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB); +static INLINE void DO_BUTTERFLY( + int16x8_t q14s16, + int16x8_t q13s16, + int16_t first_const, + int16_t second_const, + int16x8_t *qAs16, + int16x8_t *qBs16) { + int16x4_t d30s16, d31s16; + int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q15s32; + int16x4_t dCs16, dDs16, dAs16, dBs16; + + dCs16 = vget_low_s16(q14s16); + dDs16 = vget_high_s16(q14s16); + dAs16 = vget_low_s16(q13s16); + dBs16 = vget_high_s16(q13s16); + + d30s16 = vdup_n_s16(first_const); + d31s16 = vdup_n_s16(second_const); + + q8s32 = vmull_s16(dCs16, d30s16); + q10s32 = vmull_s16(dAs16, d31s16); + q9s32 = vmull_s16(dDs16, d30s16); + q11s32 = vmull_s16(dBs16, d31s16); + q12s32 = vmull_s16(dCs16, d31s16); + + q8s32 = vsubq_s32(q8s32, q10s32); + q9s32 = vsubq_s32(q9s32, q11s32); + + q10s32 = vmull_s16(dDs16, d31s16); + q11s32 = vmull_s16(dAs16, d30s16); + q15s32 = vmull_s16(dBs16, d30s16); + + q11s32 = vaddq_s32(q12s32, q11s32); + q10s32 = vaddq_s32(q10s32, q15s32); + + *qAs16 = vcombine_s16(vqrshrn_n_s32(q8s32, 14), + vqrshrn_n_s32(q9s32, 14)); + *qBs16 = vcombine_s16(vqrshrn_n_s32(q11s32, 14), + vqrshrn_n_s32(q10s32, 14)); + return; +} + +static INLINE void idct32_transpose_pair( + int16_t *input, + int16_t *t_buf) { + int16_t *in; + int i; + const int stride = 32; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + for (i = 0; i < 4; i++, input += 8) { + in = input; + q8s16 = vld1q_s16(in); + in += stride; + q9s16 = vld1q_s16(in); + in += stride; + q10s16 = vld1q_s16(in); + in += stride; + q11s16 = vld1q_s16(in); + in += stride; + q12s16 = vld1q_s16(in); + in += stride; + q13s16 = vld1q_s16(in); + in += stride; + q14s16 = vld1q_s16(in); + in += stride; + q15s16 = vld1q_s16(in); + + d16s16 = vget_low_s16(q8s16); + d17s16 = vget_high_s16(q8s16); + d18s16 = vget_low_s16(q9s16); + d19s16 = vget_high_s16(q9s16); + d20s16 = vget_low_s16(q10s16); + d21s16 = vget_high_s16(q10s16); + d22s16 = vget_low_s16(q11s16); + d23s16 = vget_high_s16(q11s16); + d24s16 = vget_low_s16(q12s16); + d25s16 = vget_high_s16(q12s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + d30s16 = vget_low_s16(q15s16); + d31s16 = vget_high_s16(q15s16); + + q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + q12s16 = vcombine_s16(d17s16, d25s16); + q13s16 = vcombine_s16(d19s16, d27s16); + q14s16 = vcombine_s16(d21s16, d29s16); + q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), + vreinterpretq_s32_s16(q10s16)); + q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q9s16), + vreinterpretq_s32_s16(q11s16)); + q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q12s16), + vreinterpretq_s32_s16(q14s16)); + q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q13s16), + vreinterpretq_s32_s16(q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + vst1q_s16(t_buf, q0x2s16.val[0]); + t_buf += 8; + vst1q_s16(t_buf, q0x2s16.val[1]); + t_buf += 8; + vst1q_s16(t_buf, q1x2s16.val[0]); + t_buf += 8; + vst1q_s16(t_buf, q1x2s16.val[1]); + t_buf += 8; + vst1q_s16(t_buf, q2x2s16.val[0]); + t_buf += 8; + vst1q_s16(t_buf, q2x2s16.val[1]); + t_buf += 8; + vst1q_s16(t_buf, q3x2s16.val[0]); + t_buf += 8; + vst1q_s16(t_buf, q3x2s16.val[1]); + t_buf += 8; + } + return; +} + +static INLINE void idct32_bands_end_1st_pass( + int16_t *out, + int16x8_t q2s16, + int16x8_t q3s16, + int16x8_t q6s16, + int16x8_t q7s16, + int16x8_t q8s16, + int16x8_t q9s16, + int16x8_t q10s16, + int16x8_t q11s16, + int16x8_t q12s16, + int16x8_t q13s16, + int16x8_t q14s16, + int16x8_t q15s16) { + int16x8_t q0s16, q1s16, q4s16, q5s16; + + STORE_IN_OUTPUT(17, 16, 17, q6s16, q7s16); + STORE_IN_OUTPUT(17, 14, 15, q8s16, q9s16); + + LOAD_FROM_OUTPUT(15, 30, 31, q0s16, q1s16); + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_IN_OUTPUT(31, 30, 31, q6s16, q7s16); + STORE_IN_OUTPUT(31, 0, 1, q4s16, q5s16); + + LOAD_FROM_OUTPUT(1, 12, 13, q0s16, q1s16); + q2s16 = vaddq_s16(q10s16, q1s16); + q3s16 = vaddq_s16(q11s16, q0s16); + q4s16 = vsubq_s16(q11s16, q0s16); + q5s16 = vsubq_s16(q10s16, q1s16); + + LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16); + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_IN_OUTPUT(19, 18, 19, q6s16, q7s16); + STORE_IN_OUTPUT(19, 12, 13, q8s16, q9s16); + + LOAD_FROM_OUTPUT(13, 28, 29, q0s16, q1s16); + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_IN_OUTPUT(29, 28, 29, q6s16, q7s16); + STORE_IN_OUTPUT(29, 2, 3, q4s16, q5s16); + + LOAD_FROM_OUTPUT(3, 10, 11, q0s16, q1s16); + q2s16 = vaddq_s16(q12s16, q1s16); + q3s16 = vaddq_s16(q13s16, q0s16); + q4s16 = vsubq_s16(q13s16, q0s16); + q5s16 = vsubq_s16(q12s16, q1s16); + + LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16); + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_IN_OUTPUT(21, 20, 21, q6s16, q7s16); + STORE_IN_OUTPUT(21, 10, 11, q8s16, q9s16); + + LOAD_FROM_OUTPUT(11, 26, 27, q0s16, q1s16); + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_IN_OUTPUT(27, 26, 27, q6s16, q7s16); + STORE_IN_OUTPUT(27, 4, 5, q4s16, q5s16); + + LOAD_FROM_OUTPUT(5, 8, 9, q0s16, q1s16); + q2s16 = vaddq_s16(q14s16, q1s16); + q3s16 = vaddq_s16(q15s16, q0s16); + q4s16 = vsubq_s16(q15s16, q0s16); + q5s16 = vsubq_s16(q14s16, q1s16); + + LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16); + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_IN_OUTPUT(23, 22, 23, q6s16, q7s16); + STORE_IN_OUTPUT(23, 8, 9, q8s16, q9s16); + + LOAD_FROM_OUTPUT(9, 24, 25, q0s16, q1s16); + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_IN_OUTPUT(25, 24, 25, q6s16, q7s16); + STORE_IN_OUTPUT(25, 6, 7, q4s16, q5s16); + return; +} + +static INLINE void idct32_bands_end_2nd_pass( + int16_t *out, + uint8_t *dest, + int stride, + int16x8_t q2s16, + int16x8_t q3s16, + int16x8_t q6s16, + int16x8_t q7s16, + int16x8_t q8s16, + int16x8_t q9s16, + int16x8_t q10s16, + int16x8_t q11s16, + int16x8_t q12s16, + int16x8_t q13s16, + int16x8_t q14s16, + int16x8_t q15s16) { + uint8_t *r6 = dest + 31 * stride; + uint8_t *r7 = dest/* + 0 * stride*/; + uint8_t *r9 = dest + 15 * stride; + uint8_t *r10 = dest + 16 * stride; + int str2 = stride << 1; + int16x8_t q0s16, q1s16, q4s16, q5s16; + + STORE_COMBINE_CENTER_RESULTS(r10, r9); + r10 += str2; r9 -= str2; + + LOAD_FROM_OUTPUT(17, 30, 31, q0s16, q1s16) + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_COMBINE_EXTREME_RESULTS(r7, r6); + r7 += str2; r6 -= str2; + + LOAD_FROM_OUTPUT(31, 12, 13, q0s16, q1s16) + q2s16 = vaddq_s16(q10s16, q1s16); + q3s16 = vaddq_s16(q11s16, q0s16); + q4s16 = vsubq_s16(q11s16, q0s16); + q5s16 = vsubq_s16(q10s16, q1s16); + + LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16) + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_COMBINE_CENTER_RESULTS(r10, r9); + r10 += str2; r9 -= str2; + + LOAD_FROM_OUTPUT(19, 28, 29, q0s16, q1s16) + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_COMBINE_EXTREME_RESULTS(r7, r6); + r7 += str2; r6 -= str2; + + LOAD_FROM_OUTPUT(29, 10, 11, q0s16, q1s16) + q2s16 = vaddq_s16(q12s16, q1s16); + q3s16 = vaddq_s16(q13s16, q0s16); + q4s16 = vsubq_s16(q13s16, q0s16); + q5s16 = vsubq_s16(q12s16, q1s16); + + LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16) + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_COMBINE_CENTER_RESULTS(r10, r9); + r10 += str2; r9 -= str2; + + LOAD_FROM_OUTPUT(21, 26, 27, q0s16, q1s16) + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_COMBINE_EXTREME_RESULTS(r7, r6); + r7 += str2; r6 -= str2; + + LOAD_FROM_OUTPUT(27, 8, 9, q0s16, q1s16) + q2s16 = vaddq_s16(q14s16, q1s16); + q3s16 = vaddq_s16(q15s16, q0s16); + q4s16 = vsubq_s16(q15s16, q0s16); + q5s16 = vsubq_s16(q14s16, q1s16); + + LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16) + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + STORE_COMBINE_CENTER_RESULTS(r10, r9); + + LOAD_FROM_OUTPUT(23, 24, 25, q0s16, q1s16) + q4s16 = vaddq_s16(q2s16, q1s16); + q5s16 = vaddq_s16(q3s16, q0s16); + q6s16 = vsubq_s16(q3s16, q0s16); + q7s16 = vsubq_s16(q2s16, q1s16); + STORE_COMBINE_EXTREME_RESULTS(r7, r6); + return; +} + +void vpx_idct32x32_1024_add_neon( + int16_t *input, + uint8_t *dest, + int stride) { + int i, idct32_pass_loop; + int16_t trans_buf[32 * 8]; + int16_t pass1[32 * 32]; + int16_t pass2[32 * 32]; + int16_t *out; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + + for (idct32_pass_loop = 0, out = pass1; + idct32_pass_loop < 2; + idct32_pass_loop++, + input = pass1, // the input of pass2 is the result of pass1 + out = pass2) { + for (i = 0; + i < 4; i++, + input += 32 * 8, out += 8) { // idct32_bands_loop + idct32_transpose_pair(input, trans_buf); + + // ----------------------------------------- + // BLOCK A: 16-19,28-31 + // ----------------------------------------- + // generate 16,17,30,31 + // part of stage 1 + LOAD_FROM_TRANSPOSED(0, 1, 31) + DO_BUTTERFLY_STD(cospi_31_64, cospi_1_64, &q0s16, &q2s16) + LOAD_FROM_TRANSPOSED(31, 17, 15) + DO_BUTTERFLY_STD(cospi_15_64, cospi_17_64, &q1s16, &q3s16) + // part of stage 2 + q4s16 = vaddq_s16(q0s16, q1s16); + q13s16 = vsubq_s16(q0s16, q1s16); + q6s16 = vaddq_s16(q2s16, q3s16); + q14s16 = vsubq_s16(q2s16, q3s16); + // part of stage 3 + DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q5s16, &q7s16) + + // generate 18,19,28,29 + // part of stage 1 + LOAD_FROM_TRANSPOSED(15, 9, 23) + DO_BUTTERFLY_STD(cospi_23_64, cospi_9_64, &q0s16, &q2s16) + LOAD_FROM_TRANSPOSED(23, 25, 7) + DO_BUTTERFLY_STD(cospi_7_64, cospi_25_64, &q1s16, &q3s16) + // part of stage 2 + q13s16 = vsubq_s16(q3s16, q2s16); + q3s16 = vaddq_s16(q3s16, q2s16); + q14s16 = vsubq_s16(q1s16, q0s16); + q2s16 = vaddq_s16(q1s16, q0s16); + // part of stage 3 + DO_BUTTERFLY_STD(-cospi_4_64, -cospi_28_64, &q1s16, &q0s16) + // part of stage 4 + q8s16 = vaddq_s16(q4s16, q2s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q10s16 = vaddq_s16(q7s16, q1s16); + q15s16 = vaddq_s16(q6s16, q3s16); + q13s16 = vsubq_s16(q5s16, q0s16); + q14s16 = vsubq_s16(q7s16, q1s16); + STORE_IN_OUTPUT(0, 16, 31, q8s16, q15s16) + STORE_IN_OUTPUT(31, 17, 30, q9s16, q10s16) + // part of stage 5 + DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q0s16, &q1s16) + STORE_IN_OUTPUT(30, 29, 18, q1s16, q0s16) + // part of stage 4 + q13s16 = vsubq_s16(q4s16, q2s16); + q14s16 = vsubq_s16(q6s16, q3s16); + // part of stage 5 + DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q4s16, &q6s16) + STORE_IN_OUTPUT(18, 19, 28, q4s16, q6s16) + + // ----------------------------------------- + // BLOCK B: 20-23,24-27 + // ----------------------------------------- + // generate 20,21,26,27 + // part of stage 1 + LOAD_FROM_TRANSPOSED(7, 5, 27) + DO_BUTTERFLY_STD(cospi_27_64, cospi_5_64, &q0s16, &q2s16) + LOAD_FROM_TRANSPOSED(27, 21, 11) + DO_BUTTERFLY_STD(cospi_11_64, cospi_21_64, &q1s16, &q3s16) + // part of stage 2 + q13s16 = vsubq_s16(q0s16, q1s16); + q0s16 = vaddq_s16(q0s16, q1s16); + q14s16 = vsubq_s16(q2s16, q3s16); + q2s16 = vaddq_s16(q2s16, q3s16); + // part of stage 3 + DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) + + // generate 22,23,24,25 + // part of stage 1 + LOAD_FROM_TRANSPOSED(11, 13, 19) + DO_BUTTERFLY_STD(cospi_19_64, cospi_13_64, &q5s16, &q7s16) + LOAD_FROM_TRANSPOSED(19, 29, 3) + DO_BUTTERFLY_STD(cospi_3_64, cospi_29_64, &q4s16, &q6s16) + // part of stage 2 + q14s16 = vsubq_s16(q4s16, q5s16); + q5s16 = vaddq_s16(q4s16, q5s16); + q13s16 = vsubq_s16(q6s16, q7s16); + q6s16 = vaddq_s16(q6s16, q7s16); + // part of stage 3 + DO_BUTTERFLY_STD(-cospi_20_64, -cospi_12_64, &q4s16, &q7s16) + // part of stage 4 + q10s16 = vaddq_s16(q7s16, q1s16); + q11s16 = vaddq_s16(q5s16, q0s16); + q12s16 = vaddq_s16(q6s16, q2s16); + q15s16 = vaddq_s16(q4s16, q3s16); + // part of stage 6 + LOAD_FROM_OUTPUT(28, 16, 17, q14s16, q13s16) + q8s16 = vaddq_s16(q14s16, q11s16); + q9s16 = vaddq_s16(q13s16, q10s16); + q13s16 = vsubq_s16(q13s16, q10s16); + q11s16 = vsubq_s16(q14s16, q11s16); + STORE_IN_OUTPUT(17, 17, 16, q9s16, q8s16) + LOAD_FROM_OUTPUT(16, 30, 31, q14s16, q9s16) + q8s16 = vsubq_s16(q9s16, q12s16); + q10s16 = vaddq_s16(q14s16, q15s16); + q14s16 = vsubq_s16(q14s16, q15s16); + q12s16 = vaddq_s16(q9s16, q12s16); + STORE_IN_OUTPUT(31, 30, 31, q10s16, q12s16) + // part of stage 7 + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) + STORE_IN_OUTPUT(31, 25, 22, q14s16, q13s16) + q13s16 = q11s16; + q14s16 = q8s16; + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) + STORE_IN_OUTPUT(22, 24, 23, q14s16, q13s16) + // part of stage 4 + q14s16 = vsubq_s16(q5s16, q0s16); + q13s16 = vsubq_s16(q6s16, q2s16); + DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q5s16, &q6s16); + q14s16 = vsubq_s16(q7s16, q1s16); + q13s16 = vsubq_s16(q4s16, q3s16); + DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q0s16, &q1s16); + // part of stage 6 + LOAD_FROM_OUTPUT(23, 18, 19, q14s16, q13s16) + q8s16 = vaddq_s16(q14s16, q1s16); + q9s16 = vaddq_s16(q13s16, q6s16); + q13s16 = vsubq_s16(q13s16, q6s16); + q1s16 = vsubq_s16(q14s16, q1s16); + STORE_IN_OUTPUT(19, 18, 19, q8s16, q9s16) + LOAD_FROM_OUTPUT(19, 28, 29, q8s16, q9s16) + q14s16 = vsubq_s16(q8s16, q5s16); + q10s16 = vaddq_s16(q8s16, q5s16); + q11s16 = vaddq_s16(q9s16, q0s16); + q0s16 = vsubq_s16(q9s16, q0s16); + STORE_IN_OUTPUT(29, 28, 29, q10s16, q11s16) + // part of stage 7 + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) + STORE_IN_OUTPUT(29, 20, 27, q13s16, q14s16) + DO_BUTTERFLY(q0s16, q1s16, cospi_16_64, cospi_16_64, + &q1s16, &q0s16); + STORE_IN_OUTPUT(27, 21, 26, q1s16, q0s16) + + // ----------------------------------------- + // BLOCK C: 8-10,11-15 + // ----------------------------------------- + // generate 8,9,14,15 + // part of stage 2 + LOAD_FROM_TRANSPOSED(3, 2, 30) + DO_BUTTERFLY_STD(cospi_30_64, cospi_2_64, &q0s16, &q2s16) + LOAD_FROM_TRANSPOSED(30, 18, 14) + DO_BUTTERFLY_STD(cospi_14_64, cospi_18_64, &q1s16, &q3s16) + // part of stage 3 + q13s16 = vsubq_s16(q0s16, q1s16); + q0s16 = vaddq_s16(q0s16, q1s16); + q14s16 = vsubq_s16(q2s16, q3s16); + q2s16 = vaddq_s16(q2s16, q3s16); + // part of stage 4 + DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q1s16, &q3s16) + + // generate 10,11,12,13 + // part of stage 2 + LOAD_FROM_TRANSPOSED(14, 10, 22) + DO_BUTTERFLY_STD(cospi_22_64, cospi_10_64, &q5s16, &q7s16) + LOAD_FROM_TRANSPOSED(22, 26, 6) + DO_BUTTERFLY_STD(cospi_6_64, cospi_26_64, &q4s16, &q6s16) + // part of stage 3 + q14s16 = vsubq_s16(q4s16, q5s16); + q5s16 = vaddq_s16(q4s16, q5s16); + q13s16 = vsubq_s16(q6s16, q7s16); + q6s16 = vaddq_s16(q6s16, q7s16); + // part of stage 4 + DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q4s16, &q7s16) + // part of stage 5 + q8s16 = vaddq_s16(q0s16, q5s16); + q9s16 = vaddq_s16(q1s16, q7s16); + q13s16 = vsubq_s16(q1s16, q7s16); + q14s16 = vsubq_s16(q3s16, q4s16); + q10s16 = vaddq_s16(q3s16, q4s16); + q15s16 = vaddq_s16(q2s16, q6s16); + STORE_IN_OUTPUT(26, 8, 15, q8s16, q15s16) + STORE_IN_OUTPUT(15, 9, 14, q9s16, q10s16) + // part of stage 6 + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) + STORE_IN_OUTPUT(14, 13, 10, q3s16, q1s16) + q13s16 = vsubq_s16(q0s16, q5s16); + q14s16 = vsubq_s16(q2s16, q6s16); + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) + STORE_IN_OUTPUT(10, 11, 12, q1s16, q3s16) + + // ----------------------------------------- + // BLOCK D: 0-3,4-7 + // ----------------------------------------- + // generate 4,5,6,7 + // part of stage 3 + LOAD_FROM_TRANSPOSED(6, 4, 28) + DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q0s16, &q2s16) + LOAD_FROM_TRANSPOSED(28, 20, 12) + DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) + // part of stage 4 + q13s16 = vsubq_s16(q0s16, q1s16); + q0s16 = vaddq_s16(q0s16, q1s16); + q14s16 = vsubq_s16(q2s16, q3s16); + q2s16 = vaddq_s16(q2s16, q3s16); + // part of stage 5 + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) + + // generate 0,1,2,3 + // part of stage 4 + LOAD_FROM_TRANSPOSED(12, 0, 16) + DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q5s16, &q7s16) + LOAD_FROM_TRANSPOSED(16, 8, 24) + DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q14s16, &q6s16) + // part of stage 5 + q4s16 = vaddq_s16(q7s16, q6s16); + q7s16 = vsubq_s16(q7s16, q6s16); + q6s16 = vsubq_s16(q5s16, q14s16); + q5s16 = vaddq_s16(q5s16, q14s16); + // part of stage 6 + q8s16 = vaddq_s16(q4s16, q2s16); + q9s16 = vaddq_s16(q5s16, q3s16); + q10s16 = vaddq_s16(q6s16, q1s16); + q11s16 = vaddq_s16(q7s16, q0s16); + q12s16 = vsubq_s16(q7s16, q0s16); + q13s16 = vsubq_s16(q6s16, q1s16); + q14s16 = vsubq_s16(q5s16, q3s16); + q15s16 = vsubq_s16(q4s16, q2s16); + // part of stage 7 + LOAD_FROM_OUTPUT(12, 14, 15, q0s16, q1s16) + q2s16 = vaddq_s16(q8s16, q1s16); + q3s16 = vaddq_s16(q9s16, q0s16); + q4s16 = vsubq_s16(q9s16, q0s16); + q5s16 = vsubq_s16(q8s16, q1s16); + LOAD_FROM_OUTPUT(15, 16, 17, q0s16, q1s16) + q8s16 = vaddq_s16(q4s16, q1s16); + q9s16 = vaddq_s16(q5s16, q0s16); + q6s16 = vsubq_s16(q5s16, q0s16); + q7s16 = vsubq_s16(q4s16, q1s16); + + if (idct32_pass_loop == 0) { + idct32_bands_end_1st_pass(out, + q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, + q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); + } else { + idct32_bands_end_2nd_pass(out, dest, stride, + q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, + q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); + dest += 8; + } + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c new file mode 100644 index 00000000000..ea618700c95 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vpx_idct4x4_1_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8x8_t d6u8; + uint32x2_t d2u32 = vdup_n_u32(0); + uint16x8_t q8u16; + int16x8_t q0s16; + uint8_t *d1, *d2; + int16_t i, a1, cospi_16_64 = 11585; + int16_t out = dct_const_round_shift(input[0] * cospi_16_64); + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 4); + + q0s16 = vdupq_n_s16(a1); + + // dc_only_idct_add + d1 = d2 = dest; + for (i = 0; i < 2; i++) { + d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 0); + d1 += dest_stride; + d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q0s16), + vreinterpret_u8_u32(d2u32)); + d6u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + + vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 0); + d2 += dest_stride; + vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 1); + d2 += dest_stride; + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c new file mode 100644 index 00000000000..3c975c99b77 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +void vpx_idct4x4_16_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8x8_t d26u8, d27u8; + uint32x2_t d26u32, d27u32; + uint16x8_t q8u16, q9u16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16; + int16x4_t d22s16, d23s16, d24s16, d26s16, d27s16, d28s16, d29s16; + int16x8_t q8s16, q9s16, q13s16, q14s16; + int32x4_t q1s32, q13s32, q14s32, q15s32; + int16x4x2_t d0x2s16, d1x2s16; + int32x4x2_t q0x2s32; + uint8_t *d; + int16_t cospi_8_64 = 15137; + int16_t cospi_16_64 = 11585; + int16_t cospi_24_64 = 6270; + + d26u32 = d27u32 = vdup_n_u32(0); + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + + d16s16 = vget_low_s16(q8s16); + d17s16 = vget_high_s16(q8s16); + d18s16 = vget_low_s16(q9s16); + d19s16 = vget_high_s16(q9s16); + + d0x2s16 = vtrn_s16(d16s16, d17s16); + d1x2s16 = vtrn_s16(d18s16, d19s16); + q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); + q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); + + d20s16 = vdup_n_s16(cospi_8_64); + d21s16 = vdup_n_s16(cospi_16_64); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), + vreinterpretq_s32_s16(q9s16)); + d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); + d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); + d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); + d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); + + d22s16 = vdup_n_s16(cospi_24_64); + + // stage 1 + d23s16 = vadd_s16(d16s16, d18s16); + d24s16 = vsub_s16(d16s16, d18s16); + + q15s32 = vmull_s16(d17s16, d22s16); + q1s32 = vmull_s16(d17s16, d20s16); + q13s32 = vmull_s16(d23s16, d21s16); + q14s32 = vmull_s16(d24s16, d21s16); + + q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); + q1s32 = vmlal_s16(q1s32, d19s16, d22s16); + + d26s16 = vqrshrn_n_s32(q13s32, 14); + d27s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d28s16 = vqrshrn_n_s32(q1s32, 14); + q13s16 = vcombine_s16(d26s16, d27s16); + q14s16 = vcombine_s16(d28s16, d29s16); + + // stage 2 + q8s16 = vaddq_s16(q13s16, q14s16); + q9s16 = vsubq_s16(q13s16, q14s16); + + d16s16 = vget_low_s16(q8s16); + d17s16 = vget_high_s16(q8s16); + d18s16 = vget_high_s16(q9s16); // vswp d18 d19 + d19s16 = vget_low_s16(q9s16); + + d0x2s16 = vtrn_s16(d16s16, d17s16); + d1x2s16 = vtrn_s16(d18s16, d19s16); + q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); + q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), + vreinterpretq_s32_s16(q9s16)); + d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); + d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); + d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); + d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); + + // do the transform on columns + // stage 1 + d23s16 = vadd_s16(d16s16, d18s16); + d24s16 = vsub_s16(d16s16, d18s16); + + q15s32 = vmull_s16(d17s16, d22s16); + q1s32 = vmull_s16(d17s16, d20s16); + q13s32 = vmull_s16(d23s16, d21s16); + q14s32 = vmull_s16(d24s16, d21s16); + + q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); + q1s32 = vmlal_s16(q1s32, d19s16, d22s16); + + d26s16 = vqrshrn_n_s32(q13s32, 14); + d27s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d28s16 = vqrshrn_n_s32(q1s32, 14); + q13s16 = vcombine_s16(d26s16, d27s16); + q14s16 = vcombine_s16(d28s16, d29s16); + + // stage 2 + q8s16 = vaddq_s16(q13s16, q14s16); + q9s16 = vsubq_s16(q13s16, q14s16); + + q8s16 = vrshrq_n_s16(q8s16, 4); + q9s16 = vrshrq_n_s16(q9s16, 4); + + d = dest; + d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 0); + d += dest_stride; + d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 1); + d += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 1); + d += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 0); + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u32(d26u32)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u32(d27u32)); + + d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + + d = dest; + vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 0); + d += dest_stride; + vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 1); + d += dest_stride; + vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 1); + d += dest_stride; + vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 0); + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c new file mode 100644 index 00000000000..c1b801fad54 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vpx_idct8x8_1_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8x8_t d2u8, d3u8, d30u8, d31u8; + uint64x1_t d2u64, d3u64, d4u64, d5u64; + uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; + int16x8_t q0s16; + uint8_t *d1, *d2; + int16_t i, a1, cospi_16_64 = 11585; + int16_t out = dct_const_round_shift(input[0] * cospi_16_64); + out = dct_const_round_shift(out * cospi_16_64); + a1 = ROUND_POWER_OF_TWO(out, 5); + + q0s16 = vdupq_n_s16(a1); + q0u16 = vreinterpretq_u16_s16(q0s16); + + d1 = d2 = dest; + for (i = 0; i < 2; i++) { + d2u64 = vld1_u64((const uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((const uint64_t *)d1); + d1 += dest_stride; + d4u64 = vld1_u64((const uint64_t *)d1); + d1 += dest_stride; + d5u64 = vld1_u64((const uint64_t *)d1); + d1 += dest_stride; + + q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); + q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); + q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); + q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); + + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d31u8)); + d2 += dest_stride; + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c new file mode 100644 index 00000000000..4b2c2a6f83c --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c @@ -0,0 +1,540 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" +#include "vpx_dsp/txfm_common.h" + +static INLINE void TRANSPOSE8X8( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + *q12s16 = vcombine_s16(d17s16, d25s16); + *q13s16 = vcombine_s16(d19s16, d27s16); + *q14s16 = vcombine_s16(d21s16, d29s16); + *q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), + vreinterpretq_s32_s16(*q10s16)); + q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), + vreinterpretq_s32_s16(*q11s16)); + q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), + vreinterpretq_s32_s16(*q14s16)); + q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), + vreinterpretq_s32_s16(*q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + *q8s16 = q0x2s16.val[0]; + *q9s16 = q0x2s16.val[1]; + *q10s16 = q1x2s16.val[0]; + *q11s16 = q1x2s16.val[1]; + *q12s16 = q2x2s16.val[0]; + *q13s16 = q2x2s16.val[1]; + *q14s16 = q3x2s16.val[0]; + *q15s16 = q3x2s16.val[1]; + return; +} + +static INLINE void IDCT8x8_1D( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; + + d0s16 = vdup_n_s16(cospi_28_64); + d1s16 = vdup_n_s16(cospi_4_64); + d2s16 = vdup_n_s16(cospi_12_64); + d3s16 = vdup_n_s16(cospi_20_64); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + q2s32 = vmull_s16(d18s16, d0s16); + q3s32 = vmull_s16(d19s16, d0s16); + q5s32 = vmull_s16(d26s16, d2s16); + q6s32 = vmull_s16(d27s16, d2s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); + q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); + q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); + + d8s16 = vqrshrn_n_s32(q2s32, 14); + d9s16 = vqrshrn_n_s32(q3s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + q4s16 = vcombine_s16(d8s16, d9s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q2s32 = vmull_s16(d18s16, d1s16); + q3s32 = vmull_s16(d19s16, d1s16); + q9s32 = vmull_s16(d26s16, d3s16); + q13s32 = vmull_s16(d27s16, d3s16); + + q2s32 = vmlal_s16(q2s32, d30s16, d0s16); + q3s32 = vmlal_s16(q3s32, d31s16, d0s16); + q9s32 = vmlal_s16(q9s32, d22s16, d2s16); + q13s32 = vmlal_s16(q13s32, d23s16, d2s16); + + d14s16 = vqrshrn_n_s32(q2s32, 14); + d15s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q13s32, 14); + q6s16 = vcombine_s16(d12s16, d13s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + d0s16 = vdup_n_s16(cospi_16_64); + + q2s32 = vmull_s16(d16s16, d0s16); + q3s32 = vmull_s16(d17s16, d0s16); + q13s32 = vmull_s16(d16s16, d0s16); + q15s32 = vmull_s16(d17s16, d0s16); + + q2s32 = vmlal_s16(q2s32, d24s16, d0s16); + q3s32 = vmlal_s16(q3s32, d25s16, d0s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); + q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); + + d0s16 = vdup_n_s16(cospi_24_64); + d1s16 = vdup_n_s16(cospi_8_64); + + d18s16 = vqrshrn_n_s32(q2s32, 14); + d19s16 = vqrshrn_n_s32(q3s32, 14); + d22s16 = vqrshrn_n_s32(q13s32, 14); + d23s16 = vqrshrn_n_s32(q15s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q2s32 = vmull_s16(d20s16, d0s16); + q3s32 = vmull_s16(d21s16, d0s16); + q8s32 = vmull_s16(d20s16, d1s16); + q12s32 = vmull_s16(d21s16, d1s16); + + q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); + q8s32 = vmlal_s16(q8s32, d28s16, d0s16); + q12s32 = vmlal_s16(q12s32, d29s16, d0s16); + + d26s16 = vqrshrn_n_s32(q2s32, 14); + d27s16 = vqrshrn_n_s32(q3s32, 14); + d30s16 = vqrshrn_n_s32(q8s32, 14); + d31s16 = vqrshrn_n_s32(q12s32, 14); + *q13s16 = vcombine_s16(d26s16, d27s16); + *q15s16 = vcombine_s16(d30s16, d31s16); + + q0s16 = vaddq_s16(*q9s16, *q15s16); + q1s16 = vaddq_s16(*q11s16, *q13s16); + q2s16 = vsubq_s16(*q11s16, *q13s16); + q3s16 = vsubq_s16(*q9s16, *q15s16); + + *q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + *q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q7s16, q6s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + + d16s16 = vdup_n_s16(cospi_16_64); + + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + q11s32 = vmull_s16(d28s16, d16s16); + q12s32 = vmull_s16(d29s16, d16s16); + + q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); + q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); + q11s32 = vmlal_s16(q11s32, d26s16, d16s16); + q12s32 = vmlal_s16(q12s32, d27s16, d16s16); + + d10s16 = vqrshrn_n_s32(q9s32, 14); + d11s16 = vqrshrn_n_s32(q10s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q12s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + *q8s16 = vaddq_s16(q0s16, q7s16); + *q9s16 = vaddq_s16(q1s16, q6s16); + *q10s16 = vaddq_s16(q2s16, q5s16); + *q11s16 = vaddq_s16(q3s16, q4s16); + *q12s16 = vsubq_s16(q3s16, q4s16); + *q13s16 = vsubq_s16(q2s16, q5s16); + *q14s16 = vsubq_s16(q1s16, q6s16); + *q15s16 = vsubq_s16(q0s16, q7s16); + return; +} + +void vpx_idct8x8_64_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8_t *d1, *d2; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + uint64x1_t d0u64, d1u64, d2u64, d3u64; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + q10s16 = vld1q_s16(input + 16); + q11s16 = vld1q_s16(input + 24); + q12s16 = vld1q_s16(input + 32); + q13s16 = vld1q_s16(input + 40); + q14s16 = vld1q_s16(input + 48); + q15s16 = vld1q_s16(input + 56); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + q8s16 = vrshrq_n_s16(q8s16, 5); + q9s16 = vrshrq_n_s16(q9s16, 5); + q10s16 = vrshrq_n_s16(q10s16, 5); + q11s16 = vrshrq_n_s16(q11s16, 5); + q12s16 = vrshrq_n_s16(q12s16, 5); + q13s16 = vrshrq_n_s16(q13s16, 5); + q14s16 = vrshrq_n_s16(q14s16, 5); + q15s16 = vrshrq_n_s16(q15s16, 5); + + d1 = d2 = dest; + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + + q8s16 = q12s16; + q9s16 = q13s16; + q10s16 = q14s16; + q11s16 = q15s16; + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + return; +} + +void vpx_idct8x8_12_add_neon( + int16_t *input, + uint8_t *dest, + int dest_stride) { + uint8_t *d1, *d2; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + int16x4_t d10s16, d11s16, d12s16, d13s16, d16s16; + int16x4_t d26s16, d27s16, d28s16, d29s16; + uint64x1_t d0u64, d1u64, d2u64, d3u64; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + int32x4_t q9s32, q10s32, q11s32, q12s32; + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + q10s16 = vld1q_s16(input + 16); + q11s16 = vld1q_s16(input + 24); + q12s16 = vld1q_s16(input + 32); + q13s16 = vld1q_s16(input + 40); + q14s16 = vld1q_s16(input + 48); + q15s16 = vld1q_s16(input + 56); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // First transform rows + // stage 1 + q0s16 = vdupq_n_s16(cospi_28_64 * 2); + q1s16 = vdupq_n_s16(cospi_4_64 * 2); + + q4s16 = vqrdmulhq_s16(q9s16, q0s16); + + q0s16 = vdupq_n_s16(-cospi_20_64 * 2); + + q7s16 = vqrdmulhq_s16(q9s16, q1s16); + + q1s16 = vdupq_n_s16(cospi_12_64 * 2); + + q5s16 = vqrdmulhq_s16(q11s16, q0s16); + + q0s16 = vdupq_n_s16(cospi_16_64 * 2); + + q6s16 = vqrdmulhq_s16(q11s16, q1s16); + + // stage 2 & stage 3 - even half + q1s16 = vdupq_n_s16(cospi_24_64 * 2); + + q9s16 = vqrdmulhq_s16(q8s16, q0s16); + + q0s16 = vdupq_n_s16(cospi_8_64 * 2); + + q13s16 = vqrdmulhq_s16(q10s16, q1s16); + + q15s16 = vqrdmulhq_s16(q10s16, q0s16); + + // stage 3 -odd half + q0s16 = vaddq_s16(q9s16, q15s16); + q1s16 = vaddq_s16(q9s16, q13s16); + q2s16 = vsubq_s16(q9s16, q13s16); + q3s16 = vsubq_s16(q9s16, q15s16); + + // stage 2 - odd half + q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q7s16, q6s16); + d26s16 = vget_low_s16(q13s16); + d27s16 = vget_high_s16(q13s16); + d28s16 = vget_low_s16(q14s16); + d29s16 = vget_high_s16(q14s16); + + d16s16 = vdup_n_s16(cospi_16_64); + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + q11s32 = vmull_s16(d28s16, d16s16); + q12s32 = vmull_s16(d29s16, d16s16); + + q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); + q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); + q11s32 = vmlal_s16(q11s32, d26s16, d16s16); + q12s32 = vmlal_s16(q12s32, d27s16, d16s16); + + d10s16 = vqrshrn_n_s32(q9s32, 14); + d11s16 = vqrshrn_n_s32(q10s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q12s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + // stage 4 + q8s16 = vaddq_s16(q0s16, q7s16); + q9s16 = vaddq_s16(q1s16, q6s16); + q10s16 = vaddq_s16(q2s16, q5s16); + q11s16 = vaddq_s16(q3s16, q4s16); + q12s16 = vsubq_s16(q3s16, q4s16); + q13s16 = vsubq_s16(q2s16, q5s16); + q14s16 = vsubq_s16(q1s16, q6s16); + q15s16 = vsubq_s16(q0s16, q7s16); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + q8s16 = vrshrq_n_s16(q8s16, 5); + q9s16 = vrshrq_n_s16(q9s16, 5); + q10s16 = vrshrq_n_s16(q10s16, 5); + q11s16 = vrshrq_n_s16(q11s16, 5); + q12s16 = vrshrq_n_s16(q12s16, 5); + q13s16 = vrshrq_n_s16(q13s16, 5); + q14s16 = vrshrq_n_s16(q14s16, 5); + q15s16 = vrshrq_n_s16(q15s16, 5); + + d1 = d2 = dest; + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + + q8s16 = q12s16; + q9s16 = q13s16; + q10s16 = q14s16; + q11s16 = q15s16; + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c new file mode 100644 index 00000000000..0a376104d2b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c @@ -0,0 +1,822 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +//------------------------------------------------------------------------------ +// DC 4x4 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. +static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x8_t A = vld1_u8(above); // top row + const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top + const uint16x4_t p1 = vpadd_u16(p0, p0); + sum_top = vcombine_u16(p1, p1); + } + + if (do_left) { + const uint8x8_t L = vld1_u8(left); // left border + const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left + const uint16x4_t p1 = vpadd_u16(p0, p0); + sum_left = vcombine_u16(p1, p1); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 3); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 2); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 2); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x8_t dc = vdup_lane_u8(dc0, 0); + int i; + for (i = 0; i < 4; ++i) { + vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0); + } + } +} + +void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_4x4(dst, stride, above, left, 1, 1); +} + +void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + dc_4x4(dst, stride, NULL, left, 0, 1); +} + +void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + dc_4x4(dst, stride, above, NULL, 1, 0); +} + +void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + dc_4x4(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 8x8 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. +static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x8_t A = vld1_u8(above); // top row + const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_top = vcombine_u16(p2, p2); + } + + if (do_left) { + const uint8x8_t L = vld1_u8(left); // left border + const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left + const uint16x4_t p1 = vpadd_u16(p0, p0); + const uint16x4_t p2 = vpadd_u16(p1, p1); + sum_left = vcombine_u16(p2, p2); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 4); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 3); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 3); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x8_t dc = vdup_lane_u8(dc0, 0); + int i; + for (i = 0; i < 8; ++i) { + vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc)); + } + } +} + +void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_8x8(dst, stride, above, left, 1, 1); +} + +void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + dc_8x8(dst, stride, NULL, left, 0, 1); +} + +void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)left; + dc_8x8(dst, stride, above, NULL, 1, 0); +} + +void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + (void)above; + (void)left; + dc_8x8(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 16x16 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. +static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x16_t A = vld1q_u8(above); // top row + const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_top = vcombine_u16(p3, p3); + } + + if (do_left) { + const uint8x16_t L = vld1q_u8(left); // left row + const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left + const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); + const uint16x4_t p2 = vpadd_u16(p1, p1); + const uint16x4_t p3 = vpadd_u16(p2, p2); + sum_left = vcombine_u16(p3, p3); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 5); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 4); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 4); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x16_t dc = vdupq_lane_u8(dc0, 0); + int i; + for (i = 0; i < 16; ++i) { + vst1q_u8(dst + i * stride, dc); + } + } +} + +void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_16x16(dst, stride, above, left, 1, 1); +} + +void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + dc_16x16(dst, stride, NULL, left, 0, 1); +} + +void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)left; + dc_16x16(dst, stride, above, NULL, 1, 0); +} + +void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + (void)left; + dc_16x16(dst, stride, NULL, NULL, 0, 0); +} + +//------------------------------------------------------------------------------ +// DC 32x32 + +// 'do_above' and 'do_left' facilitate branch removal when inlined. +static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left, + int do_above, int do_left) { + uint16x8_t sum_top; + uint16x8_t sum_left; + uint8x8_t dc0; + + if (do_above) { + const uint8x16_t A0 = vld1q_u8(above); // top row + const uint8x16_t A1 = vld1q_u8(above + 16); + const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top + const uint16x8_t p1 = vpaddlq_u8(A1); + const uint16x8_t p2 = vaddq_u16(p0, p1); + const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); + const uint16x4_t p4 = vpadd_u16(p3, p3); + const uint16x4_t p5 = vpadd_u16(p4, p4); + sum_top = vcombine_u16(p5, p5); + } + + if (do_left) { + const uint8x16_t L0 = vld1q_u8(left); // left row + const uint8x16_t L1 = vld1q_u8(left + 16); + const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left + const uint16x8_t p1 = vpaddlq_u8(L1); + const uint16x8_t p2 = vaddq_u16(p0, p1); + const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); + const uint16x4_t p4 = vpadd_u16(p3, p3); + const uint16x4_t p5 = vpadd_u16(p4, p4); + sum_left = vcombine_u16(p5, p5); + } + + if (do_above && do_left) { + const uint16x8_t sum = vaddq_u16(sum_left, sum_top); + dc0 = vrshrn_n_u16(sum, 6); + } else if (do_above) { + dc0 = vrshrn_n_u16(sum_top, 5); + } else if (do_left) { + dc0 = vrshrn_n_u16(sum_left, 5); + } else { + dc0 = vdup_n_u8(0x80); + } + + { + const uint8x16_t dc = vdupq_lane_u8(dc0, 0); + int i; + for (i = 0; i < 32; ++i) { + vst1q_u8(dst + i * stride, dc); + vst1q_u8(dst + i * stride + 16, dc); + } + } +} + +void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_32x32(dst, stride, above, left, 1, 1); +} + +void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + dc_32x32(dst, stride, NULL, left, 0, 1); +} + +void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)left; + dc_32x32(dst, stride, above, NULL, 1, 0); +} + +void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, + const uint8_t *left) { + (void)above; + (void)left; + dc_32x32(dst, stride, NULL, NULL, 0, 0); +} + +// ----------------------------------------------------------------------------- + +void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row + const uint64x1_t A1 = vshr_n_u64(A0, 8); + const uint64x1_t A2 = vshr_n_u64(A0, 16); + const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0); + const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1); + const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2); + const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00); + const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); + const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); + const uint32x2_t r0 = vreinterpret_u32_u8(avg2); + const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); + const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); + const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); + (void)left; + vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); + vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); + vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); + vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); + dst[3 * stride + 3] = above[7]; +} + +void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 }; + static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 }; + const uint8x8_t sh_12345677 = vld1_u8(shuffle1); + const uint8x8_t sh_23456777 = vld1_u8(shuffle2); + const uint8x8_t A0 = vld1_u8(above); // top row + const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677); + const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777); + const uint8x8_t avg1 = vhadd_u8(A0, A2); + uint8x8_t row = vrhadd_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 7; ++i) { + vst1_u8(dst + i * stride, row); + row = vtbl1_u8(row, sh_12345677); + } + vst1_u8(dst + i * stride, row); +} + +void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t A0 = vld1q_u8(above); // top row + const uint8x16_t above_right = vld1q_dup_u8(above + 15); + const uint8x16_t A1 = vextq_u8(A0, above_right, 1); + const uint8x16_t A2 = vextq_u8(A0, above_right, 2); + const uint8x16_t avg1 = vhaddq_u8(A0, A2); + uint8x16_t row = vrhaddq_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 15; ++i) { + vst1q_u8(dst + i * stride, row); + row = vextq_u8(row, above_right, 1); + } + vst1q_u8(dst + i * stride, row); +} + +// ----------------------------------------------------------------------------- + +void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x8_t XABCD_u8 = vld1_u8(above - 1); + const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8); + const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32); + const uint32x2_t zero = vdup_n_u32(0); + const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0); + const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL); + const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8)); + const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC); + const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); + const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); + const uint8_t D = vget_lane_u8(XABCD_u8, 4); + const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6); + const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC); + const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8); + const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_); + const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); + const uint32x2_t r3 = vreinterpret_u32_u8(avg2); + const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); + const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); + const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); + vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); + vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); + vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); + vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); +} + +#if !HAVE_NEON_ASM + +void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint32x2_t d0u32 = vdup_n_u32(0); + (void)left; + + d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); + for (i = 0; i < 4; i++, dst += stride) + vst1_lane_u32((uint32_t *)dst, d0u32, 0); +} + +void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x8_t d0u8 = vdup_n_u8(0); + (void)left; + + d0u8 = vld1_u8(above); + for (i = 0; i < 8; i++, dst += stride) + vst1_u8(dst, d0u8); +} + +void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + for (i = 0; i < 16; i++, dst += stride) + vst1q_u8(dst, q0u8); +} + +void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)left; + + q0u8 = vld1q_u8(above); + q1u8 = vld1q_u8(above + 16); + for (i = 0; i < 32; i++, dst += stride) { + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + } +} + +void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d1u32 = vdup_n_u32(0); + (void)above; + + d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); +} + +void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + uint8x8_t d0u8 = vdup_n_u8(0); + uint64x1_t d1u64 = vdup_n_u64(0); + (void)above; + + d1u64 = vld1_u64((const uint64_t *)left); + + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); + vst1_u8(dst, d0u8); + dst += stride; + d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); + vst1_u8(dst, d0u8); +} + +void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + + q1u8 = vld1q_u8(left); + d2u8 = vget_low_u8(q1u8); + for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + dst += stride; + } +} + +void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint8x8_t d2u8 = vdup_n_u8(0); + uint8x16_t q0u8 = vdupq_n_u8(0); + uint8x16_t q1u8 = vdupq_n_u8(0); + (void)above; + + for (k = 0; k < 2; k++, left += 16) { + q1u8 = vld1q_u8(left); + d2u8 = vget_low_u8(q1u8); + for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { + q0u8 = vdupq_lane_u8(d2u8, 0); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 1); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 2); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 3); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 4); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 5); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 6); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + q0u8 = vdupq_lane_u8(d2u8, 7); + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q0u8); + dst += stride; + } + } +} + +void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int i; + uint16x8_t q1u16, q3u16; + int16x8_t q1s16; + uint8x8_t d0u8 = vdup_n_u8(0); + uint32x2_t d2u32 = vdup_n_u32(0); + + d0u8 = vld1_dup_u8(above - 1); + d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); + q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); + for (i = 0; i < 4; i++, dst += stride) { + q1u16 = vdupq_n_u16((uint16_t)left[i]); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), + vreinterpretq_s16_u16(q3u16)); + d0u8 = vqmovun_s16(q1s16); + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); + } +} + +void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j; + uint16x8_t q0u16, q3u16, q10u16; + int16x8_t q0s16; + uint16x4_t d20u16; + uint8x8_t d0u8, d2u8, d30u8; + + d0u8 = vld1_dup_u8(above - 1); + d30u8 = vld1_u8(left); + d2u8 = vld1_u8(above); + q10u16 = vmovl_u8(d30u8); + q3u16 = vsubl_u8(d2u8, d0u8); + d20u16 = vget_low_u16(q10u16); + for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { + q0u16 = vdupq_lane_u16(d20u16, 0); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 1); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 2); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + q0u16 = vdupq_lane_u16(d20u16, 3); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), + vreinterpretq_s16_u16(q0u16)); + d0u8 = vqmovun_s16(q0s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); + dst += stride; + } +} + +void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; + uint8x16_t q0u8, q1u8; + int16x8_t q0s16, q1s16, q8s16, q11s16; + uint16x4_t d20u16; + uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; + + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + for (k = 0; k < 2; k++, left += 8) { + d18u8 = vld1_u8(left); + q10u16 = vmovl_u8(d18u8); + d20u16 = vget_low_u16(q10u16); + for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { + q0u16 = vdupq_lane_u16(d20u16, 0); + q8u16 = vdupq_lane_u16(d20u16, 1); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d20u16, 2); + q8u16 = vdupq_lane_u16(d20u16, 3); + q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q2u16)); + q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q3u16)); + q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q2u16)); + q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), + vreinterpretq_s16_u16(q3u16)); + d2u8 = vqmovun_s16(q1s16); + d3u8 = vqmovun_s16(q0s16); + d22u8 = vqmovun_s16(q11s16); + d23u8 = vqmovun_s16(q8s16); + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); + dst += stride; + vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); + vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); + dst += stride; + } + } +} + +void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + int j, k; + uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; + uint8x16_t q0u8, q1u8, q2u8; + int16x8_t q12s16, q13s16, q14s16, q15s16; + uint16x4_t d6u16; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; + + q0u8 = vld1q_dup_u8(above - 1); + q1u8 = vld1q_u8(above); + q2u8 = vld1q_u8(above + 16); + q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); + q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); + q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); + q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); + for (k = 0; k < 4; k++, left += 8) { + d26u8 = vld1_u8(left); + q3u16 = vmovl_u8(d26u8); + d6u16 = vget_low_u16(q3u16); + for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { + q0u16 = vdupq_lane_u16(d6u16, 0); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 1); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 2); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + + q0u16 = vdupq_lane_u16(d6u16, 3); + q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q8u16)); + q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q9u16)); + q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q10u16)); + q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), + vreinterpretq_s16_u16(q11u16)); + d0u8 = vqmovun_s16(q12s16); + d1u8 = vqmovun_s16(q13s16); + d2u8 = vqmovun_s16(q14s16); + d3u8 = vqmovun_s16(q15s16); + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); + vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); + dst += stride; + } + } +} +#endif // !HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm new file mode 100644 index 00000000000..115790d4801 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm @@ -0,0 +1,630 @@ +; +; Copyright (c) 2014 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + EXPORT |vpx_v_predictor_4x4_neon| + EXPORT |vpx_v_predictor_8x8_neon| + EXPORT |vpx_v_predictor_16x16_neon| + EXPORT |vpx_v_predictor_32x32_neon| + EXPORT |vpx_h_predictor_4x4_neon| + EXPORT |vpx_h_predictor_8x8_neon| + EXPORT |vpx_h_predictor_16x16_neon| + EXPORT |vpx_h_predictor_32x32_neon| + EXPORT |vpx_tm_predictor_4x4_neon| + EXPORT |vpx_tm_predictor_8x8_neon| + EXPORT |vpx_tm_predictor_16x16_neon| + EXPORT |vpx_tm_predictor_32x32_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_v_predictor_4x4_neon| PROC + vld1.32 {d0[0]}, [r2] + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + bx lr + ENDP ; |vpx_v_predictor_4x4_neon| + +;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_v_predictor_8x8_neon| PROC + vld1.8 {d0}, [r2] + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + bx lr + ENDP ; |vpx_v_predictor_8x8_neon| + +;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_v_predictor_16x16_neon| PROC + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + bx lr + ENDP ; |vpx_v_predictor_16x16_neon| + +;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_v_predictor_32x32_neon| PROC + vld1.8 {q0, q1}, [r2] + mov r2, #2 +loop_v + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + subs r2, r2, #1 + bgt loop_v + bx lr + ENDP ; |vpx_v_predictor_32x32_neon| + +;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_h_predictor_4x4_neon| PROC + vld1.32 {d1[0]}, [r3] + vdup.8 d0, d1[0] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[1] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[2] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[3] + vst1.32 {d0[0]}, [r0], r1 + bx lr + ENDP ; |vpx_h_predictor_4x4_neon| + +;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_h_predictor_8x8_neon| PROC + vld1.64 {d1}, [r3] + vdup.8 d0, d1[0] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[1] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[2] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[3] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[4] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[5] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[6] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[7] + vst1.64 {d0}, [r0], r1 + bx lr + ENDP ; |vpx_h_predictor_8x8_neon| + +;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_h_predictor_16x16_neon| PROC + vld1.8 {q1}, [r3] + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0], r1 + bx lr + ENDP ; |vpx_h_predictor_16x16_neon| + +;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_h_predictor_32x32_neon| PROC + sub r1, r1, #16 + mov r2, #2 +loop_h + vld1.8 {q1}, [r3]! + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + subs r2, r2, #1 + bgt loop_h + bx lr + ENDP ; |vpx_h_predictor_32x32_neon| + +;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_tm_predictor_4x4_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.u8 {d0[]}, [r12] + + ; Load above 4 pixels + vld1.32 {d2[0]}, [r2] + + ; Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + ; Load left row by row and compute left + (above - ytop_left) + ; 1st row and 2nd row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + ; 3rd row and 4th row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + bx lr + ENDP ; |vpx_tm_predictor_4x4_neon| + +;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_tm_predictor_8x8_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + ; preload 8 left + vld1.8 {d30}, [r3] + + ; Load above 8 pixels + vld1.64 {d2}, [r2] + + vmovl.u8 q10, d30 + + ; Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + ; Load left row by row and compute left + (above - ytop_left) + ; 1st row and 2nd row + vdup.16 q0, d20[0] + vdup.16 q1, d20[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + ; 3rd row and 4th row + vdup.16 q8, d20[2] + vdup.16 q9, d20[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + ; 5th row and 6th row + vdup.16 q0, d21[0] + vdup.16 q1, d21[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + ; 7th row and 8th row + vdup.16 q8, d21[2] + vdup.16 q9, d21[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + bx lr + ENDP ; |vpx_tm_predictor_8x8_neon| + +;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_tm_predictor_16x16_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + ; Load above 8 pixels + vld1.8 {q1}, [r2] + + ; preload 8 left into r12 + vld1.8 {d18}, [r3]! + + ; Compute above - ytop_left + vsubl.u8 q2, d2, d0 + vsubl.u8 q3, d3, d0 + + vmovl.u8 q10, d18 + + ; Load left row by row and compute left + (above - ytop_left) + ; Process 8 rows in each single loop and loop 2 times to process 16 rows. + mov r2, #2 + +loop_16x16_neon + ; Process two rows. + vdup.16 q0, d20[0] + vdup.16 q8, d20[1] + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d20[2] ; proload next 2 rows data + vdup.16 q8, d20[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + ; Process two rows. + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[0] ; proload next 2 rows data + vdup.16 q8, d21[1] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[2] ; proload next 2 rows data + vdup.16 q8, d21[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vld1.8 {d18}, [r3]! ; preload 8 left into r12 + vmovl.u8 q10, d18 + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + subs r2, r2, #1 + bgt loop_16x16_neon + + bx lr + ENDP ; |vpx_tm_predictor_16x16_neon| + +;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vpx_tm_predictor_32x32_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + ; Load above 32 pixels + vld1.8 {q1}, [r2]! + vld1.8 {q2}, [r2] + + ; preload 8 left pixels + vld1.8 {d26}, [r3]! + + ; Compute above - ytop_left + vsubl.u8 q8, d2, d0 + vsubl.u8 q9, d3, d0 + vsubl.u8 q10, d4, d0 + vsubl.u8 q11, d5, d0 + + vmovl.u8 q3, d26 + + ; Load left row by row and compute left + (above - ytop_left) + ; Process 8 rows in each single loop and loop 4 times to process 32 rows. + mov r2, #4 + +loop_32x32_neon + ; Process two rows. + vdup.16 q0, d6[0] + vdup.16 q2, d6[1] + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q1, d6[2] + vdup.16 q2, d6[3] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q1, q8 + vadd.s16 q13, q1, q9 + vadd.s16 q14, q1, q10 + vadd.s16 q15, q1, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[0] + vdup.16 q2, d7[1] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[2] + vdup.16 q2, d7[3] + vst1.64 {d24-d27}, [r0], r1 + + ; Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vld1.8 {d0}, [r3]! ; preload 8 left pixels + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vmovl.u8 q3, d0 + vst1.64 {d24-d27}, [r0], r1 + + subs r2, r2, #1 + bgt loop_32x32_neon + + bx lr + ENDP ; |vpx_tm_predictor_32x32_neon| + + END diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c new file mode 100644 index 00000000000..d24e6adc8a6 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +static INLINE void loop_filter_neon_16( + uint8x16_t qblimit, // blimit + uint8x16_t qlimit, // limit + uint8x16_t qthresh, // thresh + uint8x16_t q3, // p3 + uint8x16_t q4, // p2 + uint8x16_t q5, // p1 + uint8x16_t q6, // p0 + uint8x16_t q7, // q0 + uint8x16_t q8, // q1 + uint8x16_t q9, // q2 + uint8x16_t q10, // q3 + uint8x16_t *q5r, // p1 + uint8x16_t *q6r, // p0 + uint8x16_t *q7r, // q0 + uint8x16_t *q8r) { // q1 + uint8x16_t q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; + int16x8_t q2s16, q11s16; + uint16x8_t q4u16; + int8x16_t q0s8, q1s8, q2s8, q11s8, q12s8, q13s8; + int8x8_t d2s8, d3s8; + + q11u8 = vabdq_u8(q3, q4); + q12u8 = vabdq_u8(q4, q5); + q13u8 = vabdq_u8(q5, q6); + q14u8 = vabdq_u8(q8, q7); + q3 = vabdq_u8(q9, q8); + q4 = vabdq_u8(q10, q9); + + q11u8 = vmaxq_u8(q11u8, q12u8); + q12u8 = vmaxq_u8(q13u8, q14u8); + q3 = vmaxq_u8(q3, q4); + q15u8 = vmaxq_u8(q11u8, q12u8); + + q9 = vabdq_u8(q6, q7); + + // vp8_hevmask + q13u8 = vcgtq_u8(q13u8, qthresh); + q14u8 = vcgtq_u8(q14u8, qthresh); + q15u8 = vmaxq_u8(q15u8, q3); + + q2u8 = vabdq_u8(q5, q8); + q9 = vqaddq_u8(q9, q9); + + q15u8 = vcgeq_u8(qlimit, q15u8); + + // vp8_filter() function + // convert to signed + q10 = vdupq_n_u8(0x80); + q8 = veorq_u8(q8, q10); + q7 = veorq_u8(q7, q10); + q6 = veorq_u8(q6, q10); + q5 = veorq_u8(q5, q10); + + q2u8 = vshrq_n_u8(q2u8, 1); + q9 = vqaddq_u8(q9, q2u8); + + q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), + vget_low_s8(vreinterpretq_s8_u8(q6))); + q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), + vget_high_s8(vreinterpretq_s8_u8(q6))); + + q9 = vcgeq_u8(qblimit, q9); + + q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), + vreinterpretq_s8_u8(q8)); + + q14u8 = vorrq_u8(q13u8, q14u8); + + q4u16 = vdupq_n_u16(3); + q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); + q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); + + q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); + q15u8 = vandq_u8(q15u8, q9); + + q1s8 = vreinterpretq_s8_u8(q1u8); + q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); + q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); + + q4 = vdupq_n_u8(3); + q9 = vdupq_n_u8(4); + // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) + d2s8 = vqmovn_s16(q2s16); + d3s8 = vqmovn_s16(q11s16); + q1s8 = vcombine_s8(d2s8, d3s8); + q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); + q1s8 = vreinterpretq_s8_u8(q1u8); + + q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q4)); + q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); + q2s8 = vshrq_n_s8(q2s8, 3); + q1s8 = vshrq_n_s8(q1s8, 3); + + q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); + q0s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); + + q1s8 = vrshrq_n_s8(q1s8, 1); + q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); + + q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); + q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); + + *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q10); + *q7r = veorq_u8(vreinterpretq_u8_s8(q0s8), q10); + *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q10); + *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q10); + return; +} + +void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + uint8x8_t dblimit0, dlimit0, dthresh0, dblimit1, dlimit1, dthresh1; + uint8x16_t qblimit, qlimit, qthresh; + uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; + + dblimit0 = vld1_u8(blimit0); + dlimit0 = vld1_u8(limit0); + dthresh0 = vld1_u8(thresh0); + dblimit1 = vld1_u8(blimit1); + dlimit1 = vld1_u8(limit1); + dthresh1 = vld1_u8(thresh1); + qblimit = vcombine_u8(dblimit0, dblimit1); + qlimit = vcombine_u8(dlimit0, dlimit1); + qthresh = vcombine_u8(dthresh0, dthresh1); + + s -= (p << 2); + + q3u8 = vld1q_u8(s); + s += p; + q4u8 = vld1q_u8(s); + s += p; + q5u8 = vld1q_u8(s); + s += p; + q6u8 = vld1q_u8(s); + s += p; + q7u8 = vld1q_u8(s); + s += p; + q8u8 = vld1q_u8(s); + s += p; + q9u8 = vld1q_u8(s); + s += p; + q10u8 = vld1q_u8(s); + + loop_filter_neon_16(qblimit, qlimit, qthresh, + q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, + &q5u8, &q6u8, &q7u8, &q8u8); + + s -= (p * 5); + vst1q_u8(s, q5u8); + s += p; + vst1q_u8(s, q6u8); + s += p; + vst1q_u8(s, q7u8); + s += p; + vst1q_u8(s, q8u8); + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c new file mode 100644 index 00000000000..7f3ee70b948 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" + +static INLINE void loop_filter_neon( + uint8x8_t dblimit, // flimit + uint8x8_t dlimit, // limit + uint8x8_t dthresh, // thresh + uint8x8_t d3u8, // p3 + uint8x8_t d4u8, // p2 + uint8x8_t d5u8, // p1 + uint8x8_t d6u8, // p0 + uint8x8_t d7u8, // q0 + uint8x8_t d16u8, // q1 + uint8x8_t d17u8, // q2 + uint8x8_t d18u8, // q3 + uint8x8_t *d4ru8, // p1 + uint8x8_t *d5ru8, // p0 + uint8x8_t *d6ru8, // q0 + uint8x8_t *d7ru8) { // q1 + uint8x8_t d19u8, d20u8, d21u8, d22u8, d23u8, d27u8, d28u8; + int16x8_t q12s16; + int8x8_t d19s8, d20s8, d21s8, d26s8, d27s8, d28s8; + + d19u8 = vabd_u8(d3u8, d4u8); + d20u8 = vabd_u8(d4u8, d5u8); + d21u8 = vabd_u8(d5u8, d6u8); + d22u8 = vabd_u8(d16u8, d7u8); + d3u8 = vabd_u8(d17u8, d16u8); + d4u8 = vabd_u8(d18u8, d17u8); + + d19u8 = vmax_u8(d19u8, d20u8); + d20u8 = vmax_u8(d21u8, d22u8); + d3u8 = vmax_u8(d3u8, d4u8); + d23u8 = vmax_u8(d19u8, d20u8); + + d17u8 = vabd_u8(d6u8, d7u8); + + d21u8 = vcgt_u8(d21u8, dthresh); + d22u8 = vcgt_u8(d22u8, dthresh); + d23u8 = vmax_u8(d23u8, d3u8); + + d28u8 = vabd_u8(d5u8, d16u8); + d17u8 = vqadd_u8(d17u8, d17u8); + + d23u8 = vcge_u8(dlimit, d23u8); + + d18u8 = vdup_n_u8(0x80); + d5u8 = veor_u8(d5u8, d18u8); + d6u8 = veor_u8(d6u8, d18u8); + d7u8 = veor_u8(d7u8, d18u8); + d16u8 = veor_u8(d16u8, d18u8); + + d28u8 = vshr_n_u8(d28u8, 1); + d17u8 = vqadd_u8(d17u8, d28u8); + + d19u8 = vdup_n_u8(3); + + d28s8 = vsub_s8(vreinterpret_s8_u8(d7u8), + vreinterpret_s8_u8(d6u8)); + + d17u8 = vcge_u8(dblimit, d17u8); + + d27s8 = vqsub_s8(vreinterpret_s8_u8(d5u8), + vreinterpret_s8_u8(d16u8)); + + d22u8 = vorr_u8(d21u8, d22u8); + + q12s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d19u8)); + + d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d22u8); + d23u8 = vand_u8(d23u8, d17u8); + + q12s16 = vaddw_s8(q12s16, vreinterpret_s8_u8(d27u8)); + + d17u8 = vdup_n_u8(4); + + d27s8 = vqmovn_s16(q12s16); + d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d23u8); + d27s8 = vreinterpret_s8_u8(d27u8); + + d28s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d19u8)); + d27s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d17u8)); + d28s8 = vshr_n_s8(d28s8, 3); + d27s8 = vshr_n_s8(d27s8, 3); + + d19s8 = vqadd_s8(vreinterpret_s8_u8(d6u8), d28s8); + d26s8 = vqsub_s8(vreinterpret_s8_u8(d7u8), d27s8); + + d27s8 = vrshr_n_s8(d27s8, 1); + d27s8 = vbic_s8(d27s8, vreinterpret_s8_u8(d22u8)); + + d21s8 = vqadd_s8(vreinterpret_s8_u8(d5u8), d27s8); + d20s8 = vqsub_s8(vreinterpret_s8_u8(d16u8), d27s8); + + *d4ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d18u8); + *d5ru8 = veor_u8(vreinterpret_u8_s8(d19s8), d18u8); + *d6ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d18u8); + *d7ru8 = veor_u8(vreinterpret_u8_s8(d20s8), d18u8); + return; +} + +void vpx_lpf_horizontal_4_neon( + uint8_t *src, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + int i; + uint8_t *s, *psrc; + uint8x8_t dblimit, dlimit, dthresh; + uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; + + dblimit = vld1_u8(blimit); + dlimit = vld1_u8(limit); + dthresh = vld1_u8(thresh); + + psrc = src - (pitch << 2); + for (i = 0; i < 1; i++) { + s = psrc + i * 8; + + d3u8 = vld1_u8(s); + s += pitch; + d4u8 = vld1_u8(s); + s += pitch; + d5u8 = vld1_u8(s); + s += pitch; + d6u8 = vld1_u8(s); + s += pitch; + d7u8 = vld1_u8(s); + s += pitch; + d16u8 = vld1_u8(s); + s += pitch; + d17u8 = vld1_u8(s); + s += pitch; + d18u8 = vld1_u8(s); + + loop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d4u8, &d5u8, &d6u8, &d7u8); + + s -= (pitch * 5); + vst1_u8(s, d4u8); + s += pitch; + vst1_u8(s, d5u8); + s += pitch; + vst1_u8(s, d6u8); + s += pitch; + vst1_u8(s, d7u8); + } + return; +} + +void vpx_lpf_vertical_4_neon( + uint8_t *src, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + int i, pitch8; + uint8_t *s; + uint8x8_t dblimit, dlimit, dthresh; + uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; + uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; + uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; + uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; + uint8x8x4_t d4Result; + + dblimit = vld1_u8(blimit); + dlimit = vld1_u8(limit); + dthresh = vld1_u8(thresh); + + pitch8 = pitch * 8; + for (i = 0; i < 1; i++, src += pitch8) { + s = src - (i + 1) * 4; + + d3u8 = vld1_u8(s); + s += pitch; + d4u8 = vld1_u8(s); + s += pitch; + d5u8 = vld1_u8(s); + s += pitch; + d6u8 = vld1_u8(s); + s += pitch; + d7u8 = vld1_u8(s); + s += pitch; + d16u8 = vld1_u8(s); + s += pitch; + d17u8 = vld1_u8(s); + s += pitch; + d18u8 = vld1_u8(s); + + d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), + vreinterpret_u32_u8(d7u8)); + d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), + vreinterpret_u32_u8(d16u8)); + d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), + vreinterpret_u32_u8(d17u8)); + d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), + vreinterpret_u32_u8(d18u8)); + + d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), + vreinterpret_u16_u32(d2tmp2.val[0])); + d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), + vreinterpret_u16_u32(d2tmp3.val[0])); + d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), + vreinterpret_u16_u32(d2tmp2.val[1])); + d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), + vreinterpret_u16_u32(d2tmp3.val[1])); + + d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), + vreinterpret_u8_u16(d2tmp5.val[0])); + d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), + vreinterpret_u8_u16(d2tmp5.val[1])); + d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), + vreinterpret_u8_u16(d2tmp7.val[0])); + d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), + vreinterpret_u8_u16(d2tmp7.val[1])); + + d3u8 = d2tmp8.val[0]; + d4u8 = d2tmp8.val[1]; + d5u8 = d2tmp9.val[0]; + d6u8 = d2tmp9.val[1]; + d7u8 = d2tmp10.val[0]; + d16u8 = d2tmp10.val[1]; + d17u8 = d2tmp11.val[0]; + d18u8 = d2tmp11.val[1]; + + loop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d4u8, &d5u8, &d6u8, &d7u8); + + d4Result.val[0] = d4u8; + d4Result.val[1] = d5u8; + d4Result.val[2] = d6u8; + d4Result.val[3] = d7u8; + + src -= 2; + vst4_lane_u8(src, d4Result, 0); + src += pitch; + vst4_lane_u8(src, d4Result, 1); + src += pitch; + vst4_lane_u8(src, d4Result, 2); + src += pitch; + vst4_lane_u8(src, d4Result, 3); + src += pitch; + vst4_lane_u8(src, d4Result, 4); + src += pitch; + vst4_lane_u8(src, d4Result, 5); + src += pitch; + vst4_lane_u8(src, d4Result, 6); + src += pitch; + vst4_lane_u8(src, d4Result, 7); + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c new file mode 100644 index 00000000000..ec3757380d5 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" + +static INLINE void mbloop_filter_neon( + uint8x8_t dblimit, // mblimit + uint8x8_t dlimit, // limit + uint8x8_t dthresh, // thresh + uint8x8_t d3u8, // p2 + uint8x8_t d4u8, // p2 + uint8x8_t d5u8, // p1 + uint8x8_t d6u8, // p0 + uint8x8_t d7u8, // q0 + uint8x8_t d16u8, // q1 + uint8x8_t d17u8, // q2 + uint8x8_t d18u8, // q3 + uint8x8_t *d0ru8, // p1 + uint8x8_t *d1ru8, // p1 + uint8x8_t *d2ru8, // p0 + uint8x8_t *d3ru8, // q0 + uint8x8_t *d4ru8, // q1 + uint8x8_t *d5ru8) { // q1 + uint32_t flat; + uint8x8_t d0u8, d1u8, d2u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8; + uint8x8_t d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; + int16x8_t q15s16; + uint16x8_t q10u16, q14u16; + int8x8_t d21s8, d24s8, d25s8, d26s8, d28s8, d29s8, d30s8; + + d19u8 = vabd_u8(d3u8, d4u8); + d20u8 = vabd_u8(d4u8, d5u8); + d21u8 = vabd_u8(d5u8, d6u8); + d22u8 = vabd_u8(d16u8, d7u8); + d23u8 = vabd_u8(d17u8, d16u8); + d24u8 = vabd_u8(d18u8, d17u8); + + d19u8 = vmax_u8(d19u8, d20u8); + d20u8 = vmax_u8(d21u8, d22u8); + + d25u8 = vabd_u8(d6u8, d4u8); + + d23u8 = vmax_u8(d23u8, d24u8); + + d26u8 = vabd_u8(d7u8, d17u8); + + d19u8 = vmax_u8(d19u8, d20u8); + + d24u8 = vabd_u8(d6u8, d7u8); + d27u8 = vabd_u8(d3u8, d6u8); + d28u8 = vabd_u8(d18u8, d7u8); + + d19u8 = vmax_u8(d19u8, d23u8); + + d23u8 = vabd_u8(d5u8, d16u8); + d24u8 = vqadd_u8(d24u8, d24u8); + + + d19u8 = vcge_u8(dlimit, d19u8); + + + d25u8 = vmax_u8(d25u8, d26u8); + d26u8 = vmax_u8(d27u8, d28u8); + + d23u8 = vshr_n_u8(d23u8, 1); + + d25u8 = vmax_u8(d25u8, d26u8); + + d24u8 = vqadd_u8(d24u8, d23u8); + + d20u8 = vmax_u8(d20u8, d25u8); + + d23u8 = vdup_n_u8(1); + d24u8 = vcge_u8(dblimit, d24u8); + + d21u8 = vcgt_u8(d21u8, dthresh); + + d20u8 = vcge_u8(d23u8, d20u8); + + d19u8 = vand_u8(d19u8, d24u8); + + d23u8 = vcgt_u8(d22u8, dthresh); + + d20u8 = vand_u8(d20u8, d19u8); + + d22u8 = vdup_n_u8(0x80); + + d23u8 = vorr_u8(d21u8, d23u8); + + q10u16 = vcombine_u16(vreinterpret_u16_u8(d20u8), + vreinterpret_u16_u8(d21u8)); + + d30u8 = vshrn_n_u16(q10u16, 4); + flat = vget_lane_u32(vreinterpret_u32_u8(d30u8), 0); + + if (flat == 0xffffffff) { // Check for all 1's, power_branch_only + d27u8 = vdup_n_u8(3); + d21u8 = vdup_n_u8(2); + q14u16 = vaddl_u8(d6u8, d7u8); + q14u16 = vmlal_u8(q14u16, d3u8, d27u8); + q14u16 = vmlal_u8(q14u16, d4u8, d21u8); + q14u16 = vaddw_u8(q14u16, d5u8); + *d0ru8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d4u8); + q14u16 = vaddw_u8(q14u16, d5u8); + q14u16 = vaddw_u8(q14u16, d16u8); + *d1ru8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d5u8); + q14u16 = vaddw_u8(q14u16, d6u8); + q14u16 = vaddw_u8(q14u16, d17u8); + *d2ru8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d6u8); + q14u16 = vaddw_u8(q14u16, d7u8); + q14u16 = vaddw_u8(q14u16, d18u8); + *d3ru8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d4u8); + q14u16 = vsubw_u8(q14u16, d7u8); + q14u16 = vaddw_u8(q14u16, d16u8); + q14u16 = vaddw_u8(q14u16, d18u8); + *d4ru8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d5u8); + q14u16 = vsubw_u8(q14u16, d16u8); + q14u16 = vaddw_u8(q14u16, d17u8); + q14u16 = vaddw_u8(q14u16, d18u8); + *d5ru8 = vqrshrn_n_u16(q14u16, 3); + } else { + d21u8 = veor_u8(d7u8, d22u8); + d24u8 = veor_u8(d6u8, d22u8); + d25u8 = veor_u8(d5u8, d22u8); + d26u8 = veor_u8(d16u8, d22u8); + + d27u8 = vdup_n_u8(3); + + d28s8 = vsub_s8(vreinterpret_s8_u8(d21u8), vreinterpret_s8_u8(d24u8)); + d29s8 = vqsub_s8(vreinterpret_s8_u8(d25u8), vreinterpret_s8_u8(d26u8)); + + q15s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d27u8)); + + d29s8 = vand_s8(d29s8, vreinterpret_s8_u8(d23u8)); + + q15s16 = vaddw_s8(q15s16, d29s8); + + d29u8 = vdup_n_u8(4); + + d28s8 = vqmovn_s16(q15s16); + + d28s8 = vand_s8(d28s8, vreinterpret_s8_u8(d19u8)); + + d30s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d27u8)); + d29s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d29u8)); + d30s8 = vshr_n_s8(d30s8, 3); + d29s8 = vshr_n_s8(d29s8, 3); + + d24s8 = vqadd_s8(vreinterpret_s8_u8(d24u8), d30s8); + d21s8 = vqsub_s8(vreinterpret_s8_u8(d21u8), d29s8); + + d29s8 = vrshr_n_s8(d29s8, 1); + d29s8 = vbic_s8(d29s8, vreinterpret_s8_u8(d23u8)); + + d25s8 = vqadd_s8(vreinterpret_s8_u8(d25u8), d29s8); + d26s8 = vqsub_s8(vreinterpret_s8_u8(d26u8), d29s8); + + if (flat == 0) { // filter_branch_only + *d0ru8 = d4u8; + *d1ru8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); + *d2ru8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); + *d3ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); + *d4ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); + *d5ru8 = d17u8; + return; + } + + d21u8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); + d24u8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); + d25u8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); + d26u8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); + + d23u8 = vdup_n_u8(2); + q14u16 = vaddl_u8(d6u8, d7u8); + q14u16 = vmlal_u8(q14u16, d3u8, d27u8); + q14u16 = vmlal_u8(q14u16, d4u8, d23u8); + + d0u8 = vbsl_u8(d20u8, dblimit, d4u8); + + q14u16 = vaddw_u8(q14u16, d5u8); + + d1u8 = vbsl_u8(d20u8, dlimit, d25u8); + + d30u8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d4u8); + q14u16 = vaddw_u8(q14u16, d5u8); + q14u16 = vaddw_u8(q14u16, d16u8); + + d2u8 = vbsl_u8(d20u8, dthresh, d24u8); + + d31u8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d5u8); + q14u16 = vaddw_u8(q14u16, d6u8); + q14u16 = vaddw_u8(q14u16, d17u8); + + *d0ru8 = vbsl_u8(d20u8, d30u8, d0u8); + + d23u8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d3u8); + q14u16 = vsubw_u8(q14u16, d6u8); + q14u16 = vaddw_u8(q14u16, d7u8); + + *d1ru8 = vbsl_u8(d20u8, d31u8, d1u8); + + q14u16 = vaddw_u8(q14u16, d18u8); + + *d2ru8 = vbsl_u8(d20u8, d23u8, d2u8); + + d22u8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d4u8); + q14u16 = vsubw_u8(q14u16, d7u8); + q14u16 = vaddw_u8(q14u16, d16u8); + + d3u8 = vbsl_u8(d20u8, d3u8, d21u8); + + q14u16 = vaddw_u8(q14u16, d18u8); + + d4u8 = vbsl_u8(d20u8, d4u8, d26u8); + + d6u8 = vqrshrn_n_u16(q14u16, 3); + + q14u16 = vsubw_u8(q14u16, d5u8); + q14u16 = vsubw_u8(q14u16, d16u8); + q14u16 = vaddw_u8(q14u16, d17u8); + q14u16 = vaddw_u8(q14u16, d18u8); + + d5u8 = vbsl_u8(d20u8, d5u8, d17u8); + + d7u8 = vqrshrn_n_u16(q14u16, 3); + + *d3ru8 = vbsl_u8(d20u8, d22u8, d3u8); + *d4ru8 = vbsl_u8(d20u8, d6u8, d4u8); + *d5ru8 = vbsl_u8(d20u8, d7u8, d5u8); + } + return; +} + +void vpx_lpf_horizontal_8_neon( + uint8_t *src, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + int i; + uint8_t *s, *psrc; + uint8x8_t dblimit, dlimit, dthresh; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + uint8x8_t d16u8, d17u8, d18u8; + + dblimit = vld1_u8(blimit); + dlimit = vld1_u8(limit); + dthresh = vld1_u8(thresh); + + psrc = src - (pitch << 2); + for (i = 0; i < 1; i++) { + s = psrc + i * 8; + + d3u8 = vld1_u8(s); + s += pitch; + d4u8 = vld1_u8(s); + s += pitch; + d5u8 = vld1_u8(s); + s += pitch; + d6u8 = vld1_u8(s); + s += pitch; + d7u8 = vld1_u8(s); + s += pitch; + d16u8 = vld1_u8(s); + s += pitch; + d17u8 = vld1_u8(s); + s += pitch; + d18u8 = vld1_u8(s); + + mbloop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); + + s -= (pitch * 6); + vst1_u8(s, d0u8); + s += pitch; + vst1_u8(s, d1u8); + s += pitch; + vst1_u8(s, d2u8); + s += pitch; + vst1_u8(s, d3u8); + s += pitch; + vst1_u8(s, d4u8); + s += pitch; + vst1_u8(s, d5u8); + } + return; +} + +void vpx_lpf_vertical_8_neon( + uint8_t *src, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + int i; + uint8_t *s; + uint8x8_t dblimit, dlimit, dthresh; + uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; + uint8x8_t d16u8, d17u8, d18u8; + uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; + uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; + uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; + uint8x8x4_t d4Result; + uint8x8x2_t d2Result; + + dblimit = vld1_u8(blimit); + dlimit = vld1_u8(limit); + dthresh = vld1_u8(thresh); + + for (i = 0; i < 1; i++) { + s = src + (i * (pitch << 3)) - 4; + + d3u8 = vld1_u8(s); + s += pitch; + d4u8 = vld1_u8(s); + s += pitch; + d5u8 = vld1_u8(s); + s += pitch; + d6u8 = vld1_u8(s); + s += pitch; + d7u8 = vld1_u8(s); + s += pitch; + d16u8 = vld1_u8(s); + s += pitch; + d17u8 = vld1_u8(s); + s += pitch; + d18u8 = vld1_u8(s); + + d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), + vreinterpret_u32_u8(d7u8)); + d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), + vreinterpret_u32_u8(d16u8)); + d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), + vreinterpret_u32_u8(d17u8)); + d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), + vreinterpret_u32_u8(d18u8)); + + d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), + vreinterpret_u16_u32(d2tmp2.val[0])); + d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), + vreinterpret_u16_u32(d2tmp3.val[0])); + d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), + vreinterpret_u16_u32(d2tmp2.val[1])); + d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), + vreinterpret_u16_u32(d2tmp3.val[1])); + + d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), + vreinterpret_u8_u16(d2tmp5.val[0])); + d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), + vreinterpret_u8_u16(d2tmp5.val[1])); + d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), + vreinterpret_u8_u16(d2tmp7.val[0])); + d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), + vreinterpret_u8_u16(d2tmp7.val[1])); + + d3u8 = d2tmp8.val[0]; + d4u8 = d2tmp8.val[1]; + d5u8 = d2tmp9.val[0]; + d6u8 = d2tmp9.val[1]; + d7u8 = d2tmp10.val[0]; + d16u8 = d2tmp10.val[1]; + d17u8 = d2tmp11.val[0]; + d18u8 = d2tmp11.val[1]; + + mbloop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); + + d4Result.val[0] = d0u8; + d4Result.val[1] = d1u8; + d4Result.val[2] = d2u8; + d4Result.val[3] = d3u8; + + d2Result.val[0] = d4u8; + d2Result.val[1] = d5u8; + + s = src - 3; + vst4_lane_u8(s, d4Result, 0); + s += pitch; + vst4_lane_u8(s, d4Result, 1); + s += pitch; + vst4_lane_u8(s, d4Result, 2); + s += pitch; + vst4_lane_u8(s, d4Result, 3); + s += pitch; + vst4_lane_u8(s, d4Result, 4); + s += pitch; + vst4_lane_u8(s, d4Result, 5); + s += pitch; + vst4_lane_u8(s, d4Result, 6); + s += pitch; + vst4_lane_u8(s, d4Result, 7); + + s = src + 1; + vst2_lane_u8(s, d2Result, 0); + s += pitch; + vst2_lane_u8(s, d2Result, 1); + s += pitch; + vst2_lane_u8(s, d2Result, 2); + s += pitch; + vst2_lane_u8(s, d2Result, 3); + s += pitch; + vst2_lane_u8(s, d2Result, 4); + s += pitch; + vst2_lane_u8(s, d2Result, 5); + s += pitch; + vst2_lane_u8(s, d2Result, 6); + s += pitch; + vst2_lane_u8(s, d2Result, 7); + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm new file mode 100644 index 00000000000..d5da7a8409c --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm @@ -0,0 +1,635 @@ +; +; Copyright (c) 2013 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + EXPORT |vpx_lpf_horizontal_edge_8_neon| + EXPORT |vpx_lpf_horizontal_edge_16_neon| + EXPORT |vpx_lpf_vertical_16_neon| + ARM + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; void mb_lpf_horizontal_edge(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +; r12 int count +|mb_lpf_horizontal_edge| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + +h_count + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines + + vld1.u8 {d0}, [r8@64], r1 ; p7 + vld1.u8 {d1}, [r8@64], r1 ; p6 + vld1.u8 {d2}, [r8@64], r1 ; p5 + vld1.u8 {d3}, [r8@64], r1 ; p4 + vld1.u8 {d4}, [r8@64], r1 ; p3 + vld1.u8 {d5}, [r8@64], r1 ; p2 + vld1.u8 {d6}, [r8@64], r1 ; p1 + vld1.u8 {d7}, [r8@64], r1 ; p0 + vld1.u8 {d8}, [r8@64], r1 ; q0 + vld1.u8 {d9}, [r8@64], r1 ; q1 + vld1.u8 {d10}, [r8@64], r1 ; q2 + vld1.u8 {d11}, [r8@64], r1 ; q3 + vld1.u8 {d12}, [r8@64], r1 ; q4 + vld1.u8 {d13}, [r8@64], r1 ; q5 + vld1.u8 {d14}, [r8@64], r1 ; q6 + vld1.u8 {d15}, [r8@64], r1 ; q7 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq h_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. + sub r8, r0, r1, lsl #1 + + vst1.u8 {d25}, [r8@64], r1 ; store op1 + vst1.u8 {d24}, [r8@64], r1 ; store op0 + vst1.u8 {d23}, [r8@64], r1 ; store oq0 + vst1.u8 {d26}, [r8@64], r1 ; store oq1 + + b h_next + +h_mbfilter + tst r7, #2 + beq h_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. + sub r8, r0, r1, lsl #1 + sub r8, r8, r1 + + vst1.u8 {d18}, [r8@64], r1 ; store op2 + vst1.u8 {d19}, [r8@64], r1 ; store op1 + vst1.u8 {d20}, [r8@64], r1 ; store op0 + vst1.u8 {d21}, [r8@64], r1 ; store oq0 + vst1.u8 {d22}, [r8@64], r1 ; store oq1 + vst1.u8 {d23}, [r8@64], r1 ; store oq2 + + b h_next + +h_wide_mbfilter + sub r8, r0, r1, lsl #3 + add r8, r8, r1 + + vst1.u8 {d16}, [r8@64], r1 ; store op6 + vst1.u8 {d24}, [r8@64], r1 ; store op5 + vst1.u8 {d25}, [r8@64], r1 ; store op4 + vst1.u8 {d26}, [r8@64], r1 ; store op3 + vst1.u8 {d27}, [r8@64], r1 ; store op2 + vst1.u8 {d18}, [r8@64], r1 ; store op1 + vst1.u8 {d19}, [r8@64], r1 ; store op0 + vst1.u8 {d20}, [r8@64], r1 ; store oq0 + vst1.u8 {d21}, [r8@64], r1 ; store oq1 + vst1.u8 {d22}, [r8@64], r1 ; store oq2 + vst1.u8 {d23}, [r8@64], r1 ; store oq3 + vst1.u8 {d1}, [r8@64], r1 ; store oq4 + vst1.u8 {d2}, [r8@64], r1 ; store oq5 + vst1.u8 {d3}, [r8@64], r1 ; store oq6 + +h_next + add r0, r0, #8 + subs r12, r12, #1 + bne h_count + + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |mb_lpf_horizontal_edge| + +; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_8_neon| PROC + mov r12, #1 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_8_neon| + +; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_16_neon| PROC + mov r12, #2 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_16_neon| + +; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int p, /* pitch */ +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh, +|vpx_lpf_vertical_16_neon| PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] ; load thresh + + vld1.8 {d16[]}, [r2] ; load *blimit + vld1.8 {d17[]}, [r3] ; load *limit + vld1.8 {d18[]}, [r4] ; load *thresh + + sub r8, r0, #8 + + vld1.8 {d0}, [r8@64], r1 + vld1.8 {d8}, [r0@64], r1 + vld1.8 {d1}, [r8@64], r1 + vld1.8 {d9}, [r0@64], r1 + vld1.8 {d2}, [r8@64], r1 + vld1.8 {d10}, [r0@64], r1 + vld1.8 {d3}, [r8@64], r1 + vld1.8 {d11}, [r0@64], r1 + vld1.8 {d4}, [r8@64], r1 + vld1.8 {d12}, [r0@64], r1 + vld1.8 {d5}, [r8@64], r1 + vld1.8 {d13}, [r0@64], r1 + vld1.8 {d6}, [r8@64], r1 + vld1.8 {d14}, [r0@64], r1 + vld1.8 {d7}, [r8@64], r1 + vld1.8 {d15}, [r0@64], r1 + + sub r0, r0, r1, lsl #3 + + vtrn.32 q0, q2 + vtrn.32 q1, q3 + vtrn.32 q4, q6 + vtrn.32 q5, q7 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + vtrn.8 d4, d5 + vtrn.8 d6, d7 + + vtrn.8 d8, d9 + vtrn.8 d10, d11 + vtrn.8 d12, d13 + vtrn.8 d14, d15 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq v_mbfilter + + ; flat && mask were not set for any of the channels. Just store the values + ; from filter. + sub r8, r0, #2 + + vswp d23, d25 + + vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 + vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 + vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 + vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 + vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 + vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 + vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 + vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 + + b v_end + +v_mbfilter + tst r7, #2 + beq v_wide_mbfilter + + ; flat2 was not set for any of the channels. Just store the values from + ; mbfilter. + sub r8, r0, #3 + + vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 + vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 + vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 + vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 + vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 + vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 + vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 + vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 + vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 + vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 + vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 + vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 + vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 + vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 + vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 + vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 + + b v_end + +v_wide_mbfilter + sub r8, r0, #8 + + vtrn.32 d0, d26 + vtrn.32 d16, d27 + vtrn.32 d24, d18 + vtrn.32 d25, d19 + + vtrn.16 d0, d24 + vtrn.16 d16, d25 + vtrn.16 d26, d18 + vtrn.16 d27, d19 + + vtrn.8 d0, d16 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + vtrn.8 d18, d19 + + vtrn.32 d20, d1 + vtrn.32 d21, d2 + vtrn.32 d22, d3 + vtrn.32 d23, d15 + + vtrn.16 d20, d22 + vtrn.16 d21, d23 + vtrn.16 d1, d3 + vtrn.16 d2, d15 + + vtrn.8 d20, d21 + vtrn.8 d22, d23 + vtrn.8 d1, d2 + vtrn.8 d3, d15 + + vst1.8 {d0}, [r8@64], r1 + vst1.8 {d20}, [r0@64], r1 + vst1.8 {d16}, [r8@64], r1 + vst1.8 {d21}, [r0@64], r1 + vst1.8 {d24}, [r8@64], r1 + vst1.8 {d22}, [r0@64], r1 + vst1.8 {d25}, [r8@64], r1 + vst1.8 {d23}, [r0@64], r1 + vst1.8 {d26}, [r8@64], r1 + vst1.8 {d1}, [r0@64], r1 + vst1.8 {d27}, [r8@64], r1 + vst1.8 {d2}, [r0@64], r1 + vst1.8 {d18}, [r8@64], r1 + vst1.8 {d3}, [r0@64], r1 + vst1.8 {d19}, [r8@64], r1 + vst1.8 {d15}, [r0@64], r1 + +v_end + vpop {d8-d15} + pop {r4-r8, pc} + + ENDP ; |vpx_lpf_vertical_16_neon| + +; void vpx_wide_mbfilter_neon(); +; This is a helper function for the loopfilters. The invidual functions do the +; necessary load, transpose (if necessary) and store. +; +; r0-r3 PRESERVE +; d16 blimit +; d17 limit +; d18 thresh +; d0 p7 +; d1 p6 +; d2 p5 +; d3 p4 +; d4 p3 +; d5 p2 +; d6 p1 +; d7 p0 +; d8 q0 +; d9 q1 +; d10 q2 +; d11 q3 +; d12 q4 +; d13 q5 +; d14 q6 +; d15 q7 +|vpx_wide_mbfilter_neon| PROC + mov r7, #0 + + ; filter_mask + vabd.u8 d19, d4, d5 ; abs(p3 - p2) + vabd.u8 d20, d5, d6 ; abs(p2 - p1) + vabd.u8 d21, d6, d7 ; abs(p1 - p0) + vabd.u8 d22, d9, d8 ; abs(q1 - q0) + vabd.u8 d23, d10, d9 ; abs(q2 - q1) + vabd.u8 d24, d11, d10 ; abs(q3 - q2) + + ; only compare the largest value to limit + vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) + vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) + vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) + vmax.u8 d19, d19, d20 + + vabd.u8 d24, d7, d8 ; abs(p0 - q0) + + vmax.u8 d19, d19, d23 + + vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) + vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 + + ; abs () > limit + vcge.u8 d19, d17, d19 + + ; flatmask4 + vabd.u8 d25, d7, d5 ; abs(p0 - p2) + vabd.u8 d26, d8, d10 ; abs(q0 - q2) + vabd.u8 d27, d4, d7 ; abs(p3 - p0) + vabd.u8 d28, d11, d8 ; abs(q3 - q0) + + ; only compare the largest value to thresh + vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) + vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) + vmax.u8 d25, d25, d26 + vmax.u8 d20, d20, d25 + + vshr.u8 d23, d23, #1 ; a = a / 2 + vqadd.u8 d24, d24, d23 ; a = b + a + + vmov.u8 d30, #1 + vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 + + vcge.u8 d20, d30, d20 ; flat + + vand d19, d19, d24 ; mask + + ; hevmask + vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 + vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 + vorr d21, d21, d22 ; hev + + vand d16, d20, d19 ; flat && mask + vmov r5, r6, d16 + + ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) + vabd.u8 d22, d3, d7 ; abs(p4 - p0) + vabd.u8 d23, d12, d8 ; abs(q4 - q0) + vabd.u8 d24, d7, d2 ; abs(p0 - p5) + vabd.u8 d25, d8, d13 ; abs(q0 - q5) + vabd.u8 d26, d1, d7 ; abs(p6 - p0) + vabd.u8 d27, d14, d8 ; abs(q6 - q0) + vabd.u8 d28, d0, d7 ; abs(p7 - p0) + vabd.u8 d29, d15, d8 ; abs(q7 - q0) + + ; only compare the largest value to thresh + vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) + vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) + vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) + vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) + + vmax.u8 d26, d22, d23 + vmax.u8 d27, d24, d25 + vmax.u8 d23, d26, d27 + + vcge.u8 d18, d30, d23 ; flat2 + + vmov.u8 d22, #0x80 + + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #1 ; Only do filter branch + + vand d17, d18, d16 ; flat2 && flat && mask + vmov r5, r6, d17 + + ; mbfilter() function + + ; filter() function + ; convert to signed + veor d23, d8, d22 ; qs0 + veor d24, d7, d22 ; ps0 + veor d25, d6, d22 ; ps1 + veor d26, d9, d22 ; qs1 + + vmov.u8 d27, #3 + + vsub.s8 d28, d23, d24 ; ( qs0 - ps0) + vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) + vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) + vand d29, d29, d21 ; filter &= hev + vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) + vmov.u8 d29, #4 + + ; filter = clamp(filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d28, q15 + + vand d28, d28, d19 ; filter &= mask + + vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) + vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) + vshr.s8 d30, d30, #3 ; filter2 >>= 3 + vshr.s8 d29, d29, #3 ; filter1 >>= 3 + + + vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) + vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) + + ; outer tap adjustments: ++filter1 >> 1 + vrshr.s8 d29, d29, #1 + vbic d29, d29, d21 ; filter &= ~hev + + vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) + vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) + + veor d24, d24, d22 ; *f_op0 = u^0x80 + veor d23, d23, d22 ; *f_oq0 = u^0x80 + veor d25, d25, d22 ; *f_op1 = u^0x80 + veor d26, d26, d22 ; *f_oq1 = u^0x80 + + tst r7, #1 + bxne lr + + orrs r5, r5, r6 ; Check for 0 + orreq r7, r7, #2 ; Only do mbfilter branch + + ; mbfilter flat && mask branch + ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's + ; and using vibt on the q's? + vmov.u8 d29, #2 + vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 + vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 + vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 + vaddl.u8 q10, d4, d5 + vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 + vaddl.u8 q14, d6, d9 + vqrshrn.u16 d18, q15, #3 ; r_op2 + + vsub.i16 q15, q10 + vaddl.u8 q10, d4, d6 + vadd.i16 q15, q14 + vaddl.u8 q14, d7, d10 + vqrshrn.u16 d19, q15, #3 ; r_op1 + + vsub.i16 q15, q10 + vadd.i16 q15, q14 + vaddl.u8 q14, d8, d11 + vqrshrn.u16 d20, q15, #3 ; r_op0 + + vsubw.u8 q15, d4 ; oq0 = op0 - p3 + vsubw.u8 q15, d7 ; oq0 -= p0 + vadd.i16 q15, q14 + vaddl.u8 q14, d9, d11 + vqrshrn.u16 d21, q15, #3 ; r_oq0 + + vsubw.u8 q15, d5 ; oq1 = oq0 - p2 + vsubw.u8 q15, d8 ; oq1 -= q0 + vadd.i16 q15, q14 + vaddl.u8 q14, d10, d11 + vqrshrn.u16 d22, q15, #3 ; r_oq1 + + vsubw.u8 q15, d6 ; oq2 = oq0 - p1 + vsubw.u8 q15, d9 ; oq2 -= q1 + vadd.i16 q15, q14 + vqrshrn.u16 d27, q15, #3 ; r_oq2 + + ; Filter does not set op2 or oq2, so use p2 and q2. + vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) + vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) + vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) + vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) + vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) + + vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) + vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) + + tst r7, #2 + bxne lr + + ; wide_mbfilter flat2 && flat && mask branch + vmov.u8 d16, #7 + vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 + vaddl.u8 q12, d2, d3 + vaddl.u8 q13, d4, d5 + vaddl.u8 q14, d1, d6 + vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 + vadd.i16 q12, q13 + vadd.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vadd.i16 q15, q12 + vaddl.u8 q12, d0, d1 + vaddw.u8 q15, d1 + vaddl.u8 q13, d0, d2 + vadd.i16 q14, q15, q14 + vqrshrn.u16 d16, q15, #4 ; w_op6 + + vsub.i16 q15, q14, q12 + vaddl.u8 q14, d3, d10 + vqrshrn.u16 d24, q15, #4 ; w_op5 + + vsub.i16 q15, q13 + vaddl.u8 q13, d0, d3 + vadd.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vqrshrn.u16 d25, q15, #4 ; w_op4 + + vadd.i16 q15, q14 + vaddl.u8 q14, d0, d4 + vsub.i16 q15, q13 + vsub.i16 q14, q15, q14 + vqrshrn.u16 d26, q15, #4 ; w_op3 + + vaddw.u8 q15, q14, d5 ; op2 += p2 + vaddl.u8 q14, d0, d5 + vaddw.u8 q15, d12 ; op2 += q4 + vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) + vqrshrn.u16 d27, q15, #4 ; w_op2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d6 + vaddw.u8 q15, d6 ; op1 += p1 + vaddw.u8 q15, d13 ; op1 += q5 + vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) + vqrshrn.u16 d18, q15, #4 ; w_op1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d7 + vaddw.u8 q15, d7 ; op0 += p0 + vaddw.u8 q15, d14 ; op0 += q6 + vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) + vqrshrn.u16 d19, q15, #4 ; w_op0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d1, d8 + vaddw.u8 q15, d8 ; oq0 += q0 + vaddw.u8 q15, d15 ; oq0 += q7 + vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) + vqrshrn.u16 d20, q15, #4 ; w_oq0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vaddw.u8 q15, d9 ; oq1 += q1 + vaddl.u8 q4, d10, d15 + vaddw.u8 q15, d15 ; oq1 += q7 + vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) + vqrshrn.u16 d21, q15, #4 ; w_oq1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d3, d10 + vadd.i16 q15, q4 + vaddl.u8 q4, d11, d15 + vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) + vqrshrn.u16 d22, q15, #4 ; w_oq2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vadd.i16 q15, q4 + vaddl.u8 q4, d12, d15 + vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) + vqrshrn.u16 d23, q15, #4 ; w_oq3 + + vsub.i16 q15, q14 + vaddl.u8 q14, d5, d12 + vadd.i16 q15, q4 + vaddl.u8 q4, d13, d15 + vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) + vqrshrn.u16 d1, q15, #4 ; w_oq4 + + vsub.i16 q15, q14 + vaddl.u8 q14, d6, d13 + vadd.i16 q15, q4 + vaddl.u8 q4, d14, d15 + vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) + vqrshrn.u16 d2, q15, #4 ; w_oq5 + + vsub.i16 q15, q14 + vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) + vadd.i16 q15, q4 + vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) + vqrshrn.u16 d3, q15, #4 ; w_oq6 + vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) + vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) + vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) + + bx lr + ENDP ; |vpx_wide_mbfilter_neon| + + END diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c new file mode 100644 index 00000000000..aa31f293588 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); +} + +#if HAVE_NEON_ASM +void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1); +} + +void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1); +} + +void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); +} +#endif // HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm new file mode 100644 index 00000000000..c9ca10801df --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm @@ -0,0 +1,36 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vpx_push_neon| + EXPORT |vpx_pop_neon| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_push_neon| PROC + vst1.i64 {d8, d9, d10, d11}, [r0]! + vst1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + ENDP + +|vpx_pop_neon| PROC + vld1.i64 {d8, d9, d10, d11}, [r0]! + vld1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + ENDP + + END + diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c new file mode 100644 index 00000000000..8632250138c --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +static INLINE int32x4_t MULTIPLY_BY_Q0( + int16x4_t dsrc0, + int16x4_t dsrc1, + int16x4_t dsrc2, + int16x4_t dsrc3, + int16x4_t dsrc4, + int16x4_t dsrc5, + int16x4_t dsrc6, + int16x4_t dsrc7, + int16x8_t q0s16) { + int32x4_t qdst; + int16x4_t d0s16, d1s16; + + d0s16 = vget_low_s16(q0s16); + d1s16 = vget_high_s16(q0s16); + + qdst = vmull_lane_s16(dsrc0, d0s16, 0); + qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); + qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); + qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); + qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); + qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); + qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); + qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); + return qdst; +} + +void vpx_convolve8_avg_horiz_neon( + const uint8_t *src, + ptrdiff_t src_stride, + uint8_t *dst, + ptrdiff_t dst_stride, + const int16_t *filter_x, + int x_step_q4, + const int16_t *filter_y, // unused + int y_step_q4, // unused + int w, + int h) { + int width; + const uint8_t *s; + uint8_t *d; + uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; + uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32; + uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16; + uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; + int16x8_t q0s16; + uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; + int32x4_t q1s32, q2s32, q14s32, q15s32; + uint16x8x2_t q0x2u16; + uint8x8x2_t d0x2u8, d1x2u8; + uint32x2x2_t d0x2u32; + uint16x4x2_t d0x2u16, d1x2u16; + uint32x4x2_t q0x2u32; + + assert(x_step_q4 == 16); + + q0s16 = vld1q_s16(filter_x); + + src -= 3; // adjust for taps + for (; h > 0; h -= 4) { // loop_horiz_v + s = src; + d24u8 = vld1_u8(s); + s += src_stride; + d25u8 = vld1_u8(s); + s += src_stride; + d26u8 = vld1_u8(s); + s += src_stride; + d27u8 = vld1_u8(s); + + q12u8 = vcombine_u8(d24u8, d25u8); + q13u8 = vcombine_u8(d26u8, d27u8); + + q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), + vreinterpretq_u16_u8(q13u8)); + d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); + d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); + d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); + d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); + d0x2u8 = vtrn_u8(d24u8, d25u8); + d1x2u8 = vtrn_u8(d26u8, d27u8); + + __builtin_prefetch(src + src_stride * 4); + __builtin_prefetch(src + src_stride * 5); + + q8u16 = vmovl_u8(d0x2u8.val[0]); + q9u16 = vmovl_u8(d0x2u8.val[1]); + q10u16 = vmovl_u8(d1x2u8.val[0]); + q11u16 = vmovl_u8(d1x2u8.val[1]); + + src += 7; + d16u16 = vget_low_u16(q8u16); + d17u16 = vget_high_u16(q8u16); + d18u16 = vget_low_u16(q9u16); + d19u16 = vget_high_u16(q9u16); + q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 + q9u16 = vcombine_u16(d17u16, d19u16); + + d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 + for (width = w; + width > 0; + width -= 4, src += 4, dst += 4) { // loop_horiz + s = src; + d28u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d29u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d31u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d30u32 = vld1_dup_u32((const uint32_t *)s); + + __builtin_prefetch(src + 64); + + d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), + vreinterpret_u16_u32(d31u32)); + d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), + vreinterpret_u16_u32(d30u32)); + d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 + vreinterpret_u8_u16(d1x2u16.val[0])); // d29 + d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 + vreinterpret_u8_u16(d1x2u16.val[1])); // d30 + + __builtin_prefetch(src + 64 + src_stride); + + q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); + q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); + q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), + vreinterpretq_u32_u8(q15u8)); + + d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); + d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); + q12u16 = vmovl_u8(d28u8); + q13u16 = vmovl_u8(d29u8); + + __builtin_prefetch(src + 64 + src_stride * 2); + + d = dst; + d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); + d += dst_stride; + d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); + d += dst_stride; + d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); + d += dst_stride; + d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); + + d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); + d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); + d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); + d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + + q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, + d18s16, d19s16, d23s16, d24s16, q0s16); + q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, + d19s16, d23s16, d24s16, d26s16, q0s16); + q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, + d23s16, d24s16, d26s16, d27s16, q0s16); + q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, + d24s16, d26s16, d27s16, d25s16, q0s16); + + __builtin_prefetch(src + 64 + src_stride * 3); + + d2u16 = vqrshrun_n_s32(q1s32, 7); + d3u16 = vqrshrun_n_s32(q2s32, 7); + d4u16 = vqrshrun_n_s32(q14s32, 7); + d5u16 = vqrshrun_n_s32(q15s32, 7); + + q1u16 = vcombine_u16(d2u16, d3u16); + q2u16 = vcombine_u16(d4u16, d5u16); + + d2u8 = vqmovn_u16(q1u16); + d3u8 = vqmovn_u16(q2u16); + + d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), + vreinterpret_u16_u8(d3u8)); + d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), + vreinterpret_u32_u16(d0x2u16.val[1])); + d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), + vreinterpret_u8_u32(d0x2u32.val[1])); + + q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); + q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); + + q1u8 = vrhaddq_u8(q1u8, q3u8); + + d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); + d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); + + d = dst; + vst1_lane_u32((uint32_t *)d, d2u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d2u32, 1); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 1); + + q8u16 = q9u16; + d20s16 = d23s16; + q11u16 = q12u16; + q9u16 = q13u16; + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + } + src += src_stride * 4 - w - 7; + dst += dst_stride * 4 - w; + } + return; +} + +void vpx_convolve8_avg_vert_neon( + const uint8_t *src, + ptrdiff_t src_stride, + uint8_t *dst, + ptrdiff_t dst_stride, + const int16_t *filter_x, // unused + int x_step_q4, // unused + const int16_t *filter_y, + int y_step_q4, + int w, + int h) { + int height; + const uint8_t *s; + uint8_t *d; + uint8x8_t d2u8, d3u8; + uint32x2_t d2u32, d3u32, d6u32, d7u32; + uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; + uint8x16_t q1u8, q3u8; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; + int16x4_t d24s16, d25s16, d26s16, d27s16; + uint16x4_t d2u16, d3u16, d4u16, d5u16; + int16x8_t q0s16; + uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; + int32x4_t q1s32, q2s32, q14s32, q15s32; + + assert(y_step_q4 == 16); + + src -= src_stride * 3; + q0s16 = vld1q_s16(filter_y); + for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h + s = src; + d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); + s += src_stride; + d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); + s += src_stride; + d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); + s += src_stride; + d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); + s += src_stride; + d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); + s += src_stride; + d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); + s += src_stride; + d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); + s += src_stride; + + q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); + q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); + q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); + q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); + + d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); + d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d = dst; + for (height = h; height > 0; height -= 4) { // loop_vert + d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); + s += src_stride; + d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); + s += src_stride; + d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); + s += src_stride; + d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); + s += src_stride; + + q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); + q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); + + d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); + d += dst_stride; + d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); + d += dst_stride; + d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); + d += dst_stride; + d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); + d -= dst_stride * 3; + + d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); + d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); + d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); + d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + + __builtin_prefetch(s); + __builtin_prefetch(s + src_stride); + q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, + d20s16, d21s16, d22s16, d24s16, q0s16); + __builtin_prefetch(s + src_stride * 2); + __builtin_prefetch(s + src_stride * 3); + q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, + d21s16, d22s16, d24s16, d26s16, q0s16); + __builtin_prefetch(d); + __builtin_prefetch(d + dst_stride); + q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, + d22s16, d24s16, d26s16, d27s16, q0s16); + __builtin_prefetch(d + dst_stride * 2); + __builtin_prefetch(d + dst_stride * 3); + q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, + d24s16, d26s16, d27s16, d25s16, q0s16); + + d2u16 = vqrshrun_n_s32(q1s32, 7); + d3u16 = vqrshrun_n_s32(q2s32, 7); + d4u16 = vqrshrun_n_s32(q14s32, 7); + d5u16 = vqrshrun_n_s32(q15s32, 7); + + q1u16 = vcombine_u16(d2u16, d3u16); + q2u16 = vcombine_u16(d4u16, d5u16); + + d2u8 = vqmovn_u16(q1u16); + d3u8 = vqmovn_u16(q2u16); + + q1u8 = vcombine_u8(d2u8, d3u8); + q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); + + q1u8 = vrhaddq_u8(q1u8, q3u8); + + d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); + d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); + + vst1_lane_u32((uint32_t *)d, d2u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d2u32, 1); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 1); + d += dst_stride; + + q8u16 = q10u16; + d18s16 = d22s16; + d19s16 = d24s16; + q10u16 = q13u16; + d22s16 = d25s16; + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c new file mode 100644 index 00000000000..9bd715e2c63 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +static INLINE int32x4_t MULTIPLY_BY_Q0( + int16x4_t dsrc0, + int16x4_t dsrc1, + int16x4_t dsrc2, + int16x4_t dsrc3, + int16x4_t dsrc4, + int16x4_t dsrc5, + int16x4_t dsrc6, + int16x4_t dsrc7, + int16x8_t q0s16) { + int32x4_t qdst; + int16x4_t d0s16, d1s16; + + d0s16 = vget_low_s16(q0s16); + d1s16 = vget_high_s16(q0s16); + + qdst = vmull_lane_s16(dsrc0, d0s16, 0); + qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); + qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); + qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); + qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); + qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); + qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); + qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); + return qdst; +} + +void vpx_convolve8_horiz_neon( + const uint8_t *src, + ptrdiff_t src_stride, + uint8_t *dst, + ptrdiff_t dst_stride, + const int16_t *filter_x, + int x_step_q4, + const int16_t *filter_y, // unused + int y_step_q4, // unused + int w, + int h) { + int width; + const uint8_t *s, *psrc; + uint8_t *d, *pdst; + uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; + uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32; + uint8x16_t q12u8, q13u8, q14u8, q15u8; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16; + uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; + int16x8_t q0s16; + uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; + int32x4_t q1s32, q2s32, q14s32, q15s32; + uint16x8x2_t q0x2u16; + uint8x8x2_t d0x2u8, d1x2u8; + uint32x2x2_t d0x2u32; + uint16x4x2_t d0x2u16, d1x2u16; + uint32x4x2_t q0x2u32; + + assert(x_step_q4 == 16); + + q0s16 = vld1q_s16(filter_x); + + src -= 3; // adjust for taps + for (; h > 0; h -= 4, + src += src_stride * 4, + dst += dst_stride * 4) { // loop_horiz_v + s = src; + d24u8 = vld1_u8(s); + s += src_stride; + d25u8 = vld1_u8(s); + s += src_stride; + d26u8 = vld1_u8(s); + s += src_stride; + d27u8 = vld1_u8(s); + + q12u8 = vcombine_u8(d24u8, d25u8); + q13u8 = vcombine_u8(d26u8, d27u8); + + q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), + vreinterpretq_u16_u8(q13u8)); + d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); + d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); + d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); + d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); + d0x2u8 = vtrn_u8(d24u8, d25u8); + d1x2u8 = vtrn_u8(d26u8, d27u8); + + __builtin_prefetch(src + src_stride * 4); + __builtin_prefetch(src + src_stride * 5); + __builtin_prefetch(src + src_stride * 6); + + q8u16 = vmovl_u8(d0x2u8.val[0]); + q9u16 = vmovl_u8(d0x2u8.val[1]); + q10u16 = vmovl_u8(d1x2u8.val[0]); + q11u16 = vmovl_u8(d1x2u8.val[1]); + + d16u16 = vget_low_u16(q8u16); + d17u16 = vget_high_u16(q8u16); + d18u16 = vget_low_u16(q9u16); + d19u16 = vget_high_u16(q9u16); + q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 + q9u16 = vcombine_u16(d17u16, d19u16); + + d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); + d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 + for (width = w, psrc = src + 7, pdst = dst; + width > 0; + width -= 4, psrc += 4, pdst += 4) { // loop_horiz + s = psrc; + d28u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d29u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d31u32 = vld1_dup_u32((const uint32_t *)s); + s += src_stride; + d30u32 = vld1_dup_u32((const uint32_t *)s); + + __builtin_prefetch(psrc + 64); + + d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), + vreinterpret_u16_u32(d31u32)); + d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), + vreinterpret_u16_u32(d30u32)); + d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 + vreinterpret_u8_u16(d1x2u16.val[0])); // d29 + d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 + vreinterpret_u8_u16(d1x2u16.val[1])); // d30 + + __builtin_prefetch(psrc + 64 + src_stride); + + q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); + q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); + q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), + vreinterpretq_u32_u8(q15u8)); + + d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); + d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); + q12u16 = vmovl_u8(d28u8); + q13u16 = vmovl_u8(d29u8); + + __builtin_prefetch(psrc + 64 + src_stride * 2); + + d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); + d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); + d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); + d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + + q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, + d18s16, d19s16, d23s16, d24s16, q0s16); + q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, + d19s16, d23s16, d24s16, d26s16, q0s16); + q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, + d23s16, d24s16, d26s16, d27s16, q0s16); + q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, + d24s16, d26s16, d27s16, d25s16, q0s16); + + __builtin_prefetch(psrc + 60 + src_stride * 3); + + d2u16 = vqrshrun_n_s32(q1s32, 7); + d3u16 = vqrshrun_n_s32(q2s32, 7); + d4u16 = vqrshrun_n_s32(q14s32, 7); + d5u16 = vqrshrun_n_s32(q15s32, 7); + + q1u16 = vcombine_u16(d2u16, d3u16); + q2u16 = vcombine_u16(d4u16, d5u16); + + d2u8 = vqmovn_u16(q1u16); + d3u8 = vqmovn_u16(q2u16); + + d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), + vreinterpret_u16_u8(d3u8)); + d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), + vreinterpret_u32_u16(d0x2u16.val[1])); + d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), + vreinterpret_u8_u32(d0x2u32.val[1])); + + d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]); + d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]); + + d = pdst; + vst1_lane_u32((uint32_t *)d, d2u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d2u32, 1); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 1); + + q8u16 = q9u16; + d20s16 = d23s16; + q11u16 = q12u16; + q9u16 = q13u16; + d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); + } + } + return; +} + +void vpx_convolve8_vert_neon( + const uint8_t *src, + ptrdiff_t src_stride, + uint8_t *dst, + ptrdiff_t dst_stride, + const int16_t *filter_x, // unused + int x_step_q4, // unused + const int16_t *filter_y, + int y_step_q4, + int w, + int h) { + int height; + const uint8_t *s; + uint8_t *d; + uint32x2_t d2u32, d3u32; + uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; + int16x4_t d24s16, d25s16, d26s16, d27s16; + uint16x4_t d2u16, d3u16, d4u16, d5u16; + int16x8_t q0s16; + uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; + int32x4_t q1s32, q2s32, q14s32, q15s32; + + assert(y_step_q4 == 16); + + src -= src_stride * 3; + q0s16 = vld1q_s16(filter_y); + for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h + s = src; + d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); + s += src_stride; + d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); + s += src_stride; + d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); + s += src_stride; + d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); + s += src_stride; + d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); + s += src_stride; + d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); + s += src_stride; + d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); + s += src_stride; + + q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); + q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); + q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); + q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); + + d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); + d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); + d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); + d = dst; + for (height = h; height > 0; height -= 4) { // loop_vert + d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); + s += src_stride; + d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); + s += src_stride; + d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); + s += src_stride; + d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); + s += src_stride; + + q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); + q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); + + d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); + d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); + d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); + d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); + d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); + d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); + d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); + d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); + + __builtin_prefetch(d); + __builtin_prefetch(d + dst_stride); + q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, + d20s16, d21s16, d22s16, d24s16, q0s16); + __builtin_prefetch(d + dst_stride * 2); + __builtin_prefetch(d + dst_stride * 3); + q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, + d21s16, d22s16, d24s16, d26s16, q0s16); + __builtin_prefetch(s); + __builtin_prefetch(s + src_stride); + q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, + d22s16, d24s16, d26s16, d27s16, q0s16); + __builtin_prefetch(s + src_stride * 2); + __builtin_prefetch(s + src_stride * 3); + q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, + d24s16, d26s16, d27s16, d25s16, q0s16); + + d2u16 = vqrshrun_n_s32(q1s32, 7); + d3u16 = vqrshrun_n_s32(q2s32, 7); + d4u16 = vqrshrun_n_s32(q14s32, 7); + d5u16 = vqrshrun_n_s32(q15s32, 7); + + q1u16 = vcombine_u16(d2u16, d3u16); + q2u16 = vcombine_u16(d4u16, d5u16); + + d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16)); + d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16)); + + vst1_lane_u32((uint32_t *)d, d2u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d2u32, 1); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 0); + d += dst_stride; + vst1_lane_u32((uint32_t *)d, d3u32, 1); + d += dst_stride; + + q8u16 = q10u16; + d18s16 = d22s16; + d19s16 = d24s16; + q10u16 = q13u16; + d22s16 = d25s16; + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c new file mode 100644 index 00000000000..dc58a332f81 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +void vpx_convolve_avg_neon( + const uint8_t *src, // r0 + ptrdiff_t src_stride, // r1 + uint8_t *dst, // r2 + ptrdiff_t dst_stride, // r3 + const int16_t *filter_x, + int filter_x_stride, + const int16_t *filter_y, + int filter_y_stride, + int w, + int h) { + uint8_t *d; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + uint32x2_t d0u32, d2u32; + uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8; + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + + d = dst; + if (w > 32) { // avg64 + for (; h > 0; h -= 1) { + q0u8 = vld1q_u8(src); + q1u8 = vld1q_u8(src + 16); + q2u8 = vld1q_u8(src + 32); + q3u8 = vld1q_u8(src + 48); + src += src_stride; + q8u8 = vld1q_u8(d); + q9u8 = vld1q_u8(d + 16); + q10u8 = vld1q_u8(d + 32); + q11u8 = vld1q_u8(d + 48); + d += dst_stride; + + q0u8 = vrhaddq_u8(q0u8, q8u8); + q1u8 = vrhaddq_u8(q1u8, q9u8); + q2u8 = vrhaddq_u8(q2u8, q10u8); + q3u8 = vrhaddq_u8(q3u8, q11u8); + + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + vst1q_u8(dst + 32, q2u8); + vst1q_u8(dst + 48, q3u8); + dst += dst_stride; + } + } else if (w == 32) { // avg32 + for (; h > 0; h -= 2) { + q0u8 = vld1q_u8(src); + q1u8 = vld1q_u8(src + 16); + src += src_stride; + q2u8 = vld1q_u8(src); + q3u8 = vld1q_u8(src + 16); + src += src_stride; + q8u8 = vld1q_u8(d); + q9u8 = vld1q_u8(d + 16); + d += dst_stride; + q10u8 = vld1q_u8(d); + q11u8 = vld1q_u8(d + 16); + d += dst_stride; + + q0u8 = vrhaddq_u8(q0u8, q8u8); + q1u8 = vrhaddq_u8(q1u8, q9u8); + q2u8 = vrhaddq_u8(q2u8, q10u8); + q3u8 = vrhaddq_u8(q3u8, q11u8); + + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + dst += dst_stride; + vst1q_u8(dst, q2u8); + vst1q_u8(dst + 16, q3u8); + dst += dst_stride; + } + } else if (w > 8) { // avg16 + for (; h > 0; h -= 2) { + q0u8 = vld1q_u8(src); + src += src_stride; + q1u8 = vld1q_u8(src); + src += src_stride; + q2u8 = vld1q_u8(d); + d += dst_stride; + q3u8 = vld1q_u8(d); + d += dst_stride; + + q0u8 = vrhaddq_u8(q0u8, q2u8); + q1u8 = vrhaddq_u8(q1u8, q3u8); + + vst1q_u8(dst, q0u8); + dst += dst_stride; + vst1q_u8(dst, q1u8); + dst += dst_stride; + } + } else if (w == 8) { // avg8 + for (; h > 0; h -= 2) { + d0u8 = vld1_u8(src); + src += src_stride; + d1u8 = vld1_u8(src); + src += src_stride; + d2u8 = vld1_u8(d); + d += dst_stride; + d3u8 = vld1_u8(d); + d += dst_stride; + + q0u8 = vcombine_u8(d0u8, d1u8); + q1u8 = vcombine_u8(d2u8, d3u8); + q0u8 = vrhaddq_u8(q0u8, q1u8); + + vst1_u8(dst, vget_low_u8(q0u8)); + dst += dst_stride; + vst1_u8(dst, vget_high_u8(q0u8)); + dst += dst_stride; + } + } else { // avg4 + for (; h > 0; h -= 2) { + d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0); + src += src_stride; + d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1); + src += src_stride; + d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0); + d += dst_stride; + d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1); + d += dst_stride; + + d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32), + vreinterpret_u8_u32(d2u32)); + + d0u32 = vreinterpret_u32_u8(d0u8); + vst1_lane_u32((uint32_t *)dst, d0u32, 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, d0u32, 1); + dst += dst_stride; + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c new file mode 100644 index 00000000000..d8fb97a8619 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" + +void vpx_convolve_copy_neon( + const uint8_t *src, // r0 + ptrdiff_t src_stride, // r1 + uint8_t *dst, // r2 + ptrdiff_t dst_stride, // r3 + const int16_t *filter_x, + int filter_x_stride, + const int16_t *filter_y, + int filter_y_stride, + int w, + int h) { + uint8x8_t d0u8, d2u8; + uint8x16_t q0u8, q1u8, q2u8, q3u8; + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + + if (w > 32) { // copy64 + for (; h > 0; h--) { + q0u8 = vld1q_u8(src); + q1u8 = vld1q_u8(src + 16); + q2u8 = vld1q_u8(src + 32); + q3u8 = vld1q_u8(src + 48); + src += src_stride; + + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + vst1q_u8(dst + 32, q2u8); + vst1q_u8(dst + 48, q3u8); + dst += dst_stride; + } + } else if (w == 32) { // copy32 + for (; h > 0; h -= 2) { + q0u8 = vld1q_u8(src); + q1u8 = vld1q_u8(src + 16); + src += src_stride; + q2u8 = vld1q_u8(src); + q3u8 = vld1q_u8(src + 16); + src += src_stride; + + vst1q_u8(dst, q0u8); + vst1q_u8(dst + 16, q1u8); + dst += dst_stride; + vst1q_u8(dst, q2u8); + vst1q_u8(dst + 16, q3u8); + dst += dst_stride; + } + } else if (w > 8) { // copy16 + for (; h > 0; h -= 2) { + q0u8 = vld1q_u8(src); + src += src_stride; + q1u8 = vld1q_u8(src); + src += src_stride; + + vst1q_u8(dst, q0u8); + dst += dst_stride; + vst1q_u8(dst, q1u8); + dst += dst_stride; + } + } else if (w == 8) { // copy8 + for (; h > 0; h -= 2) { + d0u8 = vld1_u8(src); + src += src_stride; + d2u8 = vld1_u8(src); + src += src_stride; + + vst1_u8(dst, d0u8); + dst += dst_stride; + vst1_u8(dst, d2u8); + dst += dst_stride; + } + } else { // copy4 + for (; h > 0; h--) { + *(uint32_t *)dst = *(const uint32_t *)src; + src += src_stride; + dst += dst_stride; + } + } + return; +} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c new file mode 100644 index 00000000000..1506ce6203d --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" + +void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the + * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). + */ + DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); + + // Account for the vertical phase needing 3 lines prior and 4 lines post + int intermediate_height = h + 7; + + assert(y_step_q4 == 16); + assert(x_step_q4 == 16); + + /* Filter starting 3 lines back. The neon implementation will ignore the + * given height and filter a multiple of 4 lines. Since this goes in to + * the temp buffer which has lots of extra room and is subsequently discarded + * this is safe if somewhat less than ideal. + */ + vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, + temp, 64, + filter_x, x_step_q4, filter_y, y_step_q4, + w, intermediate_height); + + /* Step into the temp buffer 3 lines to get the actual frame data */ + vpx_convolve8_vert_neon(temp + 64 * 3, 64, + dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); +} + +void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); + int intermediate_height = h + 7; + + assert(y_step_q4 == 16); + assert(x_step_q4 == 16); + + /* This implementation has the same issues as above. In addition, we only want + * to average the values after both passes. + */ + vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, + temp, 64, + filter_x, x_step_q4, filter_y, y_step_q4, + w, intermediate_height); + vpx_convolve8_avg_vert_neon(temp + 64 * 3, + 64, dst, dst_stride, + filter_x, x_step_q4, filter_y, y_step_q4, + w, h); +} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.c b/thirdparty/libvpx/vpx_dsp/bitreader.c new file mode 100644 index 00000000000..8140e78e70e --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/bitreader.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include + +#include "./vpx_config.h" + +#include "vpx_dsp/bitreader.h" +#include "vpx_dsp/prob.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_util/endian_inl.h" + +int vpx_reader_init(vpx_reader *r, + const uint8_t *buffer, + size_t size, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + if (size && !buffer) { + return 1; + } else { + r->buffer_end = buffer + size; + r->buffer = buffer; + r->value = 0; + r->count = -8; + r->range = 255; + r->decrypt_cb = decrypt_cb; + r->decrypt_state = decrypt_state; + vpx_reader_fill(r); + return vpx_read_bit(r) != 0; // marker bit + } +} + +void vpx_reader_fill(vpx_reader *r) { + const uint8_t *const buffer_end = r->buffer_end; + const uint8_t *buffer = r->buffer; + const uint8_t *buffer_start = buffer; + BD_VALUE value = r->value; + int count = r->count; + const size_t bytes_left = buffer_end - buffer; + const size_t bits_left = bytes_left * CHAR_BIT; + int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); + + if (r->decrypt_cb) { + size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left); + r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); + buffer = r->clear_buffer; + buffer_start = r->clear_buffer; + } + if (bits_left > BD_VALUE_SIZE) { + const int bits = (shift & 0xfffffff8) + CHAR_BIT; + BD_VALUE nv; + BD_VALUE big_endian_values; + memcpy(&big_endian_values, buffer, sizeof(BD_VALUE)); +#if SIZE_MAX == 0xffffffffffffffffULL + big_endian_values = HToBE64(big_endian_values); +#else + big_endian_values = HToBE32(big_endian_values); +#endif + nv = big_endian_values >> (BD_VALUE_SIZE - bits); + count += bits; + buffer += (bits >> 3); + value = r->value | (nv << (shift & 0x7)); + } else { + const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left); + int loop_end = 0; + if (bits_over >= 0) { + count += LOTS_OF_BITS; + loop_end = bits_over; + } + + if (bits_over < 0 || bits_left) { + while (shift >= loop_end) { + count += CHAR_BIT; + value |= (BD_VALUE)*buffer++ << shift; + shift -= CHAR_BIT; + } + } + } + + // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption, + // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than + // assign 'buffer' to 'r->buffer'. + r->buffer += buffer - buffer_start; + r->value = value; + r->count = count; +} + +const uint8_t *vpx_reader_find_end(vpx_reader *r) { + // Find the end of the coded buffer + while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { + r->count -= CHAR_BIT; + r->buffer--; + } + return r->buffer; +} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.h b/thirdparty/libvpx/vpx_dsp/bitreader.h new file mode 100644 index 00000000000..9a441b41077 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/bitreader.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_BITREADER_H_ +#define VPX_DSP_BITREADER_H_ + +#include +#include + +#include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/prob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef size_t BD_VALUE; + +#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) + +// This is meant to be a large, positive constant that can still be efficiently +// loaded as an immediate (on platforms like ARM, for example). +// Even relatively modest values like 100 would work fine. +#define LOTS_OF_BITS 0x40000000 + +typedef struct { + // Be careful when reordering this struct, it may impact the cache negatively. + BD_VALUE value; + unsigned int range; + int count; + const uint8_t *buffer_end; + const uint8_t *buffer; + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; +} vpx_reader; + +int vpx_reader_init(vpx_reader *r, + const uint8_t *buffer, + size_t size, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state); + +void vpx_reader_fill(vpx_reader *r); + +const uint8_t *vpx_reader_find_end(vpx_reader *r); + +static INLINE int vpx_reader_has_error(vpx_reader *r) { + // Check if we have reached the end of the buffer. + // + // Variable 'count' stores the number of bits in the 'value' buffer, minus + // 8. The top byte is part of the algorithm, and the remainder is buffered + // to be shifted into it. So if count == 8, the top 16 bits of 'value' are + // occupied, 8 for the algorithm and 8 in the buffer. + // + // When reading a byte from the user's buffer, count is filled with 8 and + // one byte is filled into the value buffer. When we reach the end of the + // data, count is additionally filled with LOTS_OF_BITS. So when + // count == LOTS_OF_BITS - 1, the user's data has been exhausted. + // + // 1 if we have tried to decode bits after the end of stream was encountered. + // 0 No error. + return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; +} + +static INLINE int vpx_read(vpx_reader *r, int prob) { + unsigned int bit = 0; + BD_VALUE value; + BD_VALUE bigsplit; + int count; + unsigned int range; + unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; + + if (r->count < 0) + vpx_reader_fill(r); + + value = r->value; + count = r->count; + + bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); + + range = split; + + if (value >= bigsplit) { + range = r->range - split; + value = value - bigsplit; + bit = 1; + } + + { + register int shift = vpx_norm[range]; + range <<= shift; + value <<= shift; + count -= shift; + } + r->value = value; + r->count = count; + r->range = range; + + return bit; +} + +static INLINE int vpx_read_bit(vpx_reader *r) { + return vpx_read(r, 128); // vpx_prob_half +} + +static INLINE int vpx_read_literal(vpx_reader *r, int bits) { + int literal = 0, bit; + + for (bit = bits - 1; bit >= 0; bit--) + literal |= vpx_read_bit(r) << bit; + + return literal; +} + +static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree, + const vpx_prob *probs) { + vpx_tree_index i = 0; + + while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0) + continue; + + return -i; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_BITREADER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c new file mode 100644 index 00000000000..d7b55cf9f41 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#include "./bitreader_buffer.h" + +size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) { + return (rb->bit_offset + 7) >> 3; +} + +int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) { + const size_t off = rb->bit_offset; + const size_t p = off >> 3; + const int q = 7 - (int)(off & 0x7); + if (rb->bit_buffer + p < rb->bit_buffer_end) { + const int bit = (rb->bit_buffer[p] >> q) & 1; + rb->bit_offset = off + 1; + return bit; + } else { + rb->error_handler(rb->error_handler_data); + return 0; + } +} + +int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits) { + int value = 0, bit; + for (bit = bits - 1; bit >= 0; bit--) + value |= vpx_rb_read_bit(rb) << bit; + return value; +} + +int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, + int bits) { + const int value = vpx_rb_read_literal(rb, bits); + return vpx_rb_read_bit(rb) ? -value : value; +} + +int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, + int bits) { +#if CONFIG_MISC_FIXES + const int nbits = sizeof(unsigned) * 8 - bits - 1; + const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits; + return ((int) value) >> nbits; +#else + return vpx_rb_read_signed_literal(rb, bits); +#endif +} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h new file mode 100644 index 00000000000..8a48a95ed19 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_BITREADER_BUFFER_H_ +#define VPX_DSP_BITREADER_BUFFER_H_ + +#include + +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*vpx_rb_error_handler)(void *data); + +struct vpx_read_bit_buffer { + const uint8_t *bit_buffer; + const uint8_t *bit_buffer_end; + size_t bit_offset; + + void *error_handler_data; + vpx_rb_error_handler error_handler; +}; + +size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb); + +int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb); + +int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits); + +int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits); + +int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_BITREADER_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/intrapred.c b/thirdparty/libvpx/vpx_dsp/intrapred.c new file mode 100644 index 00000000000..cc4a74bd260 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/intrapred.c @@ -0,0 +1,870 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" + +#define DST(x, y) dst[(x) + (y) * stride] +#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + +static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) above; + // first column + for (r = 0; r < bs - 1; ++r) + dst[r * stride] = AVG2(left[r], left[r + 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // second column + for (r = 0; r < bs - 2; ++r) + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // rest of last row + for (c = 0; c < bs - 2; ++c) + dst[(bs - 1) * stride + c] = left[bs - 1]; + + for (r = bs - 2; r >= 0; --r) + for (c = 0; c < bs - 2; ++c) + dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; +} + +#if CONFIG_MISC_FIXES +static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) above; + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], + left[(c >> 1) + r + 2]) + : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); + } + dst += stride; + } +} +#endif // CONFIG_MISC_FIXES + +static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + int size; + (void)left; + for (c = 0; c < bs; ++c) { + dst[c] = AVG2(above[c], above[c + 1]); + dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); + } + for (r = 2, size = bs - 2; r < bs; r += 2, --size) { + memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); + memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); + memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); + memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); + } +} + +#if CONFIG_MISC_FIXES +static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) left; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); + } + dst += stride; + } +} +#endif // CONFIG_MISC_FIXES + +static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + const uint8_t above_right = above[bs - 1]; + const uint8_t *const dst_row0 = dst; + int x, size; + (void)left; + + for (x = 0; x < bs - 1; ++x) { + dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); + } + dst[bs - 1] = above_right; + dst += stride; + for (x = 1, size = bs - 2; x < bs; ++x, --size) { + memcpy(dst, dst_row0 + x, size); + memset(dst + size, above_right, x + 1); + dst += stride; + } +} + +#if CONFIG_MISC_FIXES +static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + (void) left; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = AVG3(above[r + c], above[r + c + 1], + above[r + c + 1 + (r + c + 2 < bs * 2)]); + } + dst += stride; + } +} +#endif // CONFIG_MISC_FIXES + +static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + + // first row + for (c = 0; c < bs; c++) + dst[c] = AVG2(above[c - 1], above[c]); + dst += stride; + + // second row + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + dst += stride; + + // the rest of first col + dst[0] = AVG3(above[-1], left[0], left[1]); + for (r = 3; r < bs; ++r) + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); + + // the rest of the block + for (r = 2; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-2 * stride + c - 1]; + dst += stride; + } +} + +static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int i; +#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 + // silence a spurious -Warray-bounds warning, possibly related to: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 + uint8_t border[69]; +#else + uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right +#endif + + // dst(bs, bs - 2)[0], i.e., border starting at bottom-left + for (i = 0; i < bs - 2; ++i) { + border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); + } + border[bs - 2] = AVG3(above[-1], left[0], left[1]); + border[bs - 1] = AVG3(left[0], above[-1], above[0]); + border[bs - 0] = AVG3(above[-1], above[0], above[1]); + // dst[0][2, size), i.e., remaining top border ascending + for (i = 0; i < bs - 2; ++i) { + border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); + } + + for (i = 0; i < bs; ++i) { + memcpy(dst + i * stride, border + bs - 1 - i, bs); + } +} + +static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + dst[0] = AVG2(above[-1], left[0]); + for (r = 1; r < bs; r++) + dst[r * stride] = AVG2(left[r - 1], left[r]); + dst++; + + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; r++) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + dst++; + + for (c = 0; c < bs - 2; c++) + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); + dst += stride; + + for (r = 1; r < bs; ++r) { + for (c = 0; c < bs - 2; c++) + dst[c] = dst[-stride + c - 2]; + dst += stride; + } +} + +static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) left; + + for (r = 0; r < bs; r++) { + memcpy(dst, above, bs); + dst += stride; + } +} + +static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) above; + + for (r = 0; r < bs; r++) { + memset(dst, left[r], bs); + dst += stride; + } +} + +static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r, c; + int ytop_left = above[-1]; + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) + dst[c] = clip_pixel(left[r] + above[c] - ytop_left); + dst += stride; + } +} + +static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int r; + (void) above; + (void) left; + + for (r = 0; r < bs; r++) { + memset(dst, 128, bs); + dst += stride; + } +} + +static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, + const uint8_t *left) { + int i, r, expected_dc, sum = 0; + (void) above; + + for (i = 0; i < bs; i++) + sum += left[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int i, r, expected_dc, sum = 0; + (void) left; + + for (i = 0; i < bs; i++) + sum += above[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, + const uint8_t *above, const uint8_t *left) { + int i, r, expected_dc, sum = 0; + const int count = 2 * bs; + + for (i = 0; i < bs; i++) { + sum += above[i]; + sum += left[i]; + } + + expected_dc = (sum + (count >> 1)) / count; + + for (r = 0; r < bs; r++) { + memset(dst, expected_dc, bs); + dst += stride; + } +} + +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int H = above[-1]; + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + + memset(dst + stride * 0, AVG3(H, I, J), 4); + memset(dst + stride * 1, AVG3(I, J, K), 4); + memset(dst + stride * 2, AVG3(J, K, L), 4); + memset(dst + stride * 3, AVG3(K, L, L), 4); +} + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int H = above[-1]; + const int I = above[0]; + const int J = above[1]; + const int K = above[2]; + const int L = above[3]; + const int M = above[4]; + (void)left; + + dst[0] = AVG3(H, I, J); + dst[1] = AVG3(I, J, K); + dst[2] = AVG3(J, K, L); + dst[3] = AVG3(K, L, M); + memcpy(dst + stride * 1, dst, 4); + memcpy(dst + stride * 2, dst, 4); + memcpy(dst + stride * 3, dst, 4); +} + +void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + (void)above; + DST(0, 0) = AVG2(I, J); + DST(2, 0) = DST(0, 1) = AVG2(J, K); + DST(2, 1) = DST(0, 2) = AVG2(K, L); + DST(1, 0) = AVG3(I, J, K); + DST(3, 0) = DST(1, 1) = AVG3(J, K, L); + DST(3, 1) = DST(1, 2) = AVG3(K, L, L); + DST(3, 2) = DST(2, 2) = + DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; +} + +void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG2(E, F); // differs from vp8 + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(E, F, G); // differs from vp8 +} + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)left; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + DST(3, 2) = AVG3(E, F, G); + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 3) = AVG3(F, G, H); +} + +void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = H; // differs from vp8 +} + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + const int E = above[4]; + const int F = above[5]; + const int G = above[6]; + const int H = above[7]; + (void)stride; + (void)left; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = AVG3(G, H, H); +} + +void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + DST(0, 0) = DST(1, 2) = AVG2(X, A); + DST(1, 0) = DST(2, 2) = AVG2(A, B); + DST(2, 0) = DST(3, 2) = AVG2(B, C); + DST(3, 0) = AVG2(C, D); + + DST(0, 3) = AVG3(K, J, I); + DST(0, 2) = AVG3(J, I, X); + DST(0, 1) = DST(1, 3) = AVG3(I, X, A); + DST(1, 1) = DST(2, 3) = AVG3(X, A, B); + DST(2, 1) = DST(3, 3) = AVG3(A, B, C); + DST(3, 1) = AVG3(B, C, D); +} + +void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + const int D = above[3]; + (void)stride; + DST(0, 3) = AVG3(J, K, L); + DST(1, 3) = DST(0, 2) = AVG3(I, J, K); + DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); + DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); + DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); + DST(3, 1) = DST(2, 0) = AVG3(C, B, A); + DST(3, 0) = AVG3(D, C, B); +} + +void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const int I = left[0]; + const int J = left[1]; + const int K = left[2]; + const int L = left[3]; + const int X = above[-1]; + const int A = above[0]; + const int B = above[1]; + const int C = above[2]; + + DST(0, 0) = DST(2, 1) = AVG2(I, X); + DST(0, 1) = DST(2, 2) = AVG2(J, I); + DST(0, 2) = DST(2, 3) = AVG2(K, J); + DST(0, 3) = AVG2(L, K); + + DST(3, 0) = AVG3(A, B, C); + DST(2, 0) = AVG3(X, A, B); + DST(1, 0) = DST(3, 1) = AVG3(I, X, A); + DST(1, 1) = DST(3, 2) = AVG3(J, I, X); + DST(1, 2) = DST(3, 3) = AVG3(K, J, I); + DST(1, 3) = AVG3(L, K, J); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) above; + (void) bd; + + // First column. + for (r = 0; r < bs - 1; ++r) { + dst[r * stride] = AVG2(left[r], left[r + 1]); + } + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // Second column. + for (r = 0; r < bs - 2; ++r) { + dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); + } + dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); + dst[(bs - 1) * stride] = left[bs - 1]; + dst++; + + // Rest of last row. + for (c = 0; c < bs - 2; ++c) + dst[(bs - 1) * stride + c] = left[bs - 1]; + + for (r = bs - 2; r >= 0; --r) { + for (c = 0; c < bs - 2; ++c) + dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; + } +} + +#if CONFIG_MISC_FIXES +static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) above; + (void) bd; + + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], + left[(c >> 1) + r + 2]) + : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); + } + dst += stride; + } +} +#endif // CONFIG_MISC_FIXES + +static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], + above[(r >> 1) + c + 2]) + : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); + } + dst += stride; + } +} + +#define highbd_d63e_predictor highbd_d63_predictor + +static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = r + c + 2 < bs * 2 ? AVG3(above[r + c], above[r + c + 1], + above[r + c + 2]) + : above[bs * 2 - 1]; + } + dst += stride; + } +} + +#if CONFIG_MISC_FIXES +static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) left; + (void) bd; + for (r = 0; r < bs; ++r) { + for (c = 0; c < bs; ++c) { + dst[c] = AVG3(above[r + c], above[r + c + 1], + above[r + c + 1 + (r + c + 2 < bs * 2)]); + } + dst += stride; + } +} +#endif // CONFIG_MISC_FIXES + +static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + + // first row + for (c = 0; c < bs; c++) + dst[c] = AVG2(above[c - 1], above[c]); + dst += stride; + + // second row + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + dst += stride; + + // the rest of first col + dst[0] = AVG3(above[-1], left[0], left[1]); + for (r = 3; r < bs; ++r) + dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); + + // the rest of the block + for (r = 2; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-2 * stride + c - 1]; + dst += stride; + } +} + +static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + dst[0] = AVG3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); + + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; ++r) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + + dst += stride; + for (r = 1; r < bs; ++r) { + for (c = 1; c < bs; c++) + dst[c] = dst[-stride + c - 1]; + dst += stride; + } +} + +static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + (void) bd; + dst[0] = AVG2(above[-1], left[0]); + for (r = 1; r < bs; r++) + dst[r * stride] = AVG2(left[r - 1], left[r]); + dst++; + + dst[0] = AVG3(left[0], above[-1], above[0]); + dst[stride] = AVG3(above[-1], left[0], left[1]); + for (r = 2; r < bs; r++) + dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); + dst++; + + for (c = 0; c < bs - 2; c++) + dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); + dst += stride; + + for (r = 1; r < bs; ++r) { + for (c = 0; c < bs - 2; c++) + dst[c] = dst[-stride + c - 2]; + dst += stride; + } +} + +static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) left; + (void) bd; + for (r = 0; r < bs; r++) { + memcpy(dst, above, bs * sizeof(uint16_t)); + dst += stride; + } +} + +static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) above; + (void) bd; + for (r = 0; r < bs; r++) { + vpx_memset16(dst, left[r], bs); + dst += stride; + } +} + +static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r, c; + int ytop_left = above[-1]; + (void) bd; + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) + dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); + dst += stride; + } +} + +static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int r; + (void) above; + (void) left; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, 128 << (bd - 8), bs); + dst += stride; + } +} + +static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + (void) above; + (void) bd; + + for (i = 0; i < bs; i++) + sum += left[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + (void) left; + (void) bd; + + for (i = 0; i < bs; i++) + sum += above[i]; + expected_dc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} + +static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, + int bs, const uint16_t *above, + const uint16_t *left, int bd) { + int i, r, expected_dc, sum = 0; + const int count = 2 * bs; + (void) bd; + + for (i = 0; i < bs; i++) { + sum += above[i]; + sum += left[i]; + } + + expected_dc = (sum + (count >> 1)) / count; + + for (r = 0; r < bs; r++) { + vpx_memset16(dst, expected_dc, bs); + dst += stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// This serves as a wrapper function, so that all the prediction functions +// can be unified and accessed as a pointer array. Note that the boundary +// above and left are not necessarily used all the time. +#define intra_pred_sized(type, size) \ + void vpx_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ + ptrdiff_t stride, \ + const uint8_t *above, \ + const uint8_t *left) { \ + type##_predictor(dst, stride, size, above, left); \ + } + +#if CONFIG_VP9_HIGHBITDEPTH +#define intra_pred_highbd_sized(type, size) \ + void vpx_highbd_##type##_predictor_##size##x##size##_c( \ + uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ + const uint16_t *left, int bd) { \ + highbd_##type##_predictor(dst, stride, size, above, left, bd); \ + } + +#define intra_pred_allsizes(type) \ + intra_pred_sized(type, 4) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) \ + intra_pred_highbd_sized(type, 4) \ + intra_pred_highbd_sized(type, 8) \ + intra_pred_highbd_sized(type, 16) \ + intra_pred_highbd_sized(type, 32) + +#else +#define intra_pred_allsizes(type) \ + intra_pred_sized(type, 4) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) + +#define intra_pred_no_4x4(type) \ + intra_pred_sized(type, 8) \ + intra_pred_sized(type, 16) \ + intra_pred_sized(type, 32) +#endif // CONFIG_VP9_HIGHBITDEPTH + +intra_pred_no_4x4(d207) +intra_pred_no_4x4(d63) +intra_pred_no_4x4(d45) +#if CONFIG_MISC_FIXES +intra_pred_allsizes(d207e) +intra_pred_allsizes(d63e) +intra_pred_no_4x4(d45e) +#endif +intra_pred_no_4x4(d117) +intra_pred_no_4x4(d135) +intra_pred_no_4x4(d153) +intra_pred_allsizes(v) +intra_pred_allsizes(h) +intra_pred_allsizes(tm) +intra_pred_allsizes(dc_128) +intra_pred_allsizes(dc_left) +intra_pred_allsizes(dc_top) +intra_pred_allsizes(dc) +#undef intra_pred_allsizes diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.c b/thirdparty/libvpx/vpx_dsp/inv_txfm.c new file mode 100644 index 00000000000..e18d31d7aa2 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/inv_txfm.c @@ -0,0 +1,2518 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/inv_txfm.h" + +void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { +/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, + 0.5 shifts per pixel. */ + int i; + tran_low_t output[16]; + tran_high_t a1, b1, c1, d1, e1; + const tran_low_t *ip = input; + tran_low_t *op = output; + + for (i = 0; i < 4; i++) { + a1 = ip[0] >> UNIT_QUANT_SHIFT; + c1 = ip[1] >> UNIT_QUANT_SHIFT; + d1 = ip[2] >> UNIT_QUANT_SHIFT; + b1 = ip[3] >> UNIT_QUANT_SHIFT; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + op[0] = WRAPLOW(a1); + op[1] = WRAPLOW(b1); + op[2] = WRAPLOW(c1); + op[3] = WRAPLOW(d1); + ip += 4; + op += 4; + } + + ip = output; + for (i = 0; i < 4; i++) { + a1 = ip[4 * 0]; + c1 = ip[4 * 1]; + d1 = ip[4 * 2]; + b1 = ip[4 * 3]; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); + dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); + dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); + dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); + + ip++; + dest++; + } +} + +void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { + int i; + tran_high_t a1, e1; + tran_low_t tmp[4]; + const tran_low_t *ip = in; + tran_low_t *op = tmp; + + a1 = ip[0] >> UNIT_QUANT_SHIFT; + e1 = a1 >> 1; + a1 -= e1; + op[0] = WRAPLOW(a1); + op[1] = op[2] = op[3] = WRAPLOW(e1); + + ip = tmp; + for (i = 0; i < 4; i++) { + e1 = ip[0] >> 1; + a1 = ip[0] - e1; + dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1); + dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1); + dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1); + dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1); + ip++; + dest++; + } +} + +void idct4_c(const tran_low_t *input, tran_low_t *output) { + tran_low_t step[4]; + tran_high_t temp1, temp2; + // stage 1 + temp1 = (input[0] + input[2]) * cospi_16_64; + temp2 = (input[0] - input[2]) * cospi_16_64; + step[0] = WRAPLOW(dct_const_round_shift(temp1)); + step[1] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; + temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; + step[2] = WRAPLOW(dct_const_round_shift(temp1)); + step[3] = WRAPLOW(dct_const_round_shift(temp2)); + + // stage 2 + output[0] = WRAPLOW(step[0] + step[3]); + output[1] = WRAPLOW(step[1] + step[2]); + output[2] = WRAPLOW(step[1] - step[2]); + output[3] = WRAPLOW(step[0] - step[3]); +} + +void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[4], temp_out[4]; + + // Rows + for (i = 0; i < 4; ++i) { + idct4_c(input, outptr); + input += 4; + outptr += 4; + } + + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + idct4_c(temp_in, temp_out); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 4)); + } + } +} + +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, + int dest_stride) { + int i; + tran_high_t a1; + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); + a1 = ROUND_POWER_OF_TWO(out, 4); + + for (i = 0; i < 4; i++) { + dest[0] = clip_pixel_add(dest[0], a1); + dest[1] = clip_pixel_add(dest[1], a1); + dest[2] = clip_pixel_add(dest[2], a1); + dest[3] = clip_pixel_add(dest[3], a1); + dest += dest_stride; + } +} + +void idct8_c(const tran_low_t *input, tran_low_t *output) { + tran_low_t step1[8], step2[8]; + tran_high_t temp1, temp2; + // stage 1 + step1[0] = input[0]; + step1[2] = input[4]; + step1[1] = input[2]; + step1[3] = input[6]; + temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; + temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; + temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + // stage 2 + temp1 = (step1[0] + step1[2]) * cospi_16_64; + temp2 = (step1[0] - step1[2]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); + + // stage 3 + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + step1[7] = step2[7]; + + // stage 4 + output[0] = WRAPLOW(step1[0] + step1[7]); + output[1] = WRAPLOW(step1[1] + step1[6]); + output[2] = WRAPLOW(step1[2] + step1[5]); + output[3] = WRAPLOW(step1[3] + step1[4]); + output[4] = WRAPLOW(step1[3] - step1[4]); + output[5] = WRAPLOW(step1[2] - step1[5]); + output[6] = WRAPLOW(step1[1] - step1[6]); + output[7] = WRAPLOW(step1[0] - step1[7]); +} + +void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[8], temp_out[8]; + + // First transform rows + for (i = 0; i < 8; ++i) { + idct8_c(input, outptr); + input += 8; + outptr += 8; + } + + // Then transform columns + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + idct8_c(temp_in, temp_out); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 5)); + } + } +} + +void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + int i, j; + tran_high_t a1; + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); + a1 = ROUND_POWER_OF_TWO(out, 5); + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) + dest[i] = clip_pixel_add(dest[i], a1); + dest += stride; + } +} + +void iadst4_c(const tran_low_t *input, tran_low_t *output) { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; + + tran_low_t x0 = input[0]; + tran_low_t x1 = input[1]; + tran_low_t x2 = input[2]; + tran_low_t x3 = input[3]; + + if (!(x0 | x1 | x2 | x3)) { + output[0] = output[1] = output[2] = output[3] = 0; + return; + } + + s0 = sinpi_1_9 * x0; + s1 = sinpi_2_9 * x0; + s2 = sinpi_3_9 * x1; + s3 = sinpi_4_9 * x2; + s4 = sinpi_1_9 * x2; + s5 = sinpi_2_9 * x3; + s6 = sinpi_4_9 * x3; + s7 = WRAPLOW(x0 - x2 + x3); + + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = sinpi_3_9 * s7; + + // 1-D transform scaling factor is sqrt(2). + // The overall dynamic range is 14b (input) + 14b (multiplication scaling) + // + 1b (addition) = 29b. + // Hence the output bit depth is 15b. + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); + output[2] = WRAPLOW(dct_const_round_shift(s2)); + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); +} + +void iadst8_c(const tran_low_t *input, tran_low_t *output) { + int s0, s1, s2, s3, s4, s5, s6, s7; + + tran_high_t x0 = input[7]; + tran_high_t x1 = input[0]; + tran_high_t x2 = input[5]; + tran_high_t x3 = input[2]; + tran_high_t x4 = input[3]; + tran_high_t x5 = input[4]; + tran_high_t x6 = input[1]; + tran_high_t x7 = input[6]; + + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { + output[0] = output[1] = output[2] = output[3] = output[4] + = output[5] = output[6] = output[7] = 0; + return; + } + + // stage 1 + s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); + s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); + s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); + s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); + s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); + s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); + s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); + s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); + + x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); + x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); + x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); + x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); + + // stage 2 + s0 = (int)x0; + s1 = (int)x1; + s2 = (int)x2; + s3 = (int)x3; + s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); + s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); + s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); + s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); + + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); + + // stage 3 + s2 = (int)(cospi_16_64 * (x2 + x3)); + s3 = (int)(cospi_16_64 * (x2 - x3)); + s6 = (int)(cospi_16_64 * (x6 + x7)); + s7 = (int)(cospi_16_64 * (x6 - x7)); + + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x4); + output[2] = WRAPLOW(x6); + output[3] = WRAPLOW(-x2); + output[4] = WRAPLOW(x3); + output[5] = WRAPLOW(-x7); + output[6] = WRAPLOW(x5); + output[7] = WRAPLOW(-x1); +} + +void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + tran_low_t out[8 * 8] = { 0 }; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[8], temp_out[8]; + + // First transform rows + // only first 4 row has non-zero coefs + for (i = 0; i < 4; ++i) { + idct8_c(input, outptr); + input += 8; + outptr += 8; + } + + // Then transform columns + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + idct8_c(temp_in, temp_out); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 5)); + } + } +} + +void idct16_c(const tran_low_t *input, tran_low_t *output) { + tran_low_t step1[16], step2[16]; + tran_high_t temp1, temp2; + + // stage 1 + step1[0] = input[0/2]; + step1[1] = input[16/2]; + step1[2] = input[8/2]; + step1[3] = input[24/2]; + step1[4] = input[4/2]; + step1[5] = input[20/2]; + step1[6] = input[12/2]; + step1[7] = input[28/2]; + step1[8] = input[2/2]; + step1[9] = input[18/2]; + step1[10] = input[10/2]; + step1[11] = input[26/2]; + step1[12] = input[6/2]; + step1[13] = input[22/2]; + step1[14] = input[14/2]; + step1[15] = input[30/2]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + step1[7] = step2[7]; + + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); + + // stage 6 + step2[0] = WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + step2[14] = step1[14]; + step2[15] = step1[15]; + + // stage 7 + output[0] = WRAPLOW(step2[0] + step2[15]); + output[1] = WRAPLOW(step2[1] + step2[14]); + output[2] = WRAPLOW(step2[2] + step2[13]); + output[3] = WRAPLOW(step2[3] + step2[12]); + output[4] = WRAPLOW(step2[4] + step2[11]); + output[5] = WRAPLOW(step2[5] + step2[10]); + output[6] = WRAPLOW(step2[6] + step2[9]); + output[7] = WRAPLOW(step2[7] + step2[8]); + output[8] = WRAPLOW(step2[7] - step2[8]); + output[9] = WRAPLOW(step2[6] - step2[9]); + output[10] = WRAPLOW(step2[5] - step2[10]); + output[11] = WRAPLOW(step2[4] - step2[11]); + output[12] = WRAPLOW(step2[3] - step2[12]); + output[13] = WRAPLOW(step2[2] - step2[13]); + output[14] = WRAPLOW(step2[1] - step2[14]); + output[15] = WRAPLOW(step2[0] - step2[15]); +} + +void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[16], temp_out[16]; + + // First transform rows + for (i = 0; i < 16; ++i) { + idct16_c(input, outptr); + input += 16; + outptr += 16; + } + + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + idct16_c(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +void iadst16_c(const tran_low_t *input, tran_low_t *output) { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; + tran_high_t s9, s10, s11, s12, s13, s14, s15; + + tran_high_t x0 = input[15]; + tran_high_t x1 = input[0]; + tran_high_t x2 = input[13]; + tran_high_t x3 = input[2]; + tran_high_t x4 = input[11]; + tran_high_t x5 = input[4]; + tran_high_t x6 = input[9]; + tran_high_t x7 = input[6]; + tran_high_t x8 = input[7]; + tran_high_t x9 = input[8]; + tran_high_t x10 = input[5]; + tran_high_t x11 = input[10]; + tran_high_t x12 = input[3]; + tran_high_t x13 = input[12]; + tran_high_t x14 = input[1]; + tran_high_t x15 = input[14]; + + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 + | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { + output[0] = output[1] = output[2] = output[3] = output[4] + = output[5] = output[6] = output[7] = output[8] + = output[9] = output[10] = output[11] = output[12] + = output[13] = output[14] = output[15] = 0; + return; + } + + // stage 1 + s0 = x0 * cospi_1_64 + x1 * cospi_31_64; + s1 = x0 * cospi_31_64 - x1 * cospi_1_64; + s2 = x2 * cospi_5_64 + x3 * cospi_27_64; + s3 = x2 * cospi_27_64 - x3 * cospi_5_64; + s4 = x4 * cospi_9_64 + x5 * cospi_23_64; + s5 = x4 * cospi_23_64 - x5 * cospi_9_64; + s6 = x6 * cospi_13_64 + x7 * cospi_19_64; + s7 = x6 * cospi_19_64 - x7 * cospi_13_64; + s8 = x8 * cospi_17_64 + x9 * cospi_15_64; + s9 = x8 * cospi_15_64 - x9 * cospi_17_64; + s10 = x10 * cospi_21_64 + x11 * cospi_11_64; + s11 = x10 * cospi_11_64 - x11 * cospi_21_64; + s12 = x12 * cospi_25_64 + x13 * cospi_7_64; + s13 = x12 * cospi_7_64 - x13 * cospi_25_64; + s14 = x14 * cospi_29_64 + x15 * cospi_3_64; + s15 = x14 * cospi_3_64 - x15 * cospi_29_64; + + x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); + x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); + x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); + x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); + x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); + x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); + x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); + x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); + x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); + x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); + x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); + x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4; + s5 = x5; + s6 = x6; + s7 = x7; + s8 = x8 * cospi_4_64 + x9 * cospi_28_64; + s9 = x8 * cospi_28_64 - x9 * cospi_4_64; + s10 = x10 * cospi_20_64 + x11 * cospi_12_64; + s11 = x10 * cospi_12_64 - x11 * cospi_20_64; + s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; + s13 = x12 * cospi_4_64 + x13 * cospi_28_64; + s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; + s15 = x14 * cospi_20_64 + x15 * cospi_12_64; + + x0 = WRAPLOW(s0 + s4); + x1 = WRAPLOW(s1 + s5); + x2 = WRAPLOW(s2 + s6); + x3 = WRAPLOW(s3 + s7); + x4 = WRAPLOW(s0 - s4); + x5 = WRAPLOW(s1 - s5); + x6 = WRAPLOW(s2 - s6); + x7 = WRAPLOW(s3 - s7); + x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); + x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); + x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); + x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); + x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); + x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); + x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); + + // stage 3 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4 * cospi_8_64 + x5 * cospi_24_64; + s5 = x4 * cospi_24_64 - x5 * cospi_8_64; + s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; + s7 = x6 * cospi_8_64 + x7 * cospi_24_64; + s8 = x8; + s9 = x9; + s10 = x10; + s11 = x11; + s12 = x12 * cospi_8_64 + x13 * cospi_24_64; + s13 = x12 * cospi_24_64 - x13 * cospi_8_64; + s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; + s15 = x14 * cospi_8_64 + x15 * cospi_24_64; + + x0 = WRAPLOW(s0 + s2); + x1 = WRAPLOW(s1 + s3); + x2 = WRAPLOW(s0 - s2); + x3 = WRAPLOW(s1 - s3); + x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); + x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); + x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); + x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); + x8 = WRAPLOW(s8 + s10); + x9 = WRAPLOW(s9 + s11); + x10 = WRAPLOW(s8 - s10); + x11 = WRAPLOW(s9 - s11); + x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); + x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); + x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); + x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); + + // stage 4 + s2 = (- cospi_16_64) * (x2 + x3); + s3 = cospi_16_64 * (x2 - x3); + s6 = cospi_16_64 * (x6 + x7); + s7 = cospi_16_64 * (- x6 + x7); + s10 = cospi_16_64 * (x10 + x11); + s11 = cospi_16_64 * (- x10 + x11); + s14 = (- cospi_16_64) * (x14 + x15); + s15 = cospi_16_64 * (x14 - x15); + + x2 = WRAPLOW(dct_const_round_shift(s2)); + x3 = WRAPLOW(dct_const_round_shift(s3)); + x6 = WRAPLOW(dct_const_round_shift(s6)); + x7 = WRAPLOW(dct_const_round_shift(s7)); + x10 = WRAPLOW(dct_const_round_shift(s10)); + x11 = WRAPLOW(dct_const_round_shift(s11)); + x14 = WRAPLOW(dct_const_round_shift(s14)); + x15 = WRAPLOW(dct_const_round_shift(s15)); + + output[0] = WRAPLOW(x0); + output[1] = WRAPLOW(-x8); + output[2] = WRAPLOW(x12); + output[3] = WRAPLOW(-x4); + output[4] = WRAPLOW(x6); + output[5] = WRAPLOW(x14); + output[6] = WRAPLOW(x10); + output[7] = WRAPLOW(x2); + output[8] = WRAPLOW(x3); + output[9] = WRAPLOW(x11); + output[10] = WRAPLOW(x15); + output[11] = WRAPLOW(x7); + output[12] = WRAPLOW(x5); + output[13] = WRAPLOW(-x13); + output[14] = WRAPLOW(x9); + output[15] = WRAPLOW(-x1); +} + +void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[16 * 16] = { 0 }; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[16], temp_out[16]; + + // First transform rows. Since all non-zero dct coefficients are in + // upper-left 4x4 area, we only need to calculate first 4 rows here. + for (i = 0; i < 4; ++i) { + idct16_c(input, outptr); + input += 16; + outptr += 16; + } + + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + idct16_c(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + int i, j; + tran_high_t a1; + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); + a1 = ROUND_POWER_OF_TWO(out, 6); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) + dest[i] = clip_pixel_add(dest[i], a1); + dest += stride; + } +} + +void idct32_c(const tran_low_t *input, tran_low_t *output) { + tran_low_t step1[32], step2[32]; + tran_high_t temp1, temp2; + + // stage 1 + step1[0] = input[0]; + step1[1] = input[16]; + step1[2] = input[8]; + step1[3] = input[24]; + step1[4] = input[4]; + step1[5] = input[20]; + step1[6] = input[12]; + step1[7] = input[28]; + step1[8] = input[2]; + step1[9] = input[18]; + step1[10] = input[10]; + step1[11] = input[26]; + step1[12] = input[6]; + step1[13] = input[22]; + step1[14] = input[14]; + step1[15] = input[30]; + + temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; + temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; + step1[16] = WRAPLOW(dct_const_round_shift(temp1)); + step1[31] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; + temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; + temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; + temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; + temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; + temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; + temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; + temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = WRAPLOW(dct_const_round_shift(temp1)); + step2[15] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + + step2[16] = WRAPLOW(step1[16] + step1[17]); + step2[17] = WRAPLOW(step1[16] - step1[17]); + step2[18] = WRAPLOW(-step1[18] + step1[19]); + step2[19] = WRAPLOW(step1[18] + step1[19]); + step2[20] = WRAPLOW(step1[20] + step1[21]); + step2[21] = WRAPLOW(step1[20] - step1[21]); + step2[22] = WRAPLOW(-step1[22] + step1[23]); + step2[23] = WRAPLOW(step1[22] + step1[23]); + step2[24] = WRAPLOW(step1[24] + step1[25]); + step2[25] = WRAPLOW(step1[24] - step1[25]); + step2[26] = WRAPLOW(-step1[26] + step1[27]); + step2[27] = WRAPLOW(step1[26] + step1[27]); + step2[28] = WRAPLOW(step1[28] + step1[29]); + step2[29] = WRAPLOW(step1[28] - step1[29]); + step2[30] = WRAPLOW(-step1[30] + step1[31]); + step2[31] = WRAPLOW(step1[30] + step1[31]); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = WRAPLOW(dct_const_round_shift(temp1)); + step1[7] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + + step1[8] = WRAPLOW(step2[8] + step2[9]); + step1[9] = WRAPLOW(step2[8] - step2[9]); + step1[10] = WRAPLOW(-step2[10] + step2[11]); + step1[11] = WRAPLOW(step2[10] + step2[11]); + step1[12] = WRAPLOW(step2[12] + step2[13]); + step1[13] = WRAPLOW(step2[12] - step2[13]); + step1[14] = WRAPLOW(-step2[14] + step2[15]); + step1[15] = WRAPLOW(step2[14] + step2[15]); + + step1[16] = step2[16]; + step1[31] = step2[31]; + temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; + temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; + step1[17] = WRAPLOW(dct_const_round_shift(temp1)); + step1[30] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; + temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); + step1[19] = step2[19]; + step1[20] = step2[20]; + temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; + temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; + temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[27] = step2[27]; + step1[28] = step2[28]; + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = WRAPLOW(dct_const_round_shift(temp1)); + step2[1] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = WRAPLOW(dct_const_round_shift(temp1)); + step2[3] = WRAPLOW(dct_const_round_shift(temp2)); + step2[4] = WRAPLOW(step1[4] + step1[5]); + step2[5] = WRAPLOW(step1[4] - step1[5]); + step2[6] = WRAPLOW(-step1[6] + step1[7]); + step2[7] = WRAPLOW(step1[6] + step1[7]); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = WRAPLOW(dct_const_round_shift(temp1)); + step2[14] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + step2[11] = step1[11]; + step2[12] = step1[12]; + + step2[16] = WRAPLOW(step1[16] + step1[19]); + step2[17] = WRAPLOW(step1[17] + step1[18]); + step2[18] = WRAPLOW(step1[17] - step1[18]); + step2[19] = WRAPLOW(step1[16] - step1[19]); + step2[20] = WRAPLOW(-step1[20] + step1[23]); + step2[21] = WRAPLOW(-step1[21] + step1[22]); + step2[22] = WRAPLOW(step1[21] + step1[22]); + step2[23] = WRAPLOW(step1[20] + step1[23]); + + step2[24] = WRAPLOW(step1[24] + step1[27]); + step2[25] = WRAPLOW(step1[25] + step1[26]); + step2[26] = WRAPLOW(step1[25] - step1[26]); + step2[27] = WRAPLOW(step1[24] - step1[27]); + step2[28] = WRAPLOW(-step1[28] + step1[31]); + step2[29] = WRAPLOW(-step1[29] + step1[30]); + step2[30] = WRAPLOW(step1[29] + step1[30]); + step2[31] = WRAPLOW(step1[28] + step1[31]); + + // stage 5 + step1[0] = WRAPLOW(step2[0] + step2[3]); + step1[1] = WRAPLOW(step2[1] + step2[2]); + step1[2] = WRAPLOW(step2[1] - step2[2]); + step1[3] = WRAPLOW(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = WRAPLOW(dct_const_round_shift(temp1)); + step1[6] = WRAPLOW(dct_const_round_shift(temp2)); + step1[7] = step2[7]; + + step1[8] = WRAPLOW(step2[8] + step2[11]); + step1[9] = WRAPLOW(step2[9] + step2[10]); + step1[10] = WRAPLOW(step2[9] - step2[10]); + step1[11] = WRAPLOW(step2[8] - step2[11]); + step1[12] = WRAPLOW(-step2[12] + step2[15]); + step1[13] = WRAPLOW(-step2[13] + step2[14]); + step1[14] = WRAPLOW(step2[13] + step2[14]); + step1[15] = WRAPLOW(step2[12] + step2[15]); + + step1[16] = step2[16]; + step1[17] = step2[17]; + temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; + temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; + step1[18] = WRAPLOW(dct_const_round_shift(temp1)); + step1[29] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; + temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; + step1[19] = WRAPLOW(dct_const_round_shift(temp1)); + step1[28] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; + temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; + temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); + step1[22] = step2[22]; + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[25] = step2[25]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // stage 6 + step2[0] = WRAPLOW(step1[0] + step1[7]); + step2[1] = WRAPLOW(step1[1] + step1[6]); + step2[2] = WRAPLOW(step1[2] + step1[5]); + step2[3] = WRAPLOW(step1[3] + step1[4]); + step2[4] = WRAPLOW(step1[3] - step1[4]); + step2[5] = WRAPLOW(step1[2] - step1[5]); + step2[6] = WRAPLOW(step1[1] - step1[6]); + step2[7] = WRAPLOW(step1[0] - step1[7]); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = WRAPLOW(dct_const_round_shift(temp1)); + step2[13] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = WRAPLOW(dct_const_round_shift(temp1)); + step2[12] = WRAPLOW(dct_const_round_shift(temp2)); + step2[14] = step1[14]; + step2[15] = step1[15]; + + step2[16] = WRAPLOW(step1[16] + step1[23]); + step2[17] = WRAPLOW(step1[17] + step1[22]); + step2[18] = WRAPLOW(step1[18] + step1[21]); + step2[19] = WRAPLOW(step1[19] + step1[20]); + step2[20] = WRAPLOW(step1[19] - step1[20]); + step2[21] = WRAPLOW(step1[18] - step1[21]); + step2[22] = WRAPLOW(step1[17] - step1[22]); + step2[23] = WRAPLOW(step1[16] - step1[23]); + + step2[24] = WRAPLOW(-step1[24] + step1[31]); + step2[25] = WRAPLOW(-step1[25] + step1[30]); + step2[26] = WRAPLOW(-step1[26] + step1[29]); + step2[27] = WRAPLOW(-step1[27] + step1[28]); + step2[28] = WRAPLOW(step1[27] + step1[28]); + step2[29] = WRAPLOW(step1[26] + step1[29]); + step2[30] = WRAPLOW(step1[25] + step1[30]); + step2[31] = WRAPLOW(step1[24] + step1[31]); + + // stage 7 + step1[0] = WRAPLOW(step2[0] + step2[15]); + step1[1] = WRAPLOW(step2[1] + step2[14]); + step1[2] = WRAPLOW(step2[2] + step2[13]); + step1[3] = WRAPLOW(step2[3] + step2[12]); + step1[4] = WRAPLOW(step2[4] + step2[11]); + step1[5] = WRAPLOW(step2[5] + step2[10]); + step1[6] = WRAPLOW(step2[6] + step2[9]); + step1[7] = WRAPLOW(step2[7] + step2[8]); + step1[8] = WRAPLOW(step2[7] - step2[8]); + step1[9] = WRAPLOW(step2[6] - step2[9]); + step1[10] = WRAPLOW(step2[5] - step2[10]); + step1[11] = WRAPLOW(step2[4] - step2[11]); + step1[12] = WRAPLOW(step2[3] - step2[12]); + step1[13] = WRAPLOW(step2[2] - step2[13]); + step1[14] = WRAPLOW(step2[1] - step2[14]); + step1[15] = WRAPLOW(step2[0] - step2[15]); + + step1[16] = step2[16]; + step1[17] = step2[17]; + step1[18] = step2[18]; + step1[19] = step2[19]; + temp1 = (-step2[20] + step2[27]) * cospi_16_64; + temp2 = (step2[20] + step2[27]) * cospi_16_64; + step1[20] = WRAPLOW(dct_const_round_shift(temp1)); + step1[27] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = (-step2[21] + step2[26]) * cospi_16_64; + temp2 = (step2[21] + step2[26]) * cospi_16_64; + step1[21] = WRAPLOW(dct_const_round_shift(temp1)); + step1[26] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = (-step2[22] + step2[25]) * cospi_16_64; + temp2 = (step2[22] + step2[25]) * cospi_16_64; + step1[22] = WRAPLOW(dct_const_round_shift(temp1)); + step1[25] = WRAPLOW(dct_const_round_shift(temp2)); + temp1 = (-step2[23] + step2[24]) * cospi_16_64; + temp2 = (step2[23] + step2[24]) * cospi_16_64; + step1[23] = WRAPLOW(dct_const_round_shift(temp1)); + step1[24] = WRAPLOW(dct_const_round_shift(temp2)); + step1[28] = step2[28]; + step1[29] = step2[29]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // final stage + output[0] = WRAPLOW(step1[0] + step1[31]); + output[1] = WRAPLOW(step1[1] + step1[30]); + output[2] = WRAPLOW(step1[2] + step1[29]); + output[3] = WRAPLOW(step1[3] + step1[28]); + output[4] = WRAPLOW(step1[4] + step1[27]); + output[5] = WRAPLOW(step1[5] + step1[26]); + output[6] = WRAPLOW(step1[6] + step1[25]); + output[7] = WRAPLOW(step1[7] + step1[24]); + output[8] = WRAPLOW(step1[8] + step1[23]); + output[9] = WRAPLOW(step1[9] + step1[22]); + output[10] = WRAPLOW(step1[10] + step1[21]); + output[11] = WRAPLOW(step1[11] + step1[20]); + output[12] = WRAPLOW(step1[12] + step1[19]); + output[13] = WRAPLOW(step1[13] + step1[18]); + output[14] = WRAPLOW(step1[14] + step1[17]); + output[15] = WRAPLOW(step1[15] + step1[16]); + output[16] = WRAPLOW(step1[15] - step1[16]); + output[17] = WRAPLOW(step1[14] - step1[17]); + output[18] = WRAPLOW(step1[13] - step1[18]); + output[19] = WRAPLOW(step1[12] - step1[19]); + output[20] = WRAPLOW(step1[11] - step1[20]); + output[21] = WRAPLOW(step1[10] - step1[21]); + output[22] = WRAPLOW(step1[9] - step1[22]); + output[23] = WRAPLOW(step1[8] - step1[23]); + output[24] = WRAPLOW(step1[7] - step1[24]); + output[25] = WRAPLOW(step1[6] - step1[25]); + output[26] = WRAPLOW(step1[5] - step1[26]); + output[27] = WRAPLOW(step1[4] - step1[27]); + output[28] = WRAPLOW(step1[3] - step1[28]); + output[29] = WRAPLOW(step1[2] - step1[29]); + output[30] = WRAPLOW(step1[1] - step1[30]); + output[31] = WRAPLOW(step1[0] - step1[31]); +} + +void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[32 * 32]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + + // Rows + for (i = 0; i < 32; ++i) { + int16_t zero_coeff[16]; + for (j = 0; j < 16; ++j) + zero_coeff[j] = input[2 * j] | input[2 * j + 1]; + for (j = 0; j < 8; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + for (j = 0; j < 4; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + for (j = 0; j < 2; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + + if (zero_coeff[0] | zero_coeff[1]) + idct32_c(input, outptr); + else + memset(outptr, 0, sizeof(tran_low_t) * 32); + input += 32; + outptr += 32; + } + + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + idct32_c(temp_in, temp_out); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[32 * 32] = {0}; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + + // Rows + // only upper-left 16x16 has non-zero coeff + for (i = 0; i < 16; ++i) { + idct32_c(input, outptr); + input += 32; + outptr += 32; + } + + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + idct32_c(temp_in, temp_out); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + tran_low_t out[32 * 32] = {0}; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + + // Rows + // only upper-left 8x8 has non-zero coeff + for (i = 0; i < 8; ++i) { + idct32_c(input, outptr); + input += 32; + outptr += 32; + } + + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + idct32_c(temp_in, temp_out); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { + int i, j; + tran_high_t a1; + + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); + a1 = ROUND_POWER_OF_TWO(out, 6); + + for (j = 0; j < 32; ++j) { + for (i = 0; i < 32; ++i) + dest[i] = clip_pixel_add(dest[i], a1); + dest += stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, + 0.5 shifts per pixel. */ + int i; + tran_low_t output[16]; + tran_high_t a1, b1, c1, d1, e1; + const tran_low_t *ip = input; + tran_low_t *op = output; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + for (i = 0; i < 4; i++) { + a1 = ip[0] >> UNIT_QUANT_SHIFT; + c1 = ip[1] >> UNIT_QUANT_SHIFT; + d1 = ip[2] >> UNIT_QUANT_SHIFT; + b1 = ip[3] >> UNIT_QUANT_SHIFT; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = HIGHBD_WRAPLOW(b1, bd); + op[2] = HIGHBD_WRAPLOW(c1, bd); + op[3] = HIGHBD_WRAPLOW(d1, bd); + ip += 4; + op += 4; + } + + ip = output; + for (i = 0; i < 4; i++) { + a1 = ip[4 * 0]; + c1 = ip[4 * 1]; + d1 = ip[4 * 2]; + b1 = ip[4 * 3]; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], + HIGHBD_WRAPLOW(a1, bd), bd); + dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], + HIGHBD_WRAPLOW(b1, bd), bd); + dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], + HIGHBD_WRAPLOW(c1, bd), bd); + dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], + HIGHBD_WRAPLOW(d1, bd), bd); + + ip++; + dest++; + } +} + +void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, + int dest_stride, int bd) { + int i; + tran_high_t a1, e1; + tran_low_t tmp[4]; + const tran_low_t *ip = in; + tran_low_t *op = tmp; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + (void) bd; + + a1 = ip[0] >> UNIT_QUANT_SHIFT; + e1 = a1 >> 1; + a1 -= e1; + op[0] = HIGHBD_WRAPLOW(a1, bd); + op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); + + ip = tmp; + for (i = 0; i < 4; i++) { + e1 = ip[0] >> 1; + a1 = ip[0] - e1; + dest[dest_stride * 0] = highbd_clip_pixel_add( + dest[dest_stride * 0], a1, bd); + dest[dest_stride * 1] = highbd_clip_pixel_add( + dest[dest_stride * 1], e1, bd); + dest[dest_stride * 2] = highbd_clip_pixel_add( + dest[dest_stride * 2], e1, bd); + dest[dest_stride * 3] = highbd_clip_pixel_add( + dest[dest_stride * 3], e1, bd); + ip++; + dest++; + } +} + +void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_low_t step[4]; + tran_high_t temp1, temp2; + (void) bd; + // stage 1 + temp1 = (input[0] + input[2]) * cospi_16_64; + temp2 = (input[0] - input[2]) * cospi_16_64; + step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; + temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; + step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + // stage 2 + output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); + output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); + output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); + output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); +} + +void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[4], temp_out[4]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Rows + for (i = 0; i < 4; ++i) { + vpx_highbd_idct4_c(input, outptr, bd); + input += 4; + outptr += 4; + } + + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + vpx_highbd_idct4_c(temp_in, temp_out, bd); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + } + } +} + +void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, + int dest_stride, int bd) { + int i; + tran_high_t a1; + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); + a1 = ROUND_POWER_OF_TWO(out, 4); + + for (i = 0; i < 4; i++) { + dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); + dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); + dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); + dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); + dest += dest_stride; + } +} + +void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_low_t step1[8], step2[8]; + tran_high_t temp1, temp2; + // stage 1 + step1[0] = input[0]; + step1[2] = input[4]; + step1[1] = input[2]; + step1[3] = input[6]; + temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; + temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; + temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + // stage 2 & stage 3 - even half + vpx_highbd_idct4_c(step1, step1, bd); + + // stage 2 - odd half + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); + + // stage 3 - odd half + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[7] = step2[7]; + + // stage 4 + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); +} + +void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[8], temp_out[8]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // First transform rows. + for (i = 0; i < 8; ++i) { + vpx_highbd_idct8_c(input, outptr, bd); + input += 8; + outptr += 8; + } + + // Then transform columns. + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + vpx_highbd_idct8_c(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } +} + +void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + int i, j; + tran_high_t a1; + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); + a1 = ROUND_POWER_OF_TWO(out, 5); + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + dest += stride; + } +} + +void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; + + tran_low_t x0 = input[0]; + tran_low_t x1 = input[1]; + tran_low_t x2 = input[2]; + tran_low_t x3 = input[3]; + (void) bd; + + if (!(x0 | x1 | x2 | x3)) { + memset(output, 0, 4 * sizeof(*output)); + return; + } + + s0 = sinpi_1_9 * x0; + s1 = sinpi_2_9 * x0; + s2 = sinpi_3_9 * x1; + s3 = sinpi_4_9 * x2; + s4 = sinpi_1_9 * x2; + s5 = sinpi_2_9 * x3; + s6 = sinpi_4_9 * x3; + s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); + + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = sinpi_3_9 * s7; + + // 1-D transform scaling factor is sqrt(2). + // The overall dynamic range is 14b (input) + 14b (multiplication scaling) + // + 1b (addition) = 29b. + // Hence the output bit depth is 15b. + output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd); + output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd); + output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd); +} + +void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; + + tran_low_t x0 = input[7]; + tran_low_t x1 = input[0]; + tran_low_t x2 = input[5]; + tran_low_t x3 = input[2]; + tran_low_t x4 = input[3]; + tran_low_t x5 = input[4]; + tran_low_t x6 = input[1]; + tran_low_t x7 = input[6]; + (void) bd; + + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { + memset(output, 0, 8 * sizeof(*output)); + return; + } + + // stage 1 + s0 = cospi_2_64 * x0 + cospi_30_64 * x1; + s1 = cospi_30_64 * x0 - cospi_2_64 * x1; + s2 = cospi_10_64 * x2 + cospi_22_64 * x3; + s3 = cospi_22_64 * x2 - cospi_10_64 * x3; + s4 = cospi_18_64 * x4 + cospi_14_64 * x5; + s5 = cospi_14_64 * x4 - cospi_18_64 * x5; + s6 = cospi_26_64 * x6 + cospi_6_64 * x7; + s7 = cospi_6_64 * x6 - cospi_26_64 * x7; + + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = cospi_8_64 * x4 + cospi_24_64 * x5; + s5 = cospi_24_64 * x4 - cospi_8_64 * x5; + s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; + s7 = cospi_8_64 * x6 + cospi_24_64 * x7; + + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); + + // stage 3 + s2 = cospi_16_64 * (x2 + x3); + s3 = cospi_16_64 * (x2 - x3); + s6 = cospi_16_64 * (x6 + x7); + s7 = cospi_16_64 * (x6 - x7); + + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x4, bd); + output[2] = HIGHBD_WRAPLOW(x6, bd); + output[3] = HIGHBD_WRAPLOW(-x2, bd); + output[4] = HIGHBD_WRAPLOW(x3, bd); + output[5] = HIGHBD_WRAPLOW(-x7, bd); + output[6] = HIGHBD_WRAPLOW(x5, bd); + output[7] = HIGHBD_WRAPLOW(-x1, bd); +} + +void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[8 * 8] = { 0 }; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[8], temp_out[8]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // First transform rows. + // Only first 4 row has non-zero coefs. + for (i = 0; i < 4; ++i) { + vpx_highbd_idct8_c(input, outptr, bd); + input += 8; + outptr += 8; + } + // Then transform columns. + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + vpx_highbd_idct8_c(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } +} + +void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_low_t step1[16], step2[16]; + tran_high_t temp1, temp2; + (void) bd; + + // stage 1 + step1[0] = input[0/2]; + step1[1] = input[16/2]; + step1[2] = input[8/2]; + step1[3] = input[24/2]; + step1[4] = input[4/2]; + step1[5] = input[20/2]; + step1[6] = input[12/2]; + step1[7] = input[28/2]; + step1[8] = input[2/2]; + step1[9] = input[18/2]; + step1[10] = input[10/2]; + step1[11] = input[26/2]; + step1[12] = input[6/2]; + step1[13] = input[22/2]; + step1[14] = input[14/2]; + step1[15] = input[30/2]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[7] = step2[7]; + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); + + // stage 6 + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[14] = step1[14]; + step2[15] = step1[15]; + + // stage 7 + output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); + output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); +} + +void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[16], temp_out[16]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // First transform rows. + for (i = 0; i < 16; ++i) { + vpx_highbd_idct16_c(input, outptr, bd); + input += 16; + outptr += 16; + } + + // Then transform columns. + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + vpx_highbd_idct16_c(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { + tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; + tran_high_t s9, s10, s11, s12, s13, s14, s15; + + tran_low_t x0 = input[15]; + tran_low_t x1 = input[0]; + tran_low_t x2 = input[13]; + tran_low_t x3 = input[2]; + tran_low_t x4 = input[11]; + tran_low_t x5 = input[4]; + tran_low_t x6 = input[9]; + tran_low_t x7 = input[6]; + tran_low_t x8 = input[7]; + tran_low_t x9 = input[8]; + tran_low_t x10 = input[5]; + tran_low_t x11 = input[10]; + tran_low_t x12 = input[3]; + tran_low_t x13 = input[12]; + tran_low_t x14 = input[1]; + tran_low_t x15 = input[14]; + (void) bd; + + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 + | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { + memset(output, 0, 16 * sizeof(*output)); + return; + } + + // stage 1 + s0 = x0 * cospi_1_64 + x1 * cospi_31_64; + s1 = x0 * cospi_31_64 - x1 * cospi_1_64; + s2 = x2 * cospi_5_64 + x3 * cospi_27_64; + s3 = x2 * cospi_27_64 - x3 * cospi_5_64; + s4 = x4 * cospi_9_64 + x5 * cospi_23_64; + s5 = x4 * cospi_23_64 - x5 * cospi_9_64; + s6 = x6 * cospi_13_64 + x7 * cospi_19_64; + s7 = x6 * cospi_19_64 - x7 * cospi_13_64; + s8 = x8 * cospi_17_64 + x9 * cospi_15_64; + s9 = x8 * cospi_15_64 - x9 * cospi_17_64; + s10 = x10 * cospi_21_64 + x11 * cospi_11_64; + s11 = x10 * cospi_11_64 - x11 * cospi_21_64; + s12 = x12 * cospi_25_64 + x13 * cospi_7_64; + s13 = x12 * cospi_7_64 - x13 * cospi_25_64; + s14 = x14 * cospi_29_64 + x15 * cospi_3_64; + s15 = x14 * cospi_3_64 - x15 * cospi_29_64; + + x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); + x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4; + s5 = x5; + s6 = x6; + s7 = x7; + s8 = x8 * cospi_4_64 + x9 * cospi_28_64; + s9 = x8 * cospi_28_64 - x9 * cospi_4_64; + s10 = x10 * cospi_20_64 + x11 * cospi_12_64; + s11 = x10 * cospi_12_64 - x11 * cospi_20_64; + s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; + s13 = x12 * cospi_4_64 + x13 * cospi_28_64; + s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; + s15 = x14 * cospi_20_64 + x15 * cospi_12_64; + + x0 = HIGHBD_WRAPLOW(s0 + s4, bd); + x1 = HIGHBD_WRAPLOW(s1 + s5, bd); + x2 = HIGHBD_WRAPLOW(s2 + s6, bd); + x3 = HIGHBD_WRAPLOW(s3 + s7, bd); + x4 = HIGHBD_WRAPLOW(s0 - s4, bd); + x5 = HIGHBD_WRAPLOW(s1 - s5, bd); + x6 = HIGHBD_WRAPLOW(s2 - s6, bd); + x7 = HIGHBD_WRAPLOW(s3 - s7, bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd); + + // stage 3 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4 * cospi_8_64 + x5 * cospi_24_64; + s5 = x4 * cospi_24_64 - x5 * cospi_8_64; + s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; + s7 = x6 * cospi_8_64 + x7 * cospi_24_64; + s8 = x8; + s9 = x9; + s10 = x10; + s11 = x11; + s12 = x12 * cospi_8_64 + x13 * cospi_24_64; + s13 = x12 * cospi_24_64 - x13 * cospi_8_64; + s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; + s15 = x14 * cospi_8_64 + x15 * cospi_24_64; + + x0 = HIGHBD_WRAPLOW(s0 + s2, bd); + x1 = HIGHBD_WRAPLOW(s1 + s3, bd); + x2 = HIGHBD_WRAPLOW(s0 - s2, bd); + x3 = HIGHBD_WRAPLOW(s1 - s3, bd); + x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); + x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); + x8 = HIGHBD_WRAPLOW(s8 + s10, bd); + x9 = HIGHBD_WRAPLOW(s9 + s11, bd); + x10 = HIGHBD_WRAPLOW(s8 - s10, bd); + x11 = HIGHBD_WRAPLOW(s9 - s11, bd); + x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd); + x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); + + // stage 4 + s2 = (- cospi_16_64) * (x2 + x3); + s3 = cospi_16_64 * (x2 - x3); + s6 = cospi_16_64 * (x6 + x7); + s7 = cospi_16_64 * (-x6 + x7); + s10 = cospi_16_64 * (x10 + x11); + s11 = cospi_16_64 * (-x10 + x11); + s14 = (- cospi_16_64) * (x14 + x15); + s15 = cospi_16_64 * (x14 - x15); + + x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); + x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); + x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); + x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); + x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd); + x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd); + x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd); + x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd); + + output[0] = HIGHBD_WRAPLOW(x0, bd); + output[1] = HIGHBD_WRAPLOW(-x8, bd); + output[2] = HIGHBD_WRAPLOW(x12, bd); + output[3] = HIGHBD_WRAPLOW(-x4, bd); + output[4] = HIGHBD_WRAPLOW(x6, bd); + output[5] = HIGHBD_WRAPLOW(x14, bd); + output[6] = HIGHBD_WRAPLOW(x10, bd); + output[7] = HIGHBD_WRAPLOW(x2, bd); + output[8] = HIGHBD_WRAPLOW(x3, bd); + output[9] = HIGHBD_WRAPLOW(x11, bd); + output[10] = HIGHBD_WRAPLOW(x15, bd); + output[11] = HIGHBD_WRAPLOW(x7, bd); + output[12] = HIGHBD_WRAPLOW(x5, bd); + output[13] = HIGHBD_WRAPLOW(-x13, bd); + output[14] = HIGHBD_WRAPLOW(x9, bd); + output[15] = HIGHBD_WRAPLOW(-x1, bd); +} + +void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[16 * 16] = { 0 }; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[16], temp_out[16]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // First transform rows. Since all non-zero dct coefficients are in + // upper-left 4x4 area, we only need to calculate first 4 rows here. + for (i = 0; i < 4; ++i) { + vpx_highbd_idct16_c(input, outptr, bd); + input += 16; + outptr += 16; + } + + // Then transform columns. + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + vpx_highbd_idct16_c(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + int i, j; + tran_high_t a1; + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); + a1 = ROUND_POWER_OF_TWO(out, 6); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + dest += stride; + } +} + +static void highbd_idct32_c(const tran_low_t *input, + tran_low_t *output, int bd) { + tran_low_t step1[32], step2[32]; + tran_high_t temp1, temp2; + (void) bd; + + // stage 1 + step1[0] = input[0]; + step1[1] = input[16]; + step1[2] = input[8]; + step1[3] = input[24]; + step1[4] = input[4]; + step1[5] = input[20]; + step1[6] = input[12]; + step1[7] = input[28]; + step1[8] = input[2]; + step1[9] = input[18]; + step1[10] = input[10]; + step1[11] = input[26]; + step1[12] = input[6]; + step1[13] = input[22]; + step1[14] = input[14]; + step1[15] = input[30]; + + temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; + temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; + step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; + temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; + temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; + temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; + temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; + temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; + temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; + temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; + temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; + step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; + temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; + temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; + temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); + step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); + step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); + step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); + step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); + step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); + step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); + step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); + step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); + step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; + temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; + step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; + temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); + step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); + step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); + step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); + step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); + + step1[16] = step2[16]; + step1[31] = step2[31]; + temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; + temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; + step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; + temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[19] = step2[19]; + step1[20] = step2[20]; + temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; + temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; + temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[27] = step2[27]; + step1[28] = step2[28]; + + // stage 4 + temp1 = (step1[0] + step1[1]) * cospi_16_64; + temp2 = (step1[0] - step1[1]) * cospi_16_64; + step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; + temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; + step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); + step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); + step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; + temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; + step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; + temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[11] = step1[11]; + step2[12] = step1[12]; + + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); + step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); + step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); + step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); + step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); + step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); + step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); + step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); + step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); + step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); + step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); + step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); + + // stage 5 + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); + step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); + step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * cospi_16_64; + temp2 = (step2[5] + step2[6]) * cospi_16_64; + step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[7] = step2[7]; + + step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); + step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); + step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); + step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); + step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); + + step1[16] = step2[16]; + step1[17] = step2[17]; + temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; + temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; + step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; + temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; + step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; + temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; + temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[22] = step2[22]; + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[25] = step2[25]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // stage 6 + step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); + step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); + step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); + step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); + step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); + step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); + step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); + step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * cospi_16_64; + temp2 = (step1[10] + step1[13]) * cospi_16_64; + step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = (-step1[11] + step1[12]) * cospi_16_64; + temp2 = (step1[11] + step1[12]) * cospi_16_64; + step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step2[14] = step1[14]; + step2[15] = step1[15]; + + step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); + step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); + step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); + step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); + step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); + step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); + step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); + step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); + + step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); + step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); + step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); + step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); + step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); + step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); + step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); + step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); + + // stage 7 + step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); + step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); + step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); + step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); + step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); + step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); + step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); + step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); + step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); + step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); + step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); + step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); + step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); + step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); + step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); + step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); + + step1[16] = step2[16]; + step1[17] = step2[17]; + step1[18] = step2[18]; + step1[19] = step2[19]; + temp1 = (-step2[20] + step2[27]) * cospi_16_64; + temp2 = (step2[20] + step2[27]) * cospi_16_64; + step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = (-step2[21] + step2[26]) * cospi_16_64; + temp2 = (step2[21] + step2[26]) * cospi_16_64; + step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = (-step2[22] + step2[25]) * cospi_16_64; + temp2 = (step2[22] + step2[25]) * cospi_16_64; + step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + temp1 = (-step2[23] + step2[24]) * cospi_16_64; + temp2 = (step2[23] + step2[24]) * cospi_16_64; + step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); + step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); + step1[28] = step2[28]; + step1[29] = step2[29]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // final stage + output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); + output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); + output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); + output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); + output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); + output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); + output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); + output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); + output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); + output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); + output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); + output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); + output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); + output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); + output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); + output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); + output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); + output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); + output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); + output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); + output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); + output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); + output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); + output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); + output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); + output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); + output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); + output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); + output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); + output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); + output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); + output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); +} + +void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[32 * 32]; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Rows + for (i = 0; i < 32; ++i) { + tran_low_t zero_coeff[16]; + for (j = 0; j < 16; ++j) + zero_coeff[j] = input[2 * j] | input[2 * j + 1]; + for (j = 0; j < 8; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + for (j = 0; j < 4; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + for (j = 0; j < 2; ++j) + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; + + if (zero_coeff[0] | zero_coeff[1]) + highbd_idct32_c(input, outptr, bd); + else + memset(outptr, 0, sizeof(tran_low_t) * 32); + input += 32; + outptr += 32; + } + + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + highbd_idct32_c(temp_in, temp_out, bd); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[32 * 32] = {0}; + tran_low_t *outptr = out; + int i, j; + tran_low_t temp_in[32], temp_out[32]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Rows + // Only upper-left 8x8 has non-zero coeff. + for (i = 0; i < 8; ++i) { + highbd_idct32_c(input, outptr, bd); + input += 32; + outptr += 32; + } + // Columns + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) + temp_in[j] = out[j * 32 + i]; + highbd_idct32_c(temp_in, temp_out, bd); + for (j = 0; j < 32; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + int i, j; + int a1; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + tran_low_t out = HIGHBD_WRAPLOW( + highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); + a1 = ROUND_POWER_OF_TWO(out, 6); + + for (j = 0; j < 32; ++j) { + for (i = 0; i < 32; ++i) + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + dest += stride; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.h b/thirdparty/libvpx/vpx_dsp/inv_txfm.h new file mode 100644 index 00000000000..9cfe1be3a7d --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/inv_txfm.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_INV_TXFM_H_ +#define VPX_DSP_INV_TXFM_H_ + +#include + +#include "./vpx_config.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE tran_high_t check_range(tran_high_t input) { +#if CONFIG_COEFFICIENT_RANGE_CHECKING + // For valid VP9 input streams, intermediate stage coefficients should always + // stay within the range of a signed 16 bit integer. Coefficients can go out + // of this range for invalid/corrupt VP9 streams. However, strictly checking + // this range for every intermediate coefficient can burdensome for a decoder, + // therefore the following assertion is only enabled when configured with + // --enable-coefficient-range-checking. + assert(INT16_MIN <= input); + assert(input <= INT16_MAX); +#endif // CONFIG_COEFFICIENT_RANGE_CHECKING + return input; +} + +static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { + tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); + return (tran_high_t)rv; +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE tran_high_t highbd_check_range(tran_high_t input, + int bd) { +#if CONFIG_COEFFICIENT_RANGE_CHECKING + // For valid highbitdepth VP9 streams, intermediate stage coefficients will + // stay within the ranges: + // - 8 bit: signed 16 bit integer + // - 10 bit: signed 18 bit integer + // - 12 bit: signed 20 bit integer + const int32_t int_max = (1 << (7 + bd)) - 1; + const int32_t int_min = -int_max - 1; + assert(int_min <= input); + assert(input <= int_max); + (void) int_min; +#endif // CONFIG_COEFFICIENT_RANGE_CHECKING + (void) bd; + return input; +} + +static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { + tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); + return (tran_high_t)rv; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_EMULATE_HARDWARE +// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a +// non-normative method to handle overflows. A stream that causes +// overflows in the inverse transform is considered invalid in VP9, +// and a hardware implementer is free to choose any reasonable +// method to handle overflows. However to aid in hardware +// verification they can use a specific implementation of the +// WRAPLOW() macro below that is identical to their intended +// hardware implementation (and also use configure options to trigger +// the C-implementation of the transform). +// +// The particular WRAPLOW implementation below performs strict +// overflow wrapping to match common hardware implementations. +// bd of 8 uses trans_low with 16bits, need to remove 16bits +// bd of 10 uses trans_low with 18bits, need to remove 14bits +// bd of 12 uses trans_low with 20bits, need to remove 12bits +// bd of x uses trans_low with 8+x bits, need to remove 24-x bits + +#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH + +#else // CONFIG_EMULATE_HARDWARE + +#define WRAPLOW(x) ((int32_t)check_range(x)) +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_WRAPLOW(x, bd) \ + ((int32_t)highbd_check_range((x), bd)) +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_EMULATE_HARDWARE + +void idct4_c(const tran_low_t *input, tran_low_t *output); +void idct8_c(const tran_low_t *input, tran_low_t *output); +void idct16_c(const tran_low_t *input, tran_low_t *output); +void idct32_c(const tran_low_t *input, tran_low_t *output); +void iadst4_c(const tran_low_t *input, tran_low_t *output); +void iadst8_c(const tran_low_t *input, tran_low_t *output); +void iadst16_c(const tran_low_t *input, tran_low_t *output); + +#if CONFIG_VP9_HIGHBITDEPTH +void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd); + +void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); +void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); + +static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, + int bd) { + trans = HIGHBD_WRAPLOW(trans, bd); + return clip_pixel_highbd(dest + (int)trans, bd); +} +#endif + +static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { + trans = WRAPLOW(trans); + return clip_pixel(dest + (int)trans); +} +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_INV_TXFM_H_ diff --git a/thirdparty/libvpx/vpx_dsp/loopfilter.c b/thirdparty/libvpx/vpx_dsp/loopfilter.c new file mode 100644 index 00000000000..645a1ab95ee --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/loopfilter.c @@ -0,0 +1,767 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" + +static INLINE int8_t signed_char_clamp(int t) { + return (int8_t)clamp(t, -128, 127); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE int16_t signed_char_clamp_high(int t, int bd) { + switch (bd) { + case 10: + return (int16_t)clamp(t, -128*4, 128*4-1); + case 12: + return (int16_t)clamp(t, -128*16, 128*16-1); + case 8: + default: + return (int16_t)clamp(t, -128, 128-1); + } +} +#endif + +// should we apply any filter at all: 11111111 yes, 00000000 no +static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, + uint8_t p3, uint8_t p2, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1, + uint8_t q2, uint8_t q3) { + int8_t mask = 0; + mask |= (abs(p3 - p2) > limit) * -1; + mask |= (abs(p2 - p1) > limit) * -1; + mask |= (abs(p1 - p0) > limit) * -1; + mask |= (abs(q1 - q0) > limit) * -1; + mask |= (abs(q2 - q1) > limit) * -1; + mask |= (abs(q3 - q2) > limit) * -1; + mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + return ~mask; +} + +static INLINE int8_t flat_mask4(uint8_t thresh, + uint8_t p3, uint8_t p2, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1, + uint8_t q2, uint8_t q3) { + int8_t mask = 0; + mask |= (abs(p1 - p0) > thresh) * -1; + mask |= (abs(q1 - q0) > thresh) * -1; + mask |= (abs(p2 - p0) > thresh) * -1; + mask |= (abs(q2 - q0) > thresh) * -1; + mask |= (abs(p3 - p0) > thresh) * -1; + mask |= (abs(q3 - q0) > thresh) * -1; + return ~mask; +} + +static INLINE int8_t flat_mask5(uint8_t thresh, + uint8_t p4, uint8_t p3, + uint8_t p2, uint8_t p1, + uint8_t p0, uint8_t q0, + uint8_t q1, uint8_t q2, + uint8_t q3, uint8_t q4) { + int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); + mask |= (abs(p4 - p0) > thresh) * -1; + mask |= (abs(q4 - q0) > thresh) * -1; + return ~mask; +} + +// is there high edge variance internal edge: 11111111 yes, 00000000 no +static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1) { + int8_t hev = 0; + hev |= (abs(p1 - p0) > thresh) * -1; + hev |= (abs(q1 - q0) > thresh) * -1; + return hev; +} + +static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, + uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { + int8_t filter1, filter2; + + const int8_t ps1 = (int8_t) *op1 ^ 0x80; + const int8_t ps0 = (int8_t) *op0 ^ 0x80; + const int8_t qs0 = (int8_t) *oq0 ^ 0x80; + const int8_t qs1 = (int8_t) *oq1 ^ 0x80; + const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); + + // add outer taps if we have high edge variance + int8_t filter = signed_char_clamp(ps1 - qs1) & hev; + + // inner taps + filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; + + // save bottom 3 bits so that we round one side +4 and the other +3 + // if it equals 4 we'll set to adjust by -1 to account for the fact + // we'd round 3 the other way + filter1 = signed_char_clamp(filter + 4) >> 3; + filter2 = signed_char_clamp(filter + 3) >> 3; + + *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; + *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; + + // outer tap adjustments + filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; + + *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; + *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; +} + +void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p); + ++s; + } +} + +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); +} + +void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + filter4(mask, *thresh, s - 2, s - 1, s, s + 1); + s += pitch; + } +} + +void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); +} + +static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, + uint8_t *op3, uint8_t *op2, + uint8_t *op1, uint8_t *op0, + uint8_t *oq0, uint8_t *oq1, + uint8_t *oq2, uint8_t *oq3) { + if (flat && mask) { + const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; + const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; + + // 7-tap filter [1, 1, 1, 2, 1, 1, 1] + *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); + *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); + *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); + *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); + *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); + *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); + } else { + filter4(mask, thresh, op1, op0, oq0, oq1); + } +} + +void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, + s, s + 1 * p, s + 2 * p, s + 3 * p); + ++s; + } +} + +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); +} + +void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + int i; + + for (i = 0; i < 8; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3); + s += pitch; + } +} + +void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); +} + +static INLINE void filter16(int8_t mask, uint8_t thresh, + uint8_t flat, uint8_t flat2, + uint8_t *op7, uint8_t *op6, + uint8_t *op5, uint8_t *op4, + uint8_t *op3, uint8_t *op2, + uint8_t *op1, uint8_t *op0, + uint8_t *oq0, uint8_t *oq1, + uint8_t *oq2, uint8_t *oq3, + uint8_t *oq4, uint8_t *oq5, + uint8_t *oq6, uint8_t *oq7) { + if (flat2 && flat && mask) { + const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4, + p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; + + const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, + q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; + + // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] + *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + + q0, 4); + *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + + q0 + q1, 4); + *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + + q0 + q1 + q2, 4); + *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + + q0 + q1 + q2 + q3, 4); + *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4, 4); + *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + + q0 + q1 + q2 + q3 + q4 + q5, 4); + *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); + *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); + *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + + q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); + *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); + *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); + *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); + *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); + *oq6 = ROUND_POWER_OF_TWO(p0 + + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); + } else { + filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3); + } +} + +static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8 * count; ++i) { + const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat2 = flat_mask5(1, + s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, + q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]); + + filter16(mask, *thresh, flat, flat2, + s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, + s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, + s, s + 1 * p, s + 2 * p, s + 3 * p, + s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p); + ++s; + } +} + +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); +} + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); +} + +static void mb_lpf_vertical_edge_w(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { + int i; + + for (i = 0; i < count; ++i) { + const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); + const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, + q0, s[4], s[5], s[6], s[7]); + + filter16(mask, *thresh, flat, flat2, + s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); + s += p; + } +} + +void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); +} + +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); +} + +#if CONFIG_VP9_HIGHBITDEPTH +// Should we apply any filter at all: 11111111 yes, 00000000 no ? +static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit, + uint16_t p3, uint16_t p2, + uint16_t p1, uint16_t p0, + uint16_t q0, uint16_t q1, + uint16_t q2, uint16_t q3, int bd) { + int8_t mask = 0; + int16_t limit16 = (uint16_t)limit << (bd - 8); + int16_t blimit16 = (uint16_t)blimit << (bd - 8); + mask |= (abs(p3 - p2) > limit16) * -1; + mask |= (abs(p2 - p1) > limit16) * -1; + mask |= (abs(p1 - p0) > limit16) * -1; + mask |= (abs(q1 - q0) > limit16) * -1; + mask |= (abs(q2 - q1) > limit16) * -1; + mask |= (abs(q3 - q2) > limit16) * -1; + mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; + return ~mask; +} + +static INLINE int8_t highbd_flat_mask4(uint8_t thresh, + uint16_t p3, uint16_t p2, + uint16_t p1, uint16_t p0, + uint16_t q0, uint16_t q1, + uint16_t q2, uint16_t q3, int bd) { + int8_t mask = 0; + int16_t thresh16 = (uint16_t)thresh << (bd - 8); + mask |= (abs(p1 - p0) > thresh16) * -1; + mask |= (abs(q1 - q0) > thresh16) * -1; + mask |= (abs(p2 - p0) > thresh16) * -1; + mask |= (abs(q2 - q0) > thresh16) * -1; + mask |= (abs(p3 - p0) > thresh16) * -1; + mask |= (abs(q3 - q0) > thresh16) * -1; + return ~mask; +} + +static INLINE int8_t highbd_flat_mask5(uint8_t thresh, + uint16_t p4, uint16_t p3, + uint16_t p2, uint16_t p1, + uint16_t p0, uint16_t q0, + uint16_t q1, uint16_t q2, + uint16_t q3, uint16_t q4, int bd) { + int8_t mask = ~highbd_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd); + int16_t thresh16 = (uint16_t)thresh << (bd - 8); + mask |= (abs(p4 - p0) > thresh16) * -1; + mask |= (abs(q4 - q0) > thresh16) * -1; + return ~mask; +} + +// Is there high edge variance internal edge: +// 11111111_11111111 yes, 00000000_00000000 no ? +static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0, + uint16_t q0, uint16_t q1, int bd) { + int16_t hev = 0; + int16_t thresh16 = (uint16_t)thresh << (bd - 8); + hev |= (abs(p1 - p0) > thresh16) * -1; + hev |= (abs(q1 - q0) > thresh16) * -1; + return hev; +} + +static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, + uint16_t *op0, uint16_t *oq0, uint16_t *oq1, + int bd) { + int16_t filter1, filter2; + // ^0x80 equivalent to subtracting 0x80 from the values to turn them + // into -128 to +127 instead of 0 to 255. + int shift = bd - 8; + const int16_t ps1 = (int16_t)*op1 - (0x80 << shift); + const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); + const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); + const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); + const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); + + // Add outer taps if we have high edge variance. + int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; + + // Inner taps. + filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask; + + // Save bottom 3 bits so that we round one side +4 and the other +3 + // if it equals 4 we'll set to adjust by -1 to account for the fact + // we'd round 3 the other way. + filter1 = signed_char_clamp_high(filter + 4, bd) >> 3; + filter2 = signed_char_clamp_high(filter + 3, bd) >> 3; + + *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift); + *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift); + + // Outer tap adjustments. + filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; + + *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift); + *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); +} + +void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int bd) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint16_t p3 = s[-4 * p]; + const uint16_t p2 = s[-3 * p]; + const uint16_t p1 = s[-2 * p]; + const uint16_t p0 = s[-p]; + const uint16_t q0 = s[0 * p]; + const uint16_t q1 = s[1 * p]; + const uint16_t q2 = s[2 * p]; + const uint16_t q3 = s[3 * p]; + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd); + ++s; + } +} + +void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1, + int bd) { + vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); +} + +void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd); + s += pitch; + } +} + +void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1, + int bd) { + vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, + thresh1, bd); +} + +static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, + uint16_t *op3, uint16_t *op2, + uint16_t *op1, uint16_t *op0, + uint16_t *oq0, uint16_t *oq1, + uint16_t *oq2, uint16_t *oq3, int bd) { + if (flat && mask) { + const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; + const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; + + // 7-tap filter [1, 1, 1, 2, 1, 1, 1] + *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); + *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); + *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); + *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); + *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); + *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); + } else { + highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd); + } +} + +void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8; ++i) { + const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; + const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, + bd); + highbd_filter8(mask, *thresh, flat, + s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, + s, s + 1 * p, s + 2 * p, s + 3 * p, bd); + ++s; + } +} + +void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1, + int bd) { + vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); +} + +void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd) { + int i; + + for (i = 0; i < 8; ++i) { + const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; + const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, + bd); + highbd_filter8(mask, *thresh, flat, + s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, + bd); + s += pitch; + } +} + +void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1, + int bd) { + vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, + thresh1, bd); +} + +static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, + uint8_t flat, uint8_t flat2, + uint16_t *op7, uint16_t *op6, + uint16_t *op5, uint16_t *op4, + uint16_t *op3, uint16_t *op2, + uint16_t *op1, uint16_t *op0, + uint16_t *oq0, uint16_t *oq1, + uint16_t *oq2, uint16_t *oq3, + uint16_t *oq4, uint16_t *oq5, + uint16_t *oq6, uint16_t *oq7, int bd) { + if (flat2 && flat && mask) { + const uint16_t p7 = *op7; + const uint16_t p6 = *op6; + const uint16_t p5 = *op5; + const uint16_t p4 = *op4; + const uint16_t p3 = *op3; + const uint16_t p2 = *op2; + const uint16_t p1 = *op1; + const uint16_t p0 = *op0; + const uint16_t q0 = *oq0; + const uint16_t q1 = *oq1; + const uint16_t q2 = *oq2; + const uint16_t q3 = *oq3; + const uint16_t q4 = *oq4; + const uint16_t q5 = *oq5; + const uint16_t q6 = *oq6; + const uint16_t q7 = *oq7; + + // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] + *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + + q0, 4); + *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + + q0 + q1, 4); + *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + + q0 + q1 + q2, 4); + *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + + q0 + q1 + q2 + q3, 4); + *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4, 4); + *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + + q0 + q1 + q2 + q3 + q4 + q5, 4); + *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); + *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); + *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + + q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); + *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); + *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); + *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); + *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); + *oq6 = ROUND_POWER_OF_TWO(p0 + + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); + } else { + highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3, + bd); + } +} + +static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count, int bd) { + int i; + + // loop filter designed to work using chars so that we can make maximum use + // of 8 bit simd instructions. + for (i = 0; i < 8 * count; ++i) { + const uint16_t p3 = s[-4 * p]; + const uint16_t p2 = s[-3 * p]; + const uint16_t p1 = s[-2 * p]; + const uint16_t p0 = s[-p]; + const uint16_t q0 = s[0 * p]; + const uint16_t q1 = s[1 * p]; + const uint16_t q2 = s[2 * p]; + const uint16_t q3 = s[3 * p]; + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, + bd); + const int8_t flat2 = highbd_flat_mask5( + 1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, + q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p], bd); + + highbd_filter16(mask, *thresh, flat, flat2, + s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, + s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, + s, s + 1 * p, s + 2 * p, s + 3 * p, + s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p, + bd); + ++s; + } +} + +void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); +} + +void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); +} + +static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count, int bd) { + int i; + + for (i = 0; i < count; ++i) { + const uint16_t p3 = s[-4]; + const uint16_t p2 = s[-3]; + const uint16_t p1 = s[-2]; + const uint16_t p0 = s[-1]; + const uint16_t q0 = s[0]; + const uint16_t q1 = s[1]; + const uint16_t q2 = s[2]; + const uint16_t q3 = s[3]; + const int8_t mask = highbd_filter_mask(*limit, *blimit, + p3, p2, p1, p0, q0, q1, q2, q3, bd); + const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, + bd); + const int8_t flat2 = highbd_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, + q0, s[4], s[5], s[6], s[7], bd); + + highbd_filter16(mask, *thresh, flat, flat2, + s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, + bd); + s += p; + } +} + +void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd) { + highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); +} + +void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int bd) { + highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/prob.c b/thirdparty/libvpx/vpx_dsp/prob.c new file mode 100644 index 00000000000..639d24dd2f0 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/prob.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./prob.h" + +const uint8_t vpx_norm[256] = { + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static unsigned int tree_merge_probs_impl(unsigned int i, + const vpx_tree_index *tree, + const vpx_prob *pre_probs, + const unsigned int *counts, + vpx_prob *probs) { + const int l = tree[i]; + const unsigned int left_count = (l <= 0) + ? counts[-l] + : tree_merge_probs_impl(l, tree, pre_probs, counts, probs); + const int r = tree[i + 1]; + const unsigned int right_count = (r <= 0) + ? counts[-r] + : tree_merge_probs_impl(r, tree, pre_probs, counts, probs); + const unsigned int ct[2] = { left_count, right_count }; + probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct); + return left_count + right_count; +} + +void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, + const unsigned int *counts, vpx_prob *probs) { + tree_merge_probs_impl(0, tree, pre_probs, counts, probs); +} diff --git a/thirdparty/libvpx/vpx_dsp/prob.h b/thirdparty/libvpx/vpx_dsp/prob.h new file mode 100644 index 00000000000..c3cb103ffb5 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/prob.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_PROB_H_ +#define VPX_DSP_PROB_H_ + +#include "./vpx_config.h" +#include "./vpx_dsp_common.h" + +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint8_t vpx_prob; + +#define MAX_PROB 255 + +#define vpx_prob_half ((vpx_prob) 128) + +typedef int8_t vpx_tree_index; + +#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2) + +#define vpx_complement(x) (255 - x) + +#define MODE_MV_COUNT_SAT 20 + +/* We build coding trees compactly in arrays. + Each node of the tree is a pair of vpx_tree_indices. + Array index often references a corresponding probability table. + Index <= 0 means done encoding/decoding and value = -Index, + Index > 0 means need another bit, specification at index. + Nonnegative indices are always even; processing begins at node 0. */ + +typedef const vpx_tree_index vpx_tree[]; + +static INLINE vpx_prob clip_prob(int p) { + return (p > 255) ? 255 : (p < 1) ? 1 : p; +} + +static INLINE vpx_prob get_prob(int num, int den) { + return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den); +} + +static INLINE vpx_prob get_binary_prob(int n0, int n1) { + return get_prob(n0, n0 + n1); +} + +/* This function assumes prob1 and prob2 are already within [1,255] range. */ +static INLINE vpx_prob weighted_prob(int prob1, int prob2, int factor) { + return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); +} + +static INLINE vpx_prob merge_probs(vpx_prob pre_prob, + const unsigned int ct[2], + unsigned int count_sat, + unsigned int max_update_factor) { + const vpx_prob prob = get_binary_prob(ct[0], ct[1]); + const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat); + const unsigned int factor = max_update_factor * count / count_sat; + return weighted_prob(pre_prob, prob, factor); +} + +// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; +static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { + 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, + 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 +}; + +static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob, + const unsigned int ct[2]) { + const unsigned int den = ct[0] + ct[1]; + if (den == 0) { + return pre_prob; + } else { + const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT); + const unsigned int factor = count_to_update_factor[count]; + const vpx_prob prob = + clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den); + return weighted_prob(pre_prob, prob, factor); + } +} + +void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, + const unsigned int *counts, vpx_prob *probs); + + +DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_PROB_H_ diff --git a/thirdparty/libvpx/vpx_dsp/txfm_common.h b/thirdparty/libvpx/vpx_dsp/txfm_common.h new file mode 100644 index 00000000000..442e6a57b5b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/txfm_common.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_TXFM_COMMON_H_ +#define VPX_DSP_TXFM_COMMON_H_ + +#include "vpx_dsp/vpx_dsp_common.h" + +// Constants and Macros used by all idct/dct functions +#define DCT_CONST_BITS 14 +#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) + +#define UNIT_QUANT_SHIFT 2 +#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) + +// Constants: +// for (int i = 1; i< 32; ++i) +// printf("static const int cospi_%d_64 = %.0f;\n", i, +// round(16384 * cos(i*M_PI/64))); +// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) +static const tran_high_t cospi_1_64 = 16364; +static const tran_high_t cospi_2_64 = 16305; +static const tran_high_t cospi_3_64 = 16207; +static const tran_high_t cospi_4_64 = 16069; +static const tran_high_t cospi_5_64 = 15893; +static const tran_high_t cospi_6_64 = 15679; +static const tran_high_t cospi_7_64 = 15426; +static const tran_high_t cospi_8_64 = 15137; +static const tran_high_t cospi_9_64 = 14811; +static const tran_high_t cospi_10_64 = 14449; +static const tran_high_t cospi_11_64 = 14053; +static const tran_high_t cospi_12_64 = 13623; +static const tran_high_t cospi_13_64 = 13160; +static const tran_high_t cospi_14_64 = 12665; +static const tran_high_t cospi_15_64 = 12140; +static const tran_high_t cospi_16_64 = 11585; +static const tran_high_t cospi_17_64 = 11003; +static const tran_high_t cospi_18_64 = 10394; +static const tran_high_t cospi_19_64 = 9760; +static const tran_high_t cospi_20_64 = 9102; +static const tran_high_t cospi_21_64 = 8423; +static const tran_high_t cospi_22_64 = 7723; +static const tran_high_t cospi_23_64 = 7005; +static const tran_high_t cospi_24_64 = 6270; +static const tran_high_t cospi_25_64 = 5520; +static const tran_high_t cospi_26_64 = 4756; +static const tran_high_t cospi_27_64 = 3981; +static const tran_high_t cospi_28_64 = 3196; +static const tran_high_t cospi_29_64 = 2404; +static const tran_high_t cospi_30_64 = 1606; +static const tran_high_t cospi_31_64 = 804; + +// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 +static const tran_high_t sinpi_1_9 = 5283; +static const tran_high_t sinpi_2_9 = 9929; +static const tran_high_t sinpi_3_9 = 13377; +static const tran_high_t sinpi_4_9 = 15212; + +#endif // VPX_DSP_TXFM_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.c b/thirdparty/libvpx/vpx_dsp/vpx_convolve.c new file mode 100644 index 00000000000..2d1c927cbea --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/vpx_convolve.c @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_convolve.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_ports/mem.h" + +static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, int w, int h) { + int x, y; + src -= SUBPEL_TAPS / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_x[k] * x_filter[k]; + dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, int w, int h) { + int x, y; + src -= SUBPEL_TAPS / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_x[k] * x_filter[k]; + dst[x] = ROUND_POWER_OF_TWO(dst[x] + + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h) { + int x, y; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h) { + int x, y; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +static void convolve(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *const x_filters, + int x0_q4, int x_step_q4, + const InterpKernel *const y_filters, + int y0_q4, int y_step_q4, + int w, int h) { + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. + uint8_t temp[135 * 64]; + int intermediate_height = + (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; + + assert(w <= 64); + assert(h <= 64); + assert(y_step_q4 <= 32); + assert(x_step_q4 <= 32); + + convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, + x_filters, x0_q4, x_step_q4, w, intermediate_height); + convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, + y_filters, y0_q4, y_step_q4, w, h); +} + +static const InterpKernel *get_filter_base(const int16_t *filter) { + // NOTE: This assumes that the filter table is 256-byte aligned. + // TODO(agrange) Modify to make independent of table alignment. + return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); +} + +static int get_filter_offset(const int16_t *f, const InterpKernel *base) { + return (int)((const InterpKernel *)(intptr_t)f - base); +} + +void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + + (void)filter_y; + (void)y_step_q4; + + convolve_horiz(src, src_stride, dst, dst_stride, filters_x, + x0_q4, x_step_q4, w, h); +} + +void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + + (void)filter_y; + (void)y_step_q4; + + convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, + x0_q4, x_step_q4, w, h); +} + +void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + + (void)filter_x; + (void)x_step_q4; + + convolve_vert(src, src_stride, dst, dst_stride, filters_y, + y0_q4, y_step_q4, w, h); +} + +void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + + (void)filter_x; + (void)x_step_q4; + + convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, + y0_q4, y_step_q4, w, h); +} + +void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + + convolve(src, src_stride, dst, dst_stride, + filters_x, x0_q4, x_step_q4, + filters_y, y0_q4, y_step_q4, w, h); +} + +void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + /* Fixed size intermediate buffer places limits on parameters. */ + DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); + assert(w <= 64); + assert(h <= 64); + + vpx_convolve8_c(src, src_stride, temp, 64, + filter_x, x_step_q4, filter_y, y_step_q4, w, h); + vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); +} + +void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int r; + + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + + for (r = h; r > 0; --r) { + memcpy(dst, src, w); + src += src_stride; + dst += dst_stride; + } +} + +void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h) { + int x, y; + + (void)filter_x; (void)filter_x_stride; + (void)filter_y; (void)filter_y_stride; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) + dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); + + src += src_stride; + dst += dst_stride; + } +} + +void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, + filter_y, y_step_q4, w, h); +} + +void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, + filter_y, y_step_q4, w, h); +} + +void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, + filter_y, y_step_q4, w, h); +} + +void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, + x_step_q4, filter_y, y_step_q4, w, h); +} + +void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, + x_step_q4, filter_y, y_step_q4, w, h); +} + +void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, + filter_y, y_step_q4, w, h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, + int w, int h, int bd) { + int x, y; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + src -= SUBPEL_TAPS / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_x[k] * x_filter[k]; + dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, + int w, int h, int bd) { + int x, y; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + src -= SUBPEL_TAPS / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_x[k] * x_filter[k]; + dst[x] = ROUND_POWER_OF_TWO(dst[x] + + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h, + int bd) { + int x, y; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + dst[y * dst_stride] = clip_pixel_highbd( + ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h, + int bd) { + int x, y; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *const x_filters, + int x0_q4, int x_step_q4, + const InterpKernel *const y_filters, + int y0_q4, int y_step_q4, + int w, int h, int bd) { + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. + uint16_t temp[64 * 135]; + int intermediate_height = + (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; + + assert(w <= 64); + assert(h <= 64); + assert(y_step_q4 <= 32); + assert(x_step_q4 <= 32); + + highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), + src_stride, CONVERT_TO_BYTEPTR(temp), 64, + x_filters, x0_q4, x_step_q4, w, + intermediate_height, bd); + highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), + 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, + w, h, bd); +} + + +void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + (void)filter_y; + (void)y_step_q4; + + highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, + x0_q4, x_step_q4, w, h, bd); +} + +void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + (void)filter_y; + (void)y_step_q4; + + highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, + x0_q4, x_step_q4, w, h, bd); +} + +void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + (void)filter_x; + (void)x_step_q4; + + highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, + y0_q4, y_step_q4, w, h, bd); +} + +void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + (void)filter_x; + (void)x_step_q4; + + highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, + y0_q4, y_step_q4, w, h, bd); +} + +void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + + highbd_convolve(src, src_stride, dst, dst_stride, + filters_x, x0_q4, x_step_q4, + filters_y, y0_q4, y_step_q4, w, h, bd); +} + +void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd) { + // Fixed size intermediate buffer places limits on parameters. + DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]); + assert(w <= 64); + assert(h <= 64); + + vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, + filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); + vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, + NULL, 0, NULL, 0, w, h, bd); +} + +void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h, int bd) { + int r; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + (void)filter_x; + (void)filter_y; + (void)filter_x_stride; + (void)filter_y_stride; + (void)bd; + + for (r = h; r > 0; --r) { + memcpy(dst, src, w * sizeof(uint16_t)); + src += src_stride; + dst += dst_stride; + } +} + +void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, + uint8_t *dst8, ptrdiff_t dst_stride, + const int16_t *filter_x, int filter_x_stride, + const int16_t *filter_y, int filter_y_stride, + int w, int h, int bd) { + int x, y; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + (void)filter_x; + (void)filter_y; + (void)filter_x_stride; + (void)filter_y_stride; + (void)bd; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); + } + src += src_stride; + dst += dst_stride; + } +} +#endif diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.h b/thirdparty/libvpx/vpx_dsp/vpx_convolve.h new file mode 100644 index 00000000000..9ed3f1750f2 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/vpx_convolve.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VPX_DSP_VPX_CONVOLVE_H_ +#define VPX_DSP_VPX_CONVOLVE_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h); + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h, int bd); +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_VPX_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h b/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h new file mode 100644 index 00000000000..a1d0a51ef56 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_VPX_DSP_COMMON_H_ +#define VPX_DSP_VPX_DSP_COMMON_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) +#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) + +#if CONFIG_VP9_HIGHBITDEPTH +// Note: +// tran_low_t is the datatype used for final transform coefficients. +// tran_high_t is the datatype used for intermediate transform stages. +typedef int64_t tran_high_t; +typedef int32_t tran_low_t; +#else +// Note: +// tran_low_t is the datatype used for final transform coefficients. +// tran_high_t is the datatype used for intermediate transform stages. +typedef int32_t tran_high_t; +typedef int16_t tran_low_t; +#endif // CONFIG_VP9_HIGHBITDEPTH + +static INLINE uint8_t clip_pixel(int val) { + return (val > 255) ? 255 : (val < 0) ? 0 : val; +} + +static INLINE int clamp(int value, int low, int high) { + return value < low ? low : (value > high ? high : value); +} + +static INLINE double fclamp(double value, double low, double high) { + return value < low ? low : (value > high ? high : value); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE uint16_t clip_pixel_highbd(int val, int bd) { + switch (bd) { + case 8: + default: + return (uint16_t)clamp(val, 0, 255); + case 10: + return (uint16_t)clamp(val, 0, 1023); + case 12: + return (uint16_t)clamp(val, 0, 4095); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_VPX_DSP_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c b/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c new file mode 100644 index 00000000000..5fe27b614bd --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vpx_dsp_rtcd.h" +#include "vpx_ports/vpx_once.h" + +void vpx_dsp_rtcd() { + once(setup_rtcd_internal); +} diff --git a/thirdparty/libvpx/vpx_dsp/vpx_filter.h b/thirdparty/libvpx/vpx_dsp/vpx_filter.h new file mode 100644 index 00000000000..2617febf3b3 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/vpx_filter.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_VPX_FILTER_H_ +#define VPX_DSP_VPX_FILTER_H_ + +#include "vpx/vpx_integer.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +#define FILTER_BITS 7 + +#define SUBPEL_BITS 4 +#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) +#define SUBPEL_SHIFTS (1 << SUBPEL_BITS) +#define SUBPEL_TAPS 8 + +typedef int16_t InterpKernel[SUBPEL_TAPS]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_DSP_VPX_FILTER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/convolve.h b/thirdparty/libvpx/vpx_dsp/x86/convolve.h new file mode 100644 index 00000000000..7e43eb7c724 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/convolve.h @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VPX_DSP_X86_CONVOLVE_H_ +#define VPX_DSP_X86_CONVOLVE_H_ + +#include + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +typedef void filter8_1dfunction ( + const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter +); + +#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + assert(filter[3] != 128); \ + assert(step_q4 == 16); \ + if (filter[0] | filter[1] | filter[2]) { \ + while (w >= 16) { \ + vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + if (w == 8) { \ + vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + } else if (w == 4) { \ + vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + } \ + } else { \ + while (w >= 16) { \ + vpx_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + if (w == 8) { \ + vpx_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + } else if (w == 4) { \ + vpx_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter); \ + } \ + } \ +} + +#define FUN_CONV_2D(avg, opt) \ +void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h) { \ + assert(filter_x[3] != 128); \ + assert(filter_y[3] != 128); \ + assert(w <= 64); \ + assert(h <= 64); \ + assert(x_step_q4 == 16); \ + assert(y_step_q4 == 16); \ + if (filter_x[0] | filter_x[1] | filter_x[2]) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 7); \ + vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } else { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h + 1); \ + vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, \ + y_step_q4, w, h); \ + } \ +} + +#if CONFIG_VP9_HIGHBITDEPTH + +typedef void highbd_filter8_1dfunction ( + const uint16_t *src_ptr, + const ptrdiff_t src_pitch, + uint16_t *output_ptr, + ptrdiff_t out_pitch, + unsigned int output_height, + const int16_t *filter, + int bd +); + +#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ + void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ + ptrdiff_t src_stride, \ + uint8_t *dst8, \ + ptrdiff_t dst_stride, \ + const int16_t *filter_x, \ + int x_step_q4, \ + const int16_t *filter_y, \ + int y_step_q4, \ + int w, int h, int bd) { \ + if (step_q4 == 16 && filter[3] != 128) { \ + uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + if (filter[0] | filter[1] | filter[2]) { \ + while (w >= 16) { \ + vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } else { \ + while (w >= 16) { \ + vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ + src_stride, \ + dst, \ + dst_stride, \ + h, \ + filter, \ + bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + } \ + } \ + if (w) { \ + vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, \ + w, h, bd); \ + } \ +} + +#define HIGH_FUN_CONV_2D(avg, opt) \ +void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ + uint8_t *dst, ptrdiff_t dst_stride, \ + const int16_t *filter_x, int x_step_q4, \ + const int16_t *filter_y, int y_step_q4, \ + int w, int h, int bd) { \ + assert(w <= 64); \ + assert(h <= 64); \ + if (x_step_q4 == 16 && y_step_q4 == 16) { \ + if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 7, bd); \ + vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ + 64, dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } else { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + vpx_highbd_convolve8_horiz_##opt(src, src_stride, \ + CONVERT_TO_BYTEPTR(fdata2), 64, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h + 1, bd); \ + vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ + dst, dst_stride, \ + filter_x, x_step_q4, \ + filter_y, y_step_q4, \ + w, h, bd); \ + } \ + } else { \ + vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ + filter_x, x_step_q4, filter_y, y_step_q4, w, \ + h, bd); \ + } \ +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VPX_DSP_X86_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm new file mode 100644 index 00000000000..cd6a6ae982c --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm @@ -0,0 +1,860 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION_RODATA +pb_1: times 16 db 1 +pw_4: times 8 dw 4 +pw_8: times 8 dw 8 +pw_16: times 8 dw 16 +pw_32: times 8 dw 32 +dc_128: times 16 db 128 +pw2_4: times 8 dw 2 +pw2_8: times 8 dw 4 +pw2_16: times 8 dw 8 +pw2_32: times 8 dw 16 + +SECTION .text + +; ------------------------------------------ +; input: x, y, z, result +; +; trick from pascal +; (x+2y+z+2)>>2 can be calculated as: +; result = avg(x,z) +; result -= xor(x,z) & 1 +; result = avg(result,y) +; ------------------------------------------ +%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 + pavgb %4, %1, %3 + pxor %3, %1 + pand %3, [GLOBAL(pb_1)] + psubb %4, %3 + pavgb %4, %2 +%endmacro + +INIT_XMM sse2 +cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset + GET_GOT goffsetq + + movq m0, [aboveq] + DEFINE_ARGS dst, stride, temp + psrldq m1, m0, 1 + psrldq m2, m0, 2 + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 + + ; store 4 lines + movd [dstq ], m3 + psrlq m3, 8 + movd [dstq+strideq ], m3 + lea dstq, [dstq+strideq*2] + psrlq m3, 8 + movd [dstq ], m3 + psrlq m3, 8 + movd [dstq+strideq ], m3 + psrlq m0, 56 + movd tempq, m0 + mov [dstq+strideq+3], tempb + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset + GET_GOT goffsetq + + movu m1, [aboveq] + pslldq m0, m1, 1 + psrldq m2, m1, 1 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 + punpckhbw m0, m0 ; 7 7 + punpcklwd m0, m0 ; 7 7 7 7 + punpckldq m0, m0 ; 7 7 7 7 7 7 7 7 + punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7 + + ; store 4 lines + psrldq m3, 1 + movq [dstq ], m3 + psrldq m3, 1 + movq [dstq+strideq ], m3 + psrldq m3, 1 + movq [dstq+strideq*2], m3 + psrldq m3, 1 + movq [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + + ; store next 4 lines + psrldq m3, 1 + movq [dstq ], m3 + psrldq m3, 1 + movq [dstq+strideq ], m3 + psrldq m3, 1 + movq [dstq+strideq*2], m3 + psrldq m3, 1 + movq [dstq+stride3q ], m3 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset + GET_GOT goffsetq + + movd m0, [leftq] ; abcd [byte] + punpcklbw m4, m0, m0 ; aabb ccdd + punpcklwd m4, m4 ; aaaa bbbb cccc dddd + psrldq m4, 12 ; dddd + punpckldq m0, m4 ; abcd dddd + psrldq m1, m0, 1 ; bcdd + psrldq m2, m0, 2 ; cddd + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d + pavgb m1, m0 ; ab, bc, cd, d [byte] + + punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d + movd [dstq ], m1 + psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d + movd [dstq+strideq], m1 + + lea dstq, [dstq+strideq*2] + psrlq m1, 16 ; cd, c3d, d, d + movd [dstq ], m1 + movd [dstq+strideq], m4 ; d, d, d, d + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + movd m2, [leftq] + movd m0, [aboveq] + pxor m1, m1 + punpckldq m0, m2 + psadbw m0, m1 + paddw m0, [GLOBAL(pw_4)] + psraw m0, 3 + pshuflw m0, m0, 0x0 + packuswb m0, m0 + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset + movifnidn leftq, leftmp + GET_GOT goffsetq + + pxor m1, m1 + movd m0, [leftq] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_4)] + psraw m0, 2 + pshuflw m0, m0, 0x0 + packuswb m0, m0 + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movd m0, [aboveq] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_4)] + psraw m0, 2 + pshuflw m0, m0, 0x0 + packuswb m0, m0 + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movq m0, [aboveq] + movq m2, [leftq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + paddw m0, [GLOBAL(pw_8)] + psraw m0, 4 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + movq m0, [aboveq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_8)] + psraw m0, 3 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset + movifnidn leftq, leftmp + GET_GOT goffsetq + + pxor m1, m1 + movq m0, [leftq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psadbw m0, m1 + paddw m0, [GLOBAL(pw2_8)] + psraw m0, 3 + punpcklbw m0, m0 + pshuflw m0, m0, 0x0 + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movd m0, [GLOBAL(dc_128)] + movd [dstq ], m0 + movd [dstq+strideq ], m0 + movd [dstq+strideq*2], m0 + movd [dstq+stride3q ], m0 + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movq m0, [GLOBAL(dc_128)] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [aboveq] + mova m2, [leftq] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw_16)] + psraw m0, 5 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + + +INIT_XMM sse2 +cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + psadbw m0, m1 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_16)] + psraw m0, 4 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [leftq] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + psadbw m0, m1 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_16)] + psraw m0, 4 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 4 + mova m0, [GLOBAL(dc_128)] +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + RESTORE_GOT + RET + + +INIT_XMM sse2 +cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [aboveq] + mova m2, [aboveq+16] + mova m3, [leftq] + mova m4, [leftq+16] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + psadbw m0, m1 + psadbw m2, m1 + psadbw m3, m1 + psadbw m4, m1 + paddw m0, m2 + paddw m0, m3 + paddw m0, m4 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw_32)] + psraw m0, 6 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [aboveq] + mova m2, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_32)] + psraw m0, 5 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + + pxor m1, m1 + mova m0, [leftq] + mova m2, [leftq+16] + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + psadbw m0, m1 + psadbw m2, m1 + paddw m0, m2 + movhlps m2, m0 + paddw m0, m2 + paddw m0, [GLOBAL(pw2_32)] + psraw m0, 5 + pshuflw m0, m0, 0x0 + punpcklqdq m0, m0 + packuswb m0, m0 +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + + RESTORE_GOT + REP_RET + +INIT_XMM sse2 +cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + + DEFINE_ARGS dst, stride, stride3, lines4 + lea stride3q, [strideq*3] + mov lines4d, 8 + mova m0, [GLOBAL(dc_128)] +.loop: + mova [dstq ], m0 + mova [dstq +16], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m0 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec lines4d + jnz .loop + RESTORE_GOT + RET + +INIT_XMM sse2 +cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above + movd m0, [aboveq] + movd [dstq ], m0 + movd [dstq+strideq], m0 + lea dstq, [dstq+strideq*2] + movd [dstq ], m0 + movd [dstq+strideq], m0 + RET + +INIT_XMM sse2 +cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above + movq m0, [aboveq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq ], m0 + movq [dstq+strideq ], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RET + +INIT_XMM sse2 +cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, nlines4 + lea stride3q, [strideq*3] + mov nlines4d, 4 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec nlines4d + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above + mova m0, [aboveq] + mova m1, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, nlines4 + lea stride3q, [strideq*3] + mov nlines4d, 8 +.loop: + mova [dstq ], m0 + mova [dstq +16], m1 + mova [dstq+strideq ], m0 + mova [dstq+strideq +16], m1 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q +16], m1 + lea dstq, [dstq+strideq*4] + dec nlines4d + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left + movifnidn leftq, leftmp + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 + pshufd m1, m0, 0x1 + movd [dstq ], m0 + movd [dstq+strideq], m1 + pshufd m2, m0, 0x2 + lea dstq, [dstq+strideq*2] + pshufd m3, m0, 0x3 + movd [dstq ], m2 + movd [dstq+strideq], m3 + RET + +INIT_XMM sse2 +cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp + mov lineq, -2 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] + movq m0, [leftq ] + punpcklbw m0, m0 ; l1 l1 l2 l2 ... l8 l8 +.loop: + pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1 + pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2 + movq [dstq ], m1 + movq [dstq+strideq], m2 + pshuflw m1, m0, 0xaa + pshuflw m2, m0, 0xff + movq [dstq+strideq*2], m1 + movq [dstq+stride3q ], m2 + pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 + inc lineq + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp + mov lineq, -4 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] +.loop: + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 ; l1 to l4 each repeated 4 times + pshufd m1, m0, 0x0 ; l1 repeated 16 times + pshufd m2, m0, 0x55 ; l2 repeated 16 times + mova [dstq ], m1 + mova [dstq+strideq ], m2 + pshufd m1, m0, 0xaa + pshufd m2, m0, 0xff + mova [dstq+strideq*2], m1 + mova [dstq+stride3q ], m2 + inc lineq + lea leftq, [leftq+4 ] + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left + movifnidn leftq, leftmp + mov lineq, -8 + DEFINE_ARGS dst, stride, line, left, stride3 + lea stride3q, [strideq*3] +.loop: + movd m0, [leftq] + punpcklbw m0, m0 + punpcklbw m0, m0 ; l1 to l4 each repeated 4 times + pshufd m1, m0, 0x0 ; l1 repeated 16 times + pshufd m2, m0, 0x55 ; l2 repeated 16 times + mova [dstq ], m1 + mova [dstq+16 ], m1 + mova [dstq+strideq ], m2 + mova [dstq+strideq+16 ], m2 + pshufd m1, m0, 0xaa + pshufd m2, m0, 0xff + mova [dstq+strideq*2 ], m1 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q ], m2 + mova [dstq+stride3q+16 ], m2 + inc lineq + lea leftq, [leftq+4 ] + lea dstq, [dstq+strideq*4] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left + pxor m1, m1 + movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x + punpcklbw m0, m1 + pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word] + psrldq m0, 2 + psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word] + movd m2, [leftq] + punpcklbw m2, m1 + pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] + pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] + paddw m4, m0 + paddw m3, m0 + packuswb m4, m4 + packuswb m3, m3 + movd [dstq ], m4 + movd [dstq+strideq], m3 + lea dstq, [dstq+strideq*2] + pshuflw m4, m2, 0xaa + pshuflw m3, m2, 0xff + paddw m4, m0 + paddw m3, m0 + packuswb m4, m4 + packuswb m3, m3 + movd [dstq ], m4 + movd [dstq+strideq], m3 + RET + +INIT_XMM sse2 +cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left + pxor m1, m1 + movd m2, [aboveq-1] + movq m0, [aboveq] + punpcklbw m2, m1 + punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word] + pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word] + DEFINE_ARGS dst, stride, line, left + mov lineq, -4 + punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word] + psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word] + movq m2, [leftq] + punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word] +.loop + pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] + pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] + punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word] + punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word] + paddw m4, m0 + paddw m3, m0 + packuswb m4, m3 + movq [dstq ], m4 + movhps [dstq+strideq], m4 + lea dstq, [dstq+strideq*2] + psrldq m2, 4 + inc lineq + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left + pxor m1, m1 + mova m2, [aboveq-16]; + mova m0, [aboveq] ; t1 t2 ... t16 [byte] + punpckhbw m2, m1 ; [127:112] tl [word] + punpckhbw m4, m0, m1 + punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word] + DEFINE_ARGS dst, stride, line, left, stride8 + mov lineq, -8 + pshufhw m2, m2, 0xff + mova m3, [leftq] ; l1 l2 ... l16 [byte] + punpckhqdq m2, m2 ; tl repeated 8 times [word] + psubw m0, m2 + psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word] + punpckhbw m5, m3, m1 + punpcklbw m3, m1 ; m3:m5 l1 l2 ... l16 [word] + lea stride8q, [strideq*8] +.loop: + pshuflw m6, m3, 0x0 + pshuflw m7, m5, 0x0 + punpcklqdq m6, m6 ; l1 repeated 8 times [word] + punpcklqdq m7, m7 ; l8 repeated 8 times [word] + paddw m1, m6, m0 + paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word] + psrldq m5, 2 + packuswb m1, m6 + mova [dstq ], m1 + paddw m1, m7, m0 + paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word] + psrldq m3, 2 + packuswb m1, m7 + mova [dstq+stride8q], m1 + inc lineq + lea dstq, [dstq+strideq] + jnz .loop + REP_RET + +INIT_XMM sse2 +cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left + pxor m1, m1 + movd m2, [aboveq-1] + mova m0, [aboveq] + mova m4, [aboveq+16] + punpcklbw m2, m1 + punpckhbw m3, m0, m1 + punpckhbw m5, m4, m1 + punpcklbw m0, m1 + punpcklbw m4, m1 + pshuflw m2, m2, 0x0 + DEFINE_ARGS dst, stride, line, left + mov lineq, -16 + punpcklqdq m2, m2 + add leftq, 32 + psubw m0, m2 + psubw m3, m2 + psubw m4, m2 + psubw m5, m2 +.loop: + movd m2, [leftq+lineq*2] + pxor m1, m1 + punpcklbw m2, m1 + pshuflw m7, m2, 0x55 + pshuflw m2, m2, 0x0 + punpcklqdq m2, m2 + punpcklqdq m7, m7 + paddw m6, m2, m3 + paddw m1, m2, m0 + packuswb m1, m6 + mova [dstq ], m1 + paddw m6, m2, m5 + paddw m1, m2, m4 + packuswb m1, m6 + mova [dstq+16 ], m1 + paddw m6, m7, m3 + paddw m1, m7, m0 + packuswb m1, m6 + mova [dstq+strideq ], m1 + paddw m6, m7, m5 + paddw m1, m7, m4 + packuswb m1, m6 + mova [dstq+strideq+16], m1 + lea dstq, [dstq+strideq*2] + inc lineq + jnz .loop + REP_RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm new file mode 100644 index 00000000000..5e0139fa8d3 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm @@ -0,0 +1,871 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION_RODATA + +pb_1: times 16 db 1 +sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 +sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 +sh_b32104567: db 3, 2, 1, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b8091a2b345: db 8, 0, 9, 1, 10, 2, 11, 3, 4, 5, 0, 0, 0, 0, 0, 0 +sh_b76543210: db 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0 +sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +SECTION .text + +INIT_XMM ssse3 +cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset + GET_GOT goffsetq + + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, dst8, line + lea stride3q, [strideq*3] + lea dst8q, [dstq+strideq*8] + mova m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] + pavgb m3, m2, m0 + pxor m2, m0 + pshufb m0, m1 + pand m2, [GLOBAL(pb_1)] + psubb m3, m2 + pavgb m0, m3 + + ; first 4 lines and first half of 3rd 4 lines + mov lined, 2 +.loop: + mova [dstq ], m0 + movhps [dst8q ], m0 + pshufb m0, m1 + mova [dstq +strideq ], m0 + movhps [dst8q+strideq ], m0 + pshufb m0, m1 + mova [dstq +strideq*2 ], m0 + movhps [dst8q+strideq*2 ], m0 + pshufb m0, m1 + mova [dstq +stride3q ], m0 + movhps [dst8q+stride3q ], m0 + pshufb m0, m1 + lea dstq, [dstq +strideq*4] + lea dst8q, [dst8q+strideq*4] + dec lined + jnz .loop + + ; bottom-right 8x8 block + movhps [dstq +8], m0 + movhps [dstq+strideq +8], m0 + movhps [dstq+strideq*2+8], m0 + movhps [dstq+stride3q +8], m0 + lea dstq, [dstq+strideq*4] + movhps [dstq +8], m0 + movhps [dstq+strideq +8], m0 + movhps [dstq+strideq*2+8], m0 + movhps [dstq+stride3q +8], m0 + + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset + GET_GOT goffsetq + + mova m0, [aboveq] + mova m4, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, dst16, line + lea stride3q, [strideq*3] + lea dst16q, [dstq +strideq*8] + lea dst16q, [dst16q+strideq*8] + mova m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)] + pavgb m3, m2, m4 + pxor m2, m4 + palignr m5, m4, m0, 1 + palignr m6, m4, m0, 2 + pshufb m4, m1 + pand m2, [GLOBAL(pb_1)] + psubb m3, m2 + pavgb m4, m3 + pavgb m3, m0, m6 + pxor m0, m6 + pand m0, [GLOBAL(pb_1)] + psubb m3, m0 + pavgb m5, m3 + + ; write 4x4 lines (and the first half of the second 4x4 lines) + mov lined, 4 +.loop: + mova [dstq ], m5 + mova [dstq +16], m4 + mova [dst16q ], m4 + palignr m3, m4, m5, 1 + pshufb m4, m1 + mova [dstq +strideq ], m3 + mova [dstq +strideq +16], m4 + mova [dst16q+strideq ], m4 + palignr m5, m4, m3, 1 + pshufb m4, m1 + mova [dstq +strideq*2 ], m5 + mova [dstq +strideq*2+16], m4 + mova [dst16q+strideq*2 ], m4 + palignr m3, m4, m5, 1 + pshufb m4, m1 + mova [dstq +stride3q ], m3 + mova [dstq +stride3q +16], m4 + mova [dst16q+stride3q ], m4 + palignr m5, m4, m3, 1 + pshufb m4, m1 + lea dstq, [dstq +strideq*4] + lea dst16q, [dst16q+strideq*4] + dec lined + jnz .loop + + ; write second half of second 4x4 lines + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + lea dstq, [dstq +strideq*4] + mova [dstq +16], m4 + mova [dstq +strideq +16], m4 + mova [dstq +strideq*2+16], m4 + mova [dstq +stride3q +16], m4 + + RESTORE_GOT + RET + +; ------------------------------------------ +; input: x, y, z, result +; +; trick from pascal +; (x+2y+z+2)>>2 can be calculated as: +; result = avg(x,z) +; result -= xor(x,z) & 1 +; result = avg(result,y) +; ------------------------------------------ +%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 + pavgb %4, %1, %3 + pxor %3, %1 + pand %3, [GLOBAL(pb_1)] + psubb %4, %3 + pavgb %4, %2 +%endmacro + +INIT_XMM ssse3 +cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset + GET_GOT goffsetq + + movq m3, [aboveq] + pshufb m1, m3, [GLOBAL(sh_b23456777)] + pshufb m2, m3, [GLOBAL(sh_b12345677)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4 + pavgb m3, m2 + + ; store 4 lines + movd [dstq ], m3 + movd [dstq+strideq], m4 + lea dstq, [dstq+strideq*2] + psrldq m3, 1 + psrldq m4, 1 + movd [dstq ], m3 + movd [dstq+strideq], m4 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset + GET_GOT goffsetq + + movq m3, [aboveq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] + pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] + pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] + pshufb m3, [GLOBAL(sh_b0123456777777777)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4 + pavgb m3, m2 + + ; store 4 lines + movq [dstq ], m3 + movq [dstq+strideq], m4 + psrldq m3, 1 + psrldq m4, 1 + movq [dstq+strideq*2], m3 + movq [dstq+stride3q ], m4 + lea dstq, [dstq+strideq*4] + psrldq m3, 1 + psrldq m4, 1 + + ; store 4 lines + movq [dstq ], m3 + movq [dstq+strideq], m4 + psrldq m3, 1 + psrldq m4, 1 + movq [dstq+strideq*2], m3 + movq [dstq+stride3q ], m4 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset + GET_GOT goffsetq + + mova m0, [aboveq] + DEFINE_ARGS dst, stride, stride3, line + lea stride3q, [strideq*3] + mova m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] + pshufb m3, m0, m1 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4 + pavgb m0, m3 + + mov lined, 4 +.loop: + mova [dstq ], m0 + mova [dstq+strideq ], m4 + pshufb m0, m1 + pshufb m4, m1 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m4 + pshufb m0, m1 + pshufb m4, m1 + lea dstq, [dstq+strideq*4] + dec lined + jnz .loop + RESTORE_GOT + REP_RET + +INIT_XMM ssse3 +cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset + GET_GOT goffsetq + + mova m0, [aboveq] + mova m7, [aboveq+16] + DEFINE_ARGS dst, stride, stride3, line + mova m1, [GLOBAL(sh_b123456789abcdeff)] + lea stride3q, [strideq*3] + pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)] + pshufb m3, m7, m1 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4 + palignr m6, m7, m0, 1 + palignr m5, m7, m0, 2 + pavgb m7, m3 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2 + pavgb m0, m6 + + mov lined, 8 +.loop: + mova [dstq ], m0 + mova [dstq +16], m7 + mova [dstq+strideq ], m2 + mova [dstq+strideq +16], m4 + palignr m3, m7, m0, 1 + palignr m5, m4, m2, 1 + pshufb m7, m1 + pshufb m4, m1 + + mova [dstq+strideq*2 ], m3 + mova [dstq+strideq*2+16], m7 + mova [dstq+stride3q ], m5 + mova [dstq+stride3q +16], m4 + palignr m0, m7, m3, 1 + palignr m2, m4, m5, 1 + pshufb m7, m1 + pshufb m4, m1 + lea dstq, [dstq+strideq*4] + dec lined + jnz .loop + RESTORE_GOT + REP_RET + +INIT_XMM ssse3 +cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset + GET_GOT goffsetq + movd m0, [leftq] ; l1, l2, l3, l4 + movd m1, [aboveq-1] ; tl, t1, t2, t3 + punpckldq m0, m1 ; l1, l2, l3, l4, tl, t1, t2, t3 + pshufb m0, [GLOBAL(sh_b32104567)]; l4, l3, l2, l1, tl, t1, t2, t3 + psrldq m1, m0, 1 ; l3, l2, l1, tl, t1, t2, t3 + psrldq m2, m0, 2 ; l2, l1, tl, t1, t2, t3 + ; comments below are for a predictor like this + ; A1 B1 C1 D1 + ; A2 B2 A1 B1 + ; A3 B3 A2 B2 + ; A4 B4 A3 B3 + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; 3-tap avg B4 B3 B2 B1 C1 D1 + pavgb m1, m0 ; 2-tap avg A4 A3 A2 A1 + + punpcklqdq m3, m1 ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 .. + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pshufb m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 .. + movd [dstq+stride3q ], m3 + psrldq m3, 2 ; A3 B3 A2 B2 A1 B1 C1 D1 .. + movd [dstq+strideq*2], m3 + psrldq m3, 2 ; A2 B2 A1 B1 C1 D1 .. + movd [dstq+strideq ], m3 + psrldq m3, 2 ; A1 B1 C1 D1 .. + movd [dstq ], m3 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset + GET_GOT goffsetq + movq m0, [leftq] ; [0- 7] l1-8 [byte] + movhps m0, [aboveq-1] ; [8-15] tl, t1-7 [byte] + pshufb m1, m0, [GLOBAL(sh_b76543210)] ; l8-1 [word] + pshufb m2, m0, [GLOBAL(sh_b65432108)] ; l7-1,tl [word] + pshufb m3, m0, [GLOBAL(sh_b54321089)] ; l6-1,tl,t1 [word] + pshufb m0, [GLOBAL(sh_b89abcdef)] ; tl,t1-7 [word] + psrldq m4, m0, 1 ; t1-7 [word] + psrldq m5, m0, 2 ; t2-7 [word] + ; comments below are for a predictor like this + ; A1 B1 C1 D1 E1 F1 G1 H1 + ; A2 B2 A1 B1 C1 D1 E1 F1 + ; A3 B3 A2 B2 A1 B1 C1 D1 + ; A4 B4 A3 B3 A2 B2 A1 B1 + ; A5 B5 A4 B4 A3 B3 A2 B2 + ; A6 B6 A5 B5 A4 B4 A3 B3 + ; A7 B7 A6 B6 A5 B5 A4 B4 + ; A8 B8 A7 B7 A6 B6 A5 B5 + pavgb m6, m1, m2 ; 2-tap avg A8-A1 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m4, m5, m7 ; 3-tap avg C-H1 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m0 ; 3-tap avg B8-1 + + punpcklbw m6, m0 ; A-B8, A-B7 ... A-B2, A-B1 + + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movhps [dstq+stride3q], m6 ; A-B4, A-B3, A-B2, A-B1 + palignr m0, m7, m6, 10 ; A-B3, A-B2, A-B1, C-H1 + movq [dstq+strideq*2], m0 + psrldq m0, 2 ; A-B2, A-B1, C-H1 + movq [dstq+strideq ], m0 + psrldq m0, 2 ; A-H1 + movq [dstq ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq+stride3q ], m6 ; A-B8, A-B7, A-B6, A-B5 + psrldq m6, 2 ; A-B7, A-B6, A-B5, A-B4 + movq [dstq+strideq*2], m6 + psrldq m6, 2 ; A-B6, A-B5, A-B4, A-B3 + movq [dstq+strideq ], m6 + psrldq m6, 2 ; A-B5, A-B4, A-B3, A-B2 + movq [dstq ], m6 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d153_predictor_16x16, 4, 5, 8, dst, stride, above, left, goffset + GET_GOT goffsetq + mova m0, [leftq] + movu m7, [aboveq-1] + ; comments below are for a predictor like this + ; A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1 + ; A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 + ; A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 + ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 + ; A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 + ; A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 + ; A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 + ; A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 + ; A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 + ; Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 + ; Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 + ; Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 + ; Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 + ; Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 + ; Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 + ; Ag Bg Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 + pshufb m6, m7, [GLOBAL(sh_bfedcba9876543210)] + palignr m5, m0, m6, 15 + palignr m3, m0, m6, 14 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg + pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] + pavgb m5, m0 ; A1 - Ag + + punpcklbw m0, m4, m5 ; A-B8 ... A-B1 + punpckhbw m4, m5 ; A-B9 ... A-Bg + + pshufb m3, m7, [GLOBAL(sh_b123456789abcdeff)] + pshufb m5, m7, [GLOBAL(sh_b23456789abcdefff)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg C1-P1 + + pshufb m6, m0, [GLOBAL(sh_bfedcba9876543210)] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + palignr m2, m1, m6, 14 + mova [dstq ], m2 + palignr m2, m1, m6, 12 + mova [dstq+strideq ], m2 + palignr m2, m1, m6, 10 + mova [dstq+strideq*2], m2 + palignr m2, m1, m6, 8 + mova [dstq+stride3q ], m2 + lea dstq, [dstq+strideq*4] + palignr m2, m1, m6, 6 + mova [dstq ], m2 + palignr m2, m1, m6, 4 + mova [dstq+strideq ], m2 + palignr m2, m1, m6, 2 + mova [dstq+strideq*2], m2 + pshufb m4, [GLOBAL(sh_bfedcba9876543210)] + mova [dstq+stride3q ], m6 + lea dstq, [dstq+strideq*4] + + palignr m2, m6, m4, 14 + mova [dstq ], m2 + palignr m2, m6, m4, 12 + mova [dstq+strideq ], m2 + palignr m2, m6, m4, 10 + mova [dstq+strideq*2], m2 + palignr m2, m6, m4, 8 + mova [dstq+stride3q ], m2 + lea dstq, [dstq+strideq*4] + palignr m2, m6, m4, 6 + mova [dstq ], m2 + palignr m2, m6, m4, 4 + mova [dstq+strideq ], m2 + palignr m2, m6, m4, 2 + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m4 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset + GET_GOT goffsetq + mova m0, [leftq] + movu m7, [aboveq-1] + movu m1, [aboveq+15] + + pshufb m4, m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m6, m1, [GLOBAL(sh_b23456789abcdefff)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m4, m6, m2 ; 3-tap avg above [high] + + palignr m3, m1, m7, 1 + palignr m5, m1, m7, 2 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg above [low] + + pshufb m7, [GLOBAL(sh_bfedcba9876543210)] + palignr m5, m0, m7, 15 + palignr m3, m0, m7, 14 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg + pavgb m5, m0 ; A1 - Ag + punpcklbw m6, m4, m5 ; A-B8 ... A-B1 + punpckhbw m4, m5 ; A-B9 ... A-Bg + pshufb m6, [GLOBAL(sh_bfedcba9876543210)] + pshufb m4, [GLOBAL(sh_bfedcba9876543210)] + + DEFINE_ARGS dst, stride, stride3, left, line + lea stride3q, [strideq*3] + + palignr m5, m2, m1, 14 + palignr m7, m1, m6, 14 + mova [dstq ], m7 + mova [dstq+16 ], m5 + palignr m5, m2, m1, 12 + palignr m7, m1, m6, 12 + mova [dstq+strideq ], m7 + mova [dstq+strideq+16 ], m5 + palignr m5, m2, m1, 10 + palignr m7, m1, m6, 10 + mova [dstq+strideq*2 ], m7 + mova [dstq+strideq*2+16], m5 + palignr m5, m2, m1, 8 + palignr m7, m1, m6, 8 + mova [dstq+stride3q ], m7 + mova [dstq+stride3q+16 ], m5 + lea dstq, [dstq+strideq*4] + palignr m5, m2, m1, 6 + palignr m7, m1, m6, 6 + mova [dstq ], m7 + mova [dstq+16 ], m5 + palignr m5, m2, m1, 4 + palignr m7, m1, m6, 4 + mova [dstq+strideq ], m7 + mova [dstq+strideq+16 ], m5 + palignr m5, m2, m1, 2 + palignr m7, m1, m6, 2 + mova [dstq+strideq*2 ], m7 + mova [dstq+strideq*2+16], m5 + mova [dstq+stride3q ], m6 + mova [dstq+stride3q+16 ], m1 + lea dstq, [dstq+strideq*4] + + palignr m5, m1, m6, 14 + palignr m3, m6, m4, 14 + mova [dstq ], m3 + mova [dstq+16 ], m5 + palignr m5, m1, m6, 12 + palignr m3, m6, m4, 12 + mova [dstq+strideq ], m3 + mova [dstq+strideq+16 ], m5 + palignr m5, m1, m6, 10 + palignr m3, m6, m4, 10 + mova [dstq+strideq*2 ], m3 + mova [dstq+strideq*2+16], m5 + palignr m5, m1, m6, 8 + palignr m3, m6, m4, 8 + mova [dstq+stride3q ], m3 + mova [dstq+stride3q+16 ], m5 + lea dstq, [dstq+strideq*4] + palignr m5, m1, m6, 6 + palignr m3, m6, m4, 6 + mova [dstq ], m3 + mova [dstq+16 ], m5 + palignr m5, m1, m6, 4 + palignr m3, m6, m4, 4 + mova [dstq+strideq ], m3 + mova [dstq+strideq+16 ], m5 + palignr m5, m1, m6, 2 + palignr m3, m6, m4, 2 + mova [dstq+strideq*2 ], m3 + mova [dstq+strideq*2+16], m5 + mova [dstq+stride3q ], m4 + mova [dstq+stride3q+16 ], m6 + lea dstq, [dstq+strideq*4] + + mova m7, [leftq] + mova m3, [leftq+16] + palignr m5, m3, m7, 15 + palignr m0, m3, m7, 14 + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m5, m0, m2 ; 3-tap avg Bh - + pavgb m5, m3 ; Ah - + punpcklbw m3, m2, m5 ; A-B8 ... A-B1 + punpckhbw m2, m5 ; A-B9 ... A-Bg + pshufb m3, [GLOBAL(sh_bfedcba9876543210)] + pshufb m2, [GLOBAL(sh_bfedcba9876543210)] + + palignr m7, m6, m4, 14 + palignr m0, m4, m3, 14 + mova [dstq ], m0 + mova [dstq+16 ], m7 + palignr m7, m6, m4, 12 + palignr m0, m4, m3, 12 + mova [dstq+strideq ], m0 + mova [dstq+strideq+16 ], m7 + palignr m7, m6, m4, 10 + palignr m0, m4, m3, 10 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m7 + palignr m7, m6, m4, 8 + palignr m0, m4, m3, 8 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q+16 ], m7 + lea dstq, [dstq+strideq*4] + palignr m7, m6, m4, 6 + palignr m0, m4, m3, 6 + mova [dstq ], m0 + mova [dstq+16 ], m7 + palignr m7, m6, m4, 4 + palignr m0, m4, m3, 4 + mova [dstq+strideq ], m0 + mova [dstq+strideq+16 ], m7 + palignr m7, m6, m4, 2 + palignr m0, m4, m3, 2 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m7 + mova [dstq+stride3q ], m3 + mova [dstq+stride3q+16 ], m4 + lea dstq, [dstq+strideq*4] + + palignr m7, m4, m3, 14 + palignr m0, m3, m2, 14 + mova [dstq ], m0 + mova [dstq+16 ], m7 + palignr m7, m4, m3, 12 + palignr m0, m3, m2, 12 + mova [dstq+strideq ], m0 + mova [dstq+strideq+16 ], m7 + palignr m7, m4, m3, 10 + palignr m0, m3, m2, 10 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m7 + palignr m7, m4, m3, 8 + palignr m0, m3, m2, 8 + mova [dstq+stride3q ], m0 + mova [dstq+stride3q+16 ], m7 + lea dstq, [dstq+strideq*4] + palignr m7, m4, m3, 6 + palignr m0, m3, m2, 6 + mova [dstq ], m0 + mova [dstq+16 ], m7 + palignr m7, m4, m3, 4 + palignr m0, m3, m2, 4 + mova [dstq+strideq ], m0 + mova [dstq+strideq+16 ], m7 + palignr m7, m4, m3, 2 + palignr m0, m3, m2, 2 + mova [dstq+strideq*2 ], m0 + mova [dstq+strideq*2+16], m7 + mova [dstq+stride3q ], m2 + mova [dstq+stride3q+16 ], m3 + + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset + GET_GOT goffsetq + movq m3, [leftq] ; abcdefgh [byte] + lea stride3q, [strideq*3] + + pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] + pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] + pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m3 + pavgb m0, m2 + punpcklbw m0, m3 ; interleaved output + + movq [dstq ], m0 + psrldq m0, 2 + movq [dstq+strideq ], m0 + psrldq m0, 2 + movq [dstq+strideq*2], m0 + psrldq m0, 2 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + pshufhw m0, m0, q0000 ; de, d2ef, ef, e2fg, fg, f2gh, gh, g3h, 8xh + psrldq m0, 2 + movq [dstq ], m0 + psrldq m0, 2 + movq [dstq+strideq ], m0 + psrldq m0, 2 + movq [dstq+strideq*2], m0 + psrldq m0, 2 + movq [dstq+stride3q ], m0 + RESTORE_GOT + RET + +INIT_XMM ssse3 +cglobal d207_predictor_16x16, 4, 5, 5, dst, stride, stride3, left, goffset + GET_GOT goffsetq + lea stride3q, [strideq*3] + mova m0, [leftq] ; abcdefghijklmnop [byte] + pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] ; bcdefghijklmnopp + pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 + pavgb m1, m0 ; ab, bc, cd .. no, op, pp [byte] + + punpckhbw m4, m1, m3 ; interleaved input + punpcklbw m1, m3 ; interleaved output + mova [dstq ], m1 + palignr m3, m4, m1, 2 + mova [dstq+strideq ], m3 + palignr m3, m4, m1, 4 + mova [dstq+strideq*2], m3 + palignr m3, m4, m1, 6 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + palignr m3, m4, m1, 8 + mova [dstq ], m3 + palignr m3, m4, m1, 10 + mova [dstq+strideq ], m3 + palignr m3, m4, m1, 12 + mova [dstq+strideq*2], m3 + palignr m3, m4, m1, 14 + mova [dstq+stride3q ], m3 + DEFINE_ARGS dst, stride, stride3, line + mov lined, 2 + mova m0, [GLOBAL(sh_b23456789abcdefff)] +.loop: + lea dstq, [dstq+strideq*4] + mova [dstq ], m4 + pshufb m4, m0 + mova [dstq+strideq ], m4 + pshufb m4, m0 + mova [dstq+strideq*2], m4 + pshufb m4, m0 + mova [dstq+stride3q ], m4 + pshufb m4, m0 + dec lined + jnz .loop + RESTORE_GOT + REP_RET + +INIT_XMM ssse3 +cglobal d207_predictor_32x32, 4, 5, 8, dst, stride, stride3, left, goffset + GET_GOT goffsetq + lea stride3q, [strideq*3] + mova m1, [leftq] ; 0-15 [byte] + mova m2, [leftq+16] ; 16-31 [byte] + pshufb m0, m2, [GLOBAL(sh_b23456789abcdefff)] + pshufb m4, m2, [GLOBAL(sh_b123456789abcdeff)] + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m2, m4, m0, m3 + palignr m6, m2, m1, 1 + palignr m5, m2, m1, 2 + pavgb m2, m4 ; high 16px even lines + + X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m6, m5, m0 + pavgb m1, m6 ; low 16px even lines + + punpckhbw m6, m1, m0 ; interleaved output 2 + punpcklbw m1, m0 ; interleaved output 1 + + punpckhbw m7, m2, m3 ; interleaved output 4 + punpcklbw m2, m3 ; interleaved output 3 + + ; output 1st 8 lines (and half of 2nd 8 lines) + DEFINE_ARGS dst, stride, stride3, dst8 + lea dst8q, [dstq+strideq*8] + mova [dstq ], m1 + mova [dstq +16], m6 + mova [dst8q ], m6 + palignr m0, m6, m1, 2 + palignr m4, m2, m6, 2 + mova [dstq +strideq ], m0 + mova [dstq +strideq +16], m4 + mova [dst8q+strideq ], m4 + palignr m0, m6, m1, 4 + palignr m4, m2, m6, 4 + mova [dstq +strideq*2 ], m0 + mova [dstq +strideq*2+16], m4 + mova [dst8q+strideq*2 ], m4 + palignr m0, m6, m1, 6 + palignr m4, m2, m6, 6 + mova [dstq +stride3q ], m0 + mova [dstq +stride3q +16], m4 + mova [dst8q+stride3q ], m4 + lea dstq, [dstq +strideq*4] + lea dst8q, [dst8q+strideq*4] + palignr m0, m6, m1, 8 + palignr m4, m2, m6, 8 + mova [dstq ], m0 + mova [dstq +16], m4 + mova [dst8q ], m4 + palignr m0, m6, m1, 10 + palignr m4, m2, m6, 10 + mova [dstq +strideq ], m0 + mova [dstq +strideq +16], m4 + mova [dst8q+strideq ], m4 + palignr m0, m6, m1, 12 + palignr m4, m2, m6, 12 + mova [dstq +strideq*2 ], m0 + mova [dstq +strideq*2+16], m4 + mova [dst8q+strideq*2 ], m4 + palignr m0, m6, m1, 14 + palignr m4, m2, m6, 14 + mova [dstq +stride3q ], m0 + mova [dstq +stride3q +16], m4 + mova [dst8q+stride3q ], m4 + lea dstq, [dstq+strideq*4] + lea dst8q, [dst8q+strideq*4] + + ; output 2nd half of 2nd 8 lines and half of 3rd 8 lines + mova [dstq +16], m2 + mova [dst8q ], m2 + palignr m4, m7, m2, 2 + mova [dstq +strideq +16], m4 + mova [dst8q+strideq ], m4 + palignr m4, m7, m2, 4 + mova [dstq +strideq*2+16], m4 + mova [dst8q+strideq*2 ], m4 + palignr m4, m7, m2, 6 + mova [dstq +stride3q +16], m4 + mova [dst8q+stride3q ], m4 + lea dstq, [dstq+strideq*4] + lea dst8q, [dst8q+strideq*4] + palignr m4, m7, m2, 8 + mova [dstq +16], m4 + mova [dst8q ], m4 + palignr m4, m7, m2, 10 + mova [dstq +strideq +16], m4 + mova [dst8q+strideq ], m4 + palignr m4, m7, m2, 12 + mova [dstq +strideq*2+16], m4 + mova [dst8q+strideq*2 ], m4 + palignr m4, m7, m2, 14 + mova [dstq +stride3q +16], m4 + mova [dst8q+stride3q ], m4 + lea dstq, [dstq+strideq*4] + lea dst8q, [dst8q+strideq*4] + + ; output 2nd half of 3rd 8 lines and half of 4th 8 lines + mova m0, [GLOBAL(sh_b23456789abcdefff)] + mova [dstq +16], m7 + mova [dst8q ], m7 + pshufb m7, m0 + mova [dstq +strideq +16], m7 + mova [dst8q+strideq ], m7 + pshufb m7, m0 + mova [dstq +strideq*2+16], m7 + mova [dst8q+strideq*2 ], m7 + pshufb m7, m0 + mova [dstq +stride3q +16], m7 + mova [dst8q+stride3q ], m7 + pshufb m7, m0 + lea dstq, [dstq+strideq*4] + lea dst8q, [dst8q+strideq*4] + mova [dstq +16], m7 + mova [dst8q ], m7 + pshufb m7, m0 + mova [dstq +strideq +16], m7 + mova [dst8q+strideq ], m7 + pshufb m7, m0 + mova [dstq +strideq*2+16], m7 + mova [dst8q+strideq*2 ], m7 + pshufb m7, m0 + mova [dstq +stride3q +16], m7 + mova [dst8q+stride3q ], m7 + pshufb m7, m0 + lea dstq, [dstq+strideq*4] + + ; output last half of 4th 8 lines + mova [dstq +16], m7 + mova [dstq +strideq +16], m7 + mova [dstq +strideq*2+16], m7 + mova [dstq +stride3q +16], m7 + lea dstq, [dstq+strideq*4] + mova [dstq +16], m7 + mova [dstq +strideq +16], m7 + mova [dstq +strideq*2+16], m7 + mova [dstq +stride3q +16], m7 + + ; done! + RESTORE_GOT + RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c new file mode 100644 index 00000000000..df5068c624b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c @@ -0,0 +1,4046 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +#define RECON_AND_STORE4X4(dest, in_x) \ +{ \ + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ + d0 = _mm_unpacklo_epi8(d0, zero); \ + d0 = _mm_add_epi16(in_x, d0); \ + d0 = _mm_packus_epi16(d0, d0); \ + *(int *)(dest) = _mm_cvtsi128_si32(d0); \ +} + +void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i zero = _mm_setzero_si128(); + const __m128i eight = _mm_set1_epi16(8); + const __m128i cst = _mm_setr_epi16( + (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64); + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + __m128i input0, input1, input2, input3; + + // Rows + input0 = load_input_data(input); + input2 = load_input_data(input + 8); + + // Construct i3, i1, i3, i1, i2, i0, i2, i0 + input0 = _mm_shufflelo_epi16(input0, 0xd8); + input0 = _mm_shufflehi_epi16(input0, 0xd8); + input2 = _mm_shufflelo_epi16(input2, 0xd8); + input2 = _mm_shufflehi_epi16(input2, 0xd8); + + input1 = _mm_unpackhi_epi32(input0, input0); + input0 = _mm_unpacklo_epi32(input0, input0); + input3 = _mm_unpackhi_epi32(input2, input2); + input2 = _mm_unpacklo_epi32(input2, input2); + + // Stage 1 + input0 = _mm_madd_epi16(input0, cst); + input1 = _mm_madd_epi16(input1, cst); + input2 = _mm_madd_epi16(input2, cst); + input3 = _mm_madd_epi16(input3, cst); + + input0 = _mm_add_epi32(input0, rounding); + input1 = _mm_add_epi32(input1, rounding); + input2 = _mm_add_epi32(input2, rounding); + input3 = _mm_add_epi32(input3, rounding); + + input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); + input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); + input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); + input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); + + // Stage 2 + input0 = _mm_packs_epi32(input0, input1); + input1 = _mm_packs_epi32(input2, input3); + + // Transpose + input2 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpackhi_epi16(input0, input1); + input0 = _mm_unpacklo_epi32(input2, input3); + input1 = _mm_unpackhi_epi32(input2, input3); + + // Switch column2, column 3, and then, we got: + // input2: column1, column 0; input3: column2, column 3. + input1 = _mm_shuffle_epi32(input1, 0x4e); + input2 = _mm_add_epi16(input0, input1); + input3 = _mm_sub_epi16(input0, input1); + + // Columns + // Construct i3, i1, i3, i1, i2, i0, i2, i0 + input0 = _mm_unpacklo_epi32(input2, input2); + input1 = _mm_unpackhi_epi32(input2, input2); + input2 = _mm_unpackhi_epi32(input3, input3); + input3 = _mm_unpacklo_epi32(input3, input3); + + // Stage 1 + input0 = _mm_madd_epi16(input0, cst); + input1 = _mm_madd_epi16(input1, cst); + input2 = _mm_madd_epi16(input2, cst); + input3 = _mm_madd_epi16(input3, cst); + + input0 = _mm_add_epi32(input0, rounding); + input1 = _mm_add_epi32(input1, rounding); + input2 = _mm_add_epi32(input2, rounding); + input3 = _mm_add_epi32(input3, rounding); + + input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); + input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); + input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); + input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); + + // Stage 2 + input0 = _mm_packs_epi32(input0, input2); + input1 = _mm_packs_epi32(input1, input3); + + // Transpose + input2 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpackhi_epi16(input0, input1); + input0 = _mm_unpacklo_epi32(input2, input3); + input1 = _mm_unpackhi_epi32(input2, input3); + + // Switch column2, column 3, and then, we got: + // input2: column1, column 0; input3: column2, column 3. + input1 = _mm_shuffle_epi32(input1, 0x4e); + input2 = _mm_add_epi16(input0, input1); + input3 = _mm_sub_epi16(input0, input1); + + // Final round and shift + input2 = _mm_add_epi16(input2, eight); + input3 = _mm_add_epi16(input3, eight); + + input2 = _mm_srai_epi16(input2, 4); + input3 = _mm_srai_epi16(input3, 4); + + // Reconstruction and Store + { + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *)(dest + stride))); + d2 = _mm_unpacklo_epi32( + _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = _mm_add_epi16(d0, input2); + d2 = _mm_add_epi16(d2, input3); + d0 = _mm_packus_epi16(d0, d2); + // store input0 + *(int *)dest = _mm_cvtsi128_si32(d0); + // store input1 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store input2 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + // store input3 + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); + } +} + +void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a; + + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 4); + + dc_value = _mm_set1_epi16(a); + + RECON_AND_STORE4X4(dest + 0 * stride, dc_value); + RECON_AND_STORE4X4(dest + 1 * stride, dc_value); + RECON_AND_STORE4X4(dest + 2 * stride, dc_value); + RECON_AND_STORE4X4(dest + 3 * stride, dc_value); +} + +static INLINE void transpose_4x4(__m128i *res) { + const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); + const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); + + res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1); + res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); +} + +void idct4_sse2(__m128i *in) { + const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + __m128i u[8], v[8]; + + transpose_4x4(in); + // stage 1 + u[0] = _mm_unpacklo_epi16(in[0], in[1]); + u[1] = _mm_unpackhi_epi16(in[0], in[1]); + v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16); + v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16); + v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08); + v[3] = _mm_madd_epi16(u[1], k__cospi_p08_p24); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + + v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + + u[0] = _mm_packs_epi32(v[0], v[1]); + u[1] = _mm_packs_epi32(v[3], v[2]); + + // stage 2 + in[0] = _mm_add_epi16(u[0], u[1]); + in[1] = _mm_sub_epi16(u[0], u[1]); + in[1] = _mm_shuffle_epi32(in[1], 0x4E); +} + +void iadst4_sse2(__m128i *in) { + const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); + const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); + const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); + const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); + const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); + const __m128i kZero = _mm_set1_epi16(0); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + __m128i u[8], v[8], in7; + + transpose_4x4(in); + in7 = _mm_srli_si128(in[1], 8); + in7 = _mm_add_epi16(in7, in[0]); + in7 = _mm_sub_epi16(in7, in[1]); + + u[0] = _mm_unpacklo_epi16(in[0], in[1]); + u[1] = _mm_unpackhi_epi16(in[0], in[1]); + u[2] = _mm_unpacklo_epi16(in7, kZero); + u[3] = _mm_unpackhi_epi16(in[0], kZero); + + v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3 + v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5 + v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x2 + v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01); // s1 - s4 + v[4] = _mm_madd_epi16(u[1], k__sinpi_p03_m04); // s2 - s6 + v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s2 + + u[0] = _mm_add_epi32(v[0], v[1]); + u[1] = _mm_add_epi32(v[3], v[4]); + u[2] = v[2]; + u[3] = _mm_add_epi32(u[0], u[1]); + u[4] = _mm_slli_epi32(v[5], 2); + u[5] = _mm_add_epi32(u[3], v[5]); + u[6] = _mm_sub_epi32(u[5], u[4]); + + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + + in[0] = _mm_packs_epi32(u[0], u[1]); + in[1] = _mm_packs_epi32(u[2], u[3]); +} + +#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ + { \ + const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ + const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ + const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ + const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \ + const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \ + const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \ + const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \ + \ + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \ + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \ + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ + const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \ + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ + const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \ + \ + out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ + out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ + out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ + out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ + out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \ + out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \ + out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \ + out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \ + } + +#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, \ + out0, out1, out2, out3) \ + { \ + const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \ + const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \ + const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \ + \ + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ + \ + out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ + out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ + out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ + out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ + } + +#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ + { \ + const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ + out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + } + +// Define Macro for multiplying elements by constants and adding them together. +#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ + cst0, cst1, cst2, cst3, res0, res1, res2, res3) \ + { \ + tmp0 = _mm_madd_epi16(lo_0, cst0); \ + tmp1 = _mm_madd_epi16(hi_0, cst0); \ + tmp2 = _mm_madd_epi16(lo_0, cst1); \ + tmp3 = _mm_madd_epi16(hi_0, cst1); \ + tmp4 = _mm_madd_epi16(lo_1, cst2); \ + tmp5 = _mm_madd_epi16(hi_1, cst2); \ + tmp6 = _mm_madd_epi16(lo_1, cst3); \ + tmp7 = _mm_madd_epi16(hi_1, cst3); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + tmp4 = _mm_add_epi32(tmp4, rounding); \ + tmp5 = _mm_add_epi32(tmp5, rounding); \ + tmp6 = _mm_add_epi32(tmp6, rounding); \ + tmp7 = _mm_add_epi32(tmp7, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ + tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ + tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ + \ + res0 = _mm_packs_epi32(tmp0, tmp1); \ + res1 = _mm_packs_epi32(tmp2, tmp3); \ + res2 = _mm_packs_epi32(tmp4, tmp5); \ + res3 = _mm_packs_epi32(tmp6, tmp7); \ + } + +#define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \ + { \ + tmp0 = _mm_madd_epi16(lo_0, cst0); \ + tmp1 = _mm_madd_epi16(hi_0, cst0); \ + tmp2 = _mm_madd_epi16(lo_0, cst1); \ + tmp3 = _mm_madd_epi16(hi_0, cst1); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + res0 = _mm_packs_epi32(tmp0, tmp1); \ + res1 = _mm_packs_epi32(tmp2, tmp3); \ + } + +#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ + { \ + /* Stage1 */ \ + { \ + const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \ + const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \ + const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \ + const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \ + \ + MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, \ + stg1_1, stg1_2, stg1_3, stp1_4, \ + stp1_7, stp1_5, stp1_6) \ + } \ + \ + /* Stage2 */ \ + { \ + const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \ + const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \ + const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \ + const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \ + \ + MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, \ + stg2_1, stg2_2, stg2_3, stp2_0, \ + stp2_1, stp2_2, stp2_3) \ + \ + stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_56, stg2_1); \ + tmp1 = _mm_madd_epi16(hi_56, stg2_1); \ + tmp2 = _mm_madd_epi16(lo_56, stg2_0); \ + tmp3 = _mm_madd_epi16(hi_56, stg2_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + } \ + \ + /* Stage4 */ \ + out0 = _mm_adds_epi16(stp1_0, stp2_7); \ + out1 = _mm_adds_epi16(stp1_1, stp1_6); \ + out2 = _mm_adds_epi16(stp1_2, stp1_5); \ + out3 = _mm_adds_epi16(stp1_3, stp2_4); \ + out4 = _mm_subs_epi16(stp1_3, stp2_4); \ + out5 = _mm_subs_epi16(stp1_2, stp1_5); \ + out6 = _mm_subs_epi16(stp1_1, stp1_6); \ + out7 = _mm_subs_epi16(stp1_0, stp2_7); \ + } + +void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i zero = _mm_setzero_si128(); + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); + const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + + __m128i in0, in1, in2, in3, in4, in5, in6, in7; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i; + + // Load input data. + in0 = load_input_data(input); + in1 = load_input_data(input + 8 * 1); + in2 = load_input_data(input + 8 * 2); + in3 = load_input_data(input + 8 * 3); + in4 = load_input_data(input + 8 * 4); + in5 = load_input_data(input + 8 * 5); + in6 = load_input_data(input + 8 * 6); + in7 = load_input_data(input + 8 * 7); + + // 2-D + for (i = 0; i < 2; i++) { + // 8x8 Transpose is copied from vpx_fdct8x8_sse2() + TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + + // 4-stage 1D idct8x8 + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in0, in1, in2, in3, in4, in5, in6, in7); + } + + // Final rounding and shift + in0 = _mm_adds_epi16(in0, final_rounding); + in1 = _mm_adds_epi16(in1, final_rounding); + in2 = _mm_adds_epi16(in2, final_rounding); + in3 = _mm_adds_epi16(in3, final_rounding); + in4 = _mm_adds_epi16(in4, final_rounding); + in5 = _mm_adds_epi16(in5, final_rounding); + in6 = _mm_adds_epi16(in6, final_rounding); + in7 = _mm_adds_epi16(in7, final_rounding); + + in0 = _mm_srai_epi16(in0, 5); + in1 = _mm_srai_epi16(in1, 5); + in2 = _mm_srai_epi16(in2, 5); + in3 = _mm_srai_epi16(in3, 5); + in4 = _mm_srai_epi16(in4, 5); + in5 = _mm_srai_epi16(in5, 5); + in6 = _mm_srai_epi16(in6, 5); + in7 = _mm_srai_epi16(in7, 5); + + RECON_AND_STORE(dest + 0 * stride, in0); + RECON_AND_STORE(dest + 1 * stride, in1); + RECON_AND_STORE(dest + 2 * stride, in2); + RECON_AND_STORE(dest + 3 * stride, in3); + RECON_AND_STORE(dest + 4 * stride, in4); + RECON_AND_STORE(dest + 5 * stride, in5); + RECON_AND_STORE(dest + 6 * stride, in6); + RECON_AND_STORE(dest + 7 * stride, in7); +} + +void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a; + + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 5); + + dc_value = _mm_set1_epi16(a); + + RECON_AND_STORE(dest + 0 * stride, dc_value); + RECON_AND_STORE(dest + 1 * stride, dc_value); + RECON_AND_STORE(dest + 2 * stride, dc_value); + RECON_AND_STORE(dest + 3 * stride, dc_value); + RECON_AND_STORE(dest + 4 * stride, dc_value); + RECON_AND_STORE(dest + 5 * stride, dc_value); + RECON_AND_STORE(dest + 6 * stride, dc_value); + RECON_AND_STORE(dest + 7 * stride, dc_value); +} + +void idct8_sse2(__m128i *in) { + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + + __m128i in0, in1, in2, in3, in4, in5, in6, in7; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + // 8x8 Transpose is copied from vpx_fdct8x8_sse2() + TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], + in0, in1, in2, in3, in4, in5, in6, in7); + + // 4-stage 1D idct8x8 + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, + in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); +} + +void iadst8_sse2(__m128i *in) { + const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); + const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); + const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); + const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); + const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); + const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); + const __m128i k__const_0 = _mm_set1_epi16(0); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + + __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; + __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; + __m128i s0, s1, s2, s3, s4, s5, s6, s7; + __m128i in0, in1, in2, in3, in4, in5, in6, in7; + + // transpose + array_transpose_8x8(in, in); + + // properly aligned for butterfly input + in0 = in[7]; + in1 = in[0]; + in2 = in[5]; + in3 = in[2]; + in4 = in[3]; + in5 = in[4]; + in6 = in[1]; + in7 = in[6]; + + // column transformation + // stage 1 + // interleave and multiply/add into 32-bit integer + s0 = _mm_unpacklo_epi16(in0, in1); + s1 = _mm_unpackhi_epi16(in0, in1); + s2 = _mm_unpacklo_epi16(in2, in3); + s3 = _mm_unpackhi_epi16(in2, in3); + s4 = _mm_unpacklo_epi16(in4, in5); + s5 = _mm_unpackhi_epi16(in4, in5); + s6 = _mm_unpacklo_epi16(in6, in7); + s7 = _mm_unpackhi_epi16(in6, in7); + + u0 = _mm_madd_epi16(s0, k__cospi_p02_p30); + u1 = _mm_madd_epi16(s1, k__cospi_p02_p30); + u2 = _mm_madd_epi16(s0, k__cospi_p30_m02); + u3 = _mm_madd_epi16(s1, k__cospi_p30_m02); + u4 = _mm_madd_epi16(s2, k__cospi_p10_p22); + u5 = _mm_madd_epi16(s3, k__cospi_p10_p22); + u6 = _mm_madd_epi16(s2, k__cospi_p22_m10); + u7 = _mm_madd_epi16(s3, k__cospi_p22_m10); + u8 = _mm_madd_epi16(s4, k__cospi_p18_p14); + u9 = _mm_madd_epi16(s5, k__cospi_p18_p14); + u10 = _mm_madd_epi16(s4, k__cospi_p14_m18); + u11 = _mm_madd_epi16(s5, k__cospi_p14_m18); + u12 = _mm_madd_epi16(s6, k__cospi_p26_p06); + u13 = _mm_madd_epi16(s7, k__cospi_p26_p06); + u14 = _mm_madd_epi16(s6, k__cospi_p06_m26); + u15 = _mm_madd_epi16(s7, k__cospi_p06_m26); + + // addition + w0 = _mm_add_epi32(u0, u8); + w1 = _mm_add_epi32(u1, u9); + w2 = _mm_add_epi32(u2, u10); + w3 = _mm_add_epi32(u3, u11); + w4 = _mm_add_epi32(u4, u12); + w5 = _mm_add_epi32(u5, u13); + w6 = _mm_add_epi32(u6, u14); + w7 = _mm_add_epi32(u7, u15); + w8 = _mm_sub_epi32(u0, u8); + w9 = _mm_sub_epi32(u1, u9); + w10 = _mm_sub_epi32(u2, u10); + w11 = _mm_sub_epi32(u3, u11); + w12 = _mm_sub_epi32(u4, u12); + w13 = _mm_sub_epi32(u5, u13); + w14 = _mm_sub_epi32(u6, u14); + w15 = _mm_sub_epi32(u7, u15); + + // shift and rounding + v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); + v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); + v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); + v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); + v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); + v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); + v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); + v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); + v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING); + v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING); + v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING); + v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING); + v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING); + v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING); + v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING); + v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING); + + u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); + u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); + u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); + u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); + u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); + u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); + u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); + u8 = _mm_srai_epi32(v8, DCT_CONST_BITS); + u9 = _mm_srai_epi32(v9, DCT_CONST_BITS); + u10 = _mm_srai_epi32(v10, DCT_CONST_BITS); + u11 = _mm_srai_epi32(v11, DCT_CONST_BITS); + u12 = _mm_srai_epi32(v12, DCT_CONST_BITS); + u13 = _mm_srai_epi32(v13, DCT_CONST_BITS); + u14 = _mm_srai_epi32(v14, DCT_CONST_BITS); + u15 = _mm_srai_epi32(v15, DCT_CONST_BITS); + + // back to 16-bit and pack 8 integers into __m128i + in[0] = _mm_packs_epi32(u0, u1); + in[1] = _mm_packs_epi32(u2, u3); + in[2] = _mm_packs_epi32(u4, u5); + in[3] = _mm_packs_epi32(u6, u7); + in[4] = _mm_packs_epi32(u8, u9); + in[5] = _mm_packs_epi32(u10, u11); + in[6] = _mm_packs_epi32(u12, u13); + in[7] = _mm_packs_epi32(u14, u15); + + // stage 2 + s0 = _mm_add_epi16(in[0], in[2]); + s1 = _mm_add_epi16(in[1], in[3]); + s2 = _mm_sub_epi16(in[0], in[2]); + s3 = _mm_sub_epi16(in[1], in[3]); + u0 = _mm_unpacklo_epi16(in[4], in[5]); + u1 = _mm_unpackhi_epi16(in[4], in[5]); + u2 = _mm_unpacklo_epi16(in[6], in[7]); + u3 = _mm_unpackhi_epi16(in[6], in[7]); + + v0 = _mm_madd_epi16(u0, k__cospi_p08_p24); + v1 = _mm_madd_epi16(u1, k__cospi_p08_p24); + v2 = _mm_madd_epi16(u0, k__cospi_p24_m08); + v3 = _mm_madd_epi16(u1, k__cospi_p24_m08); + v4 = _mm_madd_epi16(u2, k__cospi_m24_p08); + v5 = _mm_madd_epi16(u3, k__cospi_m24_p08); + v6 = _mm_madd_epi16(u2, k__cospi_p08_p24); + v7 = _mm_madd_epi16(u3, k__cospi_p08_p24); + + w0 = _mm_add_epi32(v0, v4); + w1 = _mm_add_epi32(v1, v5); + w2 = _mm_add_epi32(v2, v6); + w3 = _mm_add_epi32(v3, v7); + w4 = _mm_sub_epi32(v0, v4); + w5 = _mm_sub_epi32(v1, v5); + w6 = _mm_sub_epi32(v2, v6); + w7 = _mm_sub_epi32(v3, v7); + + v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); + v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); + v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); + v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); + v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); + v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); + v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); + v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); + + u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); + u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); + u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); + u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); + u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); + u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); + u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); + + // back to 16-bit intergers + s4 = _mm_packs_epi32(u0, u1); + s5 = _mm_packs_epi32(u2, u3); + s6 = _mm_packs_epi32(u4, u5); + s7 = _mm_packs_epi32(u6, u7); + + // stage 3 + u0 = _mm_unpacklo_epi16(s2, s3); + u1 = _mm_unpackhi_epi16(s2, s3); + u2 = _mm_unpacklo_epi16(s6, s7); + u3 = _mm_unpackhi_epi16(s6, s7); + + v0 = _mm_madd_epi16(u0, k__cospi_p16_p16); + v1 = _mm_madd_epi16(u1, k__cospi_p16_p16); + v2 = _mm_madd_epi16(u0, k__cospi_p16_m16); + v3 = _mm_madd_epi16(u1, k__cospi_p16_m16); + v4 = _mm_madd_epi16(u2, k__cospi_p16_p16); + v5 = _mm_madd_epi16(u3, k__cospi_p16_p16); + v6 = _mm_madd_epi16(u2, k__cospi_p16_m16); + v7 = _mm_madd_epi16(u3, k__cospi_p16_m16); + + u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); + u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); + u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); + u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); + u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING); + u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING); + u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING); + u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING); + + v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); + v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); + v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); + v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); + v4 = _mm_srai_epi32(u4, DCT_CONST_BITS); + v5 = _mm_srai_epi32(u5, DCT_CONST_BITS); + v6 = _mm_srai_epi32(u6, DCT_CONST_BITS); + v7 = _mm_srai_epi32(u7, DCT_CONST_BITS); + + s2 = _mm_packs_epi32(v0, v1); + s3 = _mm_packs_epi32(v2, v3); + s6 = _mm_packs_epi32(v4, v5); + s7 = _mm_packs_epi32(v6, v7); + + in[0] = s0; + in[1] = _mm_sub_epi16(k__const_0, s4); + in[2] = s6; + in[3] = _mm_sub_epi16(k__const_0, s2); + in[4] = s3; + in[5] = _mm_sub_epi16(k__const_0, s7); + in[6] = s5; + in[7] = _mm_sub_epi16(k__const_0, s1); +} + +void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i zero = _mm_setzero_si128(); + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); + const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + + __m128i in0, in1, in2, in3, in4, in5, in6, in7; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + // Rows. Load 4-row input data. + in0 = load_input_data(input); + in1 = load_input_data(input + 8 * 1); + in2 = load_input_data(input + 8 * 2); + in3 = load_input_data(input + 8 * 3); + + // 8x4 Transpose + TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1); + // Stage1 + { + const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero); + const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero); + + tmp0 = _mm_madd_epi16(lo_17, stg1_0); + tmp2 = _mm_madd_epi16(lo_17, stg1_1); + tmp4 = _mm_madd_epi16(lo_35, stg1_2); + tmp6 = _mm_madd_epi16(lo_35, stg1_3); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp4 = _mm_add_epi32(tmp4, rounding); + tmp6 = _mm_add_epi32(tmp6, rounding); + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); + + stp1_4 = _mm_packs_epi32(tmp0, tmp2); + stp1_5 = _mm_packs_epi32(tmp4, tmp6); + } + + // Stage2 + { + const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero); + const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero); + + tmp0 = _mm_madd_epi16(lo_04, stg2_0); + tmp2 = _mm_madd_epi16(lo_04, stg2_1); + tmp4 = _mm_madd_epi16(lo_26, stg2_2); + tmp6 = _mm_madd_epi16(lo_26, stg2_3); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp4 = _mm_add_epi32(tmp4, rounding); + tmp6 = _mm_add_epi32(tmp6, rounding); + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); + + stp2_0 = _mm_packs_epi32(tmp0, tmp2); + stp2_2 = _mm_packs_epi32(tmp6, tmp4); + + tmp0 = _mm_adds_epi16(stp1_4, stp1_5); + tmp1 = _mm_subs_epi16(stp1_4, stp1_5); + + stp2_4 = tmp0; + stp2_5 = _mm_unpacklo_epi64(tmp1, zero); + stp2_6 = _mm_unpackhi_epi64(tmp1, zero); + } + + // Stage3 + { + const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6); + + tmp4 = _mm_adds_epi16(stp2_0, stp2_2); + tmp6 = _mm_subs_epi16(stp2_0, stp2_2); + + stp1_2 = _mm_unpackhi_epi64(tmp6, tmp4); + stp1_3 = _mm_unpacklo_epi64(tmp6, tmp4); + + tmp0 = _mm_madd_epi16(lo_56, stg3_0); + tmp2 = _mm_madd_epi16(lo_56, stg2_0); // stg3_1 = stg2_0 + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + + stp1_5 = _mm_packs_epi32(tmp0, tmp2); + } + + // Stage4 + tmp0 = _mm_adds_epi16(stp1_3, stp2_4); + tmp1 = _mm_adds_epi16(stp1_2, stp1_5); + tmp2 = _mm_subs_epi16(stp1_3, stp2_4); + tmp3 = _mm_subs_epi16(stp1_2, stp1_5); + + TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) + + IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, + in0, in1, in2, in3, in4, in5, in6, in7); + // Final rounding and shift + in0 = _mm_adds_epi16(in0, final_rounding); + in1 = _mm_adds_epi16(in1, final_rounding); + in2 = _mm_adds_epi16(in2, final_rounding); + in3 = _mm_adds_epi16(in3, final_rounding); + in4 = _mm_adds_epi16(in4, final_rounding); + in5 = _mm_adds_epi16(in5, final_rounding); + in6 = _mm_adds_epi16(in6, final_rounding); + in7 = _mm_adds_epi16(in7, final_rounding); + + in0 = _mm_srai_epi16(in0, 5); + in1 = _mm_srai_epi16(in1, 5); + in2 = _mm_srai_epi16(in2, 5); + in3 = _mm_srai_epi16(in3, 5); + in4 = _mm_srai_epi16(in4, 5); + in5 = _mm_srai_epi16(in5, 5); + in6 = _mm_srai_epi16(in6, 5); + in7 = _mm_srai_epi16(in7, 5); + + RECON_AND_STORE(dest + 0 * stride, in0); + RECON_AND_STORE(dest + 1 * stride, in1); + RECON_AND_STORE(dest + 2 * stride, in2); + RECON_AND_STORE(dest + 3 * stride, in3); + RECON_AND_STORE(dest + 4 * stride, in4); + RECON_AND_STORE(dest + 5 * stride, in5); + RECON_AND_STORE(dest + 6 * stride, in6); + RECON_AND_STORE(dest + 7 * stride, in7); +} + +#define IDCT16 \ + /* Stage2 */ \ + { \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ + const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \ + const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ + const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ + const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \ + stg2_0, stg2_1, stg2_2, stg2_3, \ + stp2_8, stp2_15, stp2_9, stp2_14) \ + \ + MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, \ + stg2_4, stg2_5, stg2_6, stg2_7, \ + stp2_10, stp2_13, stp2_11, stp2_12) \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ + const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ + const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ + \ + MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \ + stg3_0, stg3_1, stg3_2, stg3_3, \ + stp1_4, stp1_7, stp1_5, stp1_6) \ + \ + stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \ + stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ + stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ + \ + stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \ + stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ + const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ + const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, \ + stg4_0, stg4_1, stg4_2, stg4_3, \ + stp2_0, stp2_1, stp2_2, stp2_3) \ + \ + stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ + stg4_4, stg4_5, stg4_6, stg4_7, \ + stp2_9, stp2_14, stp2_10, stp2_13) \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ + \ + stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ + stg6_0, stg4_0, stg6_0, stg4_0, \ + stp2_10, stp2_13, stp2_11, stp2_12) \ + } + +#define IDCT16_10 \ + /* Stage2 */ \ + { \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, \ + stg2_0, stg2_1, stg2_6, stg2_7, \ + stp1_8_0, stp1_15, stp1_11, stp1_12_0) \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ + \ + MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, \ + stg3_0, stg3_1, \ + stp2_4, stp2_7) \ + \ + stp1_9 = stp1_8_0; \ + stp1_10 = stp1_11; \ + \ + stp1_13 = stp1_12_0; \ + stp1_14 = stp1_15; \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, \ + stg4_0, stg4_1, \ + stp1_0, stp1_1) \ + stp2_5 = stp2_4; \ + stp2_6 = stp2_7; \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ + stg4_4, stg4_5, stg4_6, stg4_7, \ + stp2_9, stp2_14, stp2_10, stp2_13) \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_2 = stp1_1; \ + stp1_3 = stp1_0; \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ + \ + stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ + stg6_0, stg4_0, stg6_0, stg4_0, \ + stp2_10, stp2_13, stp2_11, stp2_12) \ + } + +void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); + const __m128i zero = _mm_setzero_si128(); + + const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); + const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); + const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); + const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); + const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); + + const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); + const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); + + const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); + + const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + + __m128i in[16], l[16], r[16], *curr1; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_8_0, stp1_12_0; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i; + + curr1 = l; + for (i = 0; i < 2; i++) { + // 1-D idct + + // Load input data. + in[0] = load_input_data(input); + in[8] = load_input_data(input + 8 * 1); + in[1] = load_input_data(input + 8 * 2); + in[9] = load_input_data(input + 8 * 3); + in[2] = load_input_data(input + 8 * 4); + in[10] = load_input_data(input + 8 * 5); + in[3] = load_input_data(input + 8 * 6); + in[11] = load_input_data(input + 8 * 7); + in[4] = load_input_data(input + 8 * 8); + in[12] = load_input_data(input + 8 * 9); + in[5] = load_input_data(input + 8 * 10); + in[13] = load_input_data(input + 8 * 11); + in[6] = load_input_data(input + 8 * 12); + in[14] = load_input_data(input + 8 * 13); + in[7] = load_input_data(input + 8 * 14); + in[15] = load_input_data(input + 8 * 15); + + array_transpose_8x8(in, in); + array_transpose_8x8(in + 8, in + 8); + + IDCT16 + + // Stage7 + curr1[0] = _mm_add_epi16(stp2_0, stp1_15); + curr1[1] = _mm_add_epi16(stp2_1, stp1_14); + curr1[2] = _mm_add_epi16(stp2_2, stp2_13); + curr1[3] = _mm_add_epi16(stp2_3, stp2_12); + curr1[4] = _mm_add_epi16(stp2_4, stp2_11); + curr1[5] = _mm_add_epi16(stp2_5, stp2_10); + curr1[6] = _mm_add_epi16(stp2_6, stp1_9); + curr1[7] = _mm_add_epi16(stp2_7, stp1_8); + curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); + curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); + curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); + curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); + curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); + curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); + curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); + curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); + + curr1 = r; + input += 128; + } + for (i = 0; i < 2; i++) { + int j; + // 1-D idct + array_transpose_8x8(l + i * 8, in); + array_transpose_8x8(r + i * 8, in + 8); + + IDCT16 + + // 2-D + in[0] = _mm_add_epi16(stp2_0, stp1_15); + in[1] = _mm_add_epi16(stp2_1, stp1_14); + in[2] = _mm_add_epi16(stp2_2, stp2_13); + in[3] = _mm_add_epi16(stp2_3, stp2_12); + in[4] = _mm_add_epi16(stp2_4, stp2_11); + in[5] = _mm_add_epi16(stp2_5, stp2_10); + in[6] = _mm_add_epi16(stp2_6, stp1_9); + in[7] = _mm_add_epi16(stp2_7, stp1_8); + in[8] = _mm_sub_epi16(stp2_7, stp1_8); + in[9] = _mm_sub_epi16(stp2_6, stp1_9); + in[10] = _mm_sub_epi16(stp2_5, stp2_10); + in[11] = _mm_sub_epi16(stp2_4, stp2_11); + in[12] = _mm_sub_epi16(stp2_3, stp2_12); + in[13] = _mm_sub_epi16(stp2_2, stp2_13); + in[14] = _mm_sub_epi16(stp2_1, stp1_14); + in[15] = _mm_sub_epi16(stp2_0, stp1_15); + + for (j = 0; j < 16; ++j) { + // Final rounding and shift + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } + + dest += 8; + } +} + +void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a, i; + + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 6); + + dc_value = _mm_set1_epi16(a); + + for (i = 0; i < 16; ++i) { + RECON_AND_STORE(dest + 0, dc_value); + RECON_AND_STORE(dest + 8, dc_value); + dest += stride; + } +} + +static void iadst16_8col(__m128i *in) { + // perform 16x16 1-D ADST for 8 columns + __m128i s[16], x[16], u[32], v[32]; + const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); + const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); + const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); + const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); + const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); + const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); + const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); + const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); + const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64); + const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64); + const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64); + const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64); + const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64); + const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64); + const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64); + const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); + const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); + const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); + const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); + const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); + const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); + const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i kZero = _mm_set1_epi16(0); + + u[0] = _mm_unpacklo_epi16(in[15], in[0]); + u[1] = _mm_unpackhi_epi16(in[15], in[0]); + u[2] = _mm_unpacklo_epi16(in[13], in[2]); + u[3] = _mm_unpackhi_epi16(in[13], in[2]); + u[4] = _mm_unpacklo_epi16(in[11], in[4]); + u[5] = _mm_unpackhi_epi16(in[11], in[4]); + u[6] = _mm_unpacklo_epi16(in[9], in[6]); + u[7] = _mm_unpackhi_epi16(in[9], in[6]); + u[8] = _mm_unpacklo_epi16(in[7], in[8]); + u[9] = _mm_unpackhi_epi16(in[7], in[8]); + u[10] = _mm_unpacklo_epi16(in[5], in[10]); + u[11] = _mm_unpackhi_epi16(in[5], in[10]); + u[12] = _mm_unpacklo_epi16(in[3], in[12]); + u[13] = _mm_unpackhi_epi16(in[3], in[12]); + u[14] = _mm_unpacklo_epi16(in[1], in[14]); + u[15] = _mm_unpackhi_epi16(in[1], in[14]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31); + v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31); + v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01); + v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01); + v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27); + v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27); + v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05); + v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05); + v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23); + v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23); + v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09); + v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09); + v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19); + v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19); + v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13); + v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13); + v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15); + v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15); + v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17); + v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17); + v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11); + v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11); + v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21); + v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21); + v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07); + v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07); + v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25); + v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25); + v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03); + v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03); + v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29); + v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29); + + u[0] = _mm_add_epi32(v[0], v[16]); + u[1] = _mm_add_epi32(v[1], v[17]); + u[2] = _mm_add_epi32(v[2], v[18]); + u[3] = _mm_add_epi32(v[3], v[19]); + u[4] = _mm_add_epi32(v[4], v[20]); + u[5] = _mm_add_epi32(v[5], v[21]); + u[6] = _mm_add_epi32(v[6], v[22]); + u[7] = _mm_add_epi32(v[7], v[23]); + u[8] = _mm_add_epi32(v[8], v[24]); + u[9] = _mm_add_epi32(v[9], v[25]); + u[10] = _mm_add_epi32(v[10], v[26]); + u[11] = _mm_add_epi32(v[11], v[27]); + u[12] = _mm_add_epi32(v[12], v[28]); + u[13] = _mm_add_epi32(v[13], v[29]); + u[14] = _mm_add_epi32(v[14], v[30]); + u[15] = _mm_add_epi32(v[15], v[31]); + u[16] = _mm_sub_epi32(v[0], v[16]); + u[17] = _mm_sub_epi32(v[1], v[17]); + u[18] = _mm_sub_epi32(v[2], v[18]); + u[19] = _mm_sub_epi32(v[3], v[19]); + u[20] = _mm_sub_epi32(v[4], v[20]); + u[21] = _mm_sub_epi32(v[5], v[21]); + u[22] = _mm_sub_epi32(v[6], v[22]); + u[23] = _mm_sub_epi32(v[7], v[23]); + u[24] = _mm_sub_epi32(v[8], v[24]); + u[25] = _mm_sub_epi32(v[9], v[25]); + u[26] = _mm_sub_epi32(v[10], v[26]); + u[27] = _mm_sub_epi32(v[11], v[27]); + u[28] = _mm_sub_epi32(v[12], v[28]); + u[29] = _mm_sub_epi32(v[13], v[29]); + u[30] = _mm_sub_epi32(v[14], v[30]); + u[31] = _mm_sub_epi32(v[15], v[31]); + + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); + v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); + v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); + v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); + v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); + v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); + v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); + v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING); + v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING); + v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING); + v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING); + v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING); + v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING); + v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING); + v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING); + v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING); + v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING); + v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING); + v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING); + v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING); + v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING); + v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING); + v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); + u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); + u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); + u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); + u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); + u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); + u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); + u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS); + u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS); + u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS); + u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS); + u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS); + u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS); + u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS); + u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS); + u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS); + u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS); + u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS); + u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS); + u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS); + u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS); + u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS); + u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS); + + s[0] = _mm_packs_epi32(u[0], u[1]); + s[1] = _mm_packs_epi32(u[2], u[3]); + s[2] = _mm_packs_epi32(u[4], u[5]); + s[3] = _mm_packs_epi32(u[6], u[7]); + s[4] = _mm_packs_epi32(u[8], u[9]); + s[5] = _mm_packs_epi32(u[10], u[11]); + s[6] = _mm_packs_epi32(u[12], u[13]); + s[7] = _mm_packs_epi32(u[14], u[15]); + s[8] = _mm_packs_epi32(u[16], u[17]); + s[9] = _mm_packs_epi32(u[18], u[19]); + s[10] = _mm_packs_epi32(u[20], u[21]); + s[11] = _mm_packs_epi32(u[22], u[23]); + s[12] = _mm_packs_epi32(u[24], u[25]); + s[13] = _mm_packs_epi32(u[26], u[27]); + s[14] = _mm_packs_epi32(u[28], u[29]); + s[15] = _mm_packs_epi32(u[30], u[31]); + + // stage 2 + u[0] = _mm_unpacklo_epi16(s[8], s[9]); + u[1] = _mm_unpackhi_epi16(s[8], s[9]); + u[2] = _mm_unpacklo_epi16(s[10], s[11]); + u[3] = _mm_unpackhi_epi16(s[10], s[11]); + u[4] = _mm_unpacklo_epi16(s[12], s[13]); + u[5] = _mm_unpackhi_epi16(s[12], s[13]); + u[6] = _mm_unpacklo_epi16(s[14], s[15]); + u[7] = _mm_unpackhi_epi16(s[14], s[15]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28); + v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28); + v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04); + v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04); + v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12); + v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12); + v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20); + v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20); + v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04); + v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04); + v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28); + v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28); + v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20); + v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20); + v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12); + v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12); + + u[0] = _mm_add_epi32(v[0], v[8]); + u[1] = _mm_add_epi32(v[1], v[9]); + u[2] = _mm_add_epi32(v[2], v[10]); + u[3] = _mm_add_epi32(v[3], v[11]); + u[4] = _mm_add_epi32(v[4], v[12]); + u[5] = _mm_add_epi32(v[5], v[13]); + u[6] = _mm_add_epi32(v[6], v[14]); + u[7] = _mm_add_epi32(v[7], v[15]); + u[8] = _mm_sub_epi32(v[0], v[8]); + u[9] = _mm_sub_epi32(v[1], v[9]); + u[10] = _mm_sub_epi32(v[2], v[10]); + u[11] = _mm_sub_epi32(v[3], v[11]); + u[12] = _mm_sub_epi32(v[4], v[12]); + u[13] = _mm_sub_epi32(v[5], v[13]); + u[14] = _mm_sub_epi32(v[6], v[14]); + u[15] = _mm_sub_epi32(v[7], v[15]); + + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); + v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); + v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); + v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); + v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); + v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); + v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); + u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); + u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); + u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); + u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); + u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); + u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); + + x[0] = _mm_add_epi16(s[0], s[4]); + x[1] = _mm_add_epi16(s[1], s[5]); + x[2] = _mm_add_epi16(s[2], s[6]); + x[3] = _mm_add_epi16(s[3], s[7]); + x[4] = _mm_sub_epi16(s[0], s[4]); + x[5] = _mm_sub_epi16(s[1], s[5]); + x[6] = _mm_sub_epi16(s[2], s[6]); + x[7] = _mm_sub_epi16(s[3], s[7]); + x[8] = _mm_packs_epi32(u[0], u[1]); + x[9] = _mm_packs_epi32(u[2], u[3]); + x[10] = _mm_packs_epi32(u[4], u[5]); + x[11] = _mm_packs_epi32(u[6], u[7]); + x[12] = _mm_packs_epi32(u[8], u[9]); + x[13] = _mm_packs_epi32(u[10], u[11]); + x[14] = _mm_packs_epi32(u[12], u[13]); + x[15] = _mm_packs_epi32(u[14], u[15]); + + // stage 3 + u[0] = _mm_unpacklo_epi16(x[4], x[5]); + u[1] = _mm_unpackhi_epi16(x[4], x[5]); + u[2] = _mm_unpacklo_epi16(x[6], x[7]); + u[3] = _mm_unpackhi_epi16(x[6], x[7]); + u[4] = _mm_unpacklo_epi16(x[12], x[13]); + u[5] = _mm_unpackhi_epi16(x[12], x[13]); + u[6] = _mm_unpacklo_epi16(x[14], x[15]); + u[7] = _mm_unpackhi_epi16(x[14], x[15]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); + v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); + v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); + v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); + v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); + v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); + v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); + v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); + v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24); + v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24); + v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08); + v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08); + v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08); + v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08); + v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24); + v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24); + + u[0] = _mm_add_epi32(v[0], v[4]); + u[1] = _mm_add_epi32(v[1], v[5]); + u[2] = _mm_add_epi32(v[2], v[6]); + u[3] = _mm_add_epi32(v[3], v[7]); + u[4] = _mm_sub_epi32(v[0], v[4]); + u[5] = _mm_sub_epi32(v[1], v[5]); + u[6] = _mm_sub_epi32(v[2], v[6]); + u[7] = _mm_sub_epi32(v[3], v[7]); + u[8] = _mm_add_epi32(v[8], v[12]); + u[9] = _mm_add_epi32(v[9], v[13]); + u[10] = _mm_add_epi32(v[10], v[14]); + u[11] = _mm_add_epi32(v[11], v[15]); + u[12] = _mm_sub_epi32(v[8], v[12]); + u[13] = _mm_sub_epi32(v[9], v[13]); + u[14] = _mm_sub_epi32(v[10], v[14]); + u[15] = _mm_sub_epi32(v[11], v[15]); + + u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); + u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); + u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); + u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); + u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); + u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); + u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); + + v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); + v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); + v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); + v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); + v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); + v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); + v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); + v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); + + s[0] = _mm_add_epi16(x[0], x[2]); + s[1] = _mm_add_epi16(x[1], x[3]); + s[2] = _mm_sub_epi16(x[0], x[2]); + s[3] = _mm_sub_epi16(x[1], x[3]); + s[4] = _mm_packs_epi32(v[0], v[1]); + s[5] = _mm_packs_epi32(v[2], v[3]); + s[6] = _mm_packs_epi32(v[4], v[5]); + s[7] = _mm_packs_epi32(v[6], v[7]); + s[8] = _mm_add_epi16(x[8], x[10]); + s[9] = _mm_add_epi16(x[9], x[11]); + s[10] = _mm_sub_epi16(x[8], x[10]); + s[11] = _mm_sub_epi16(x[9], x[11]); + s[12] = _mm_packs_epi32(v[8], v[9]); + s[13] = _mm_packs_epi32(v[10], v[11]); + s[14] = _mm_packs_epi32(v[12], v[13]); + s[15] = _mm_packs_epi32(v[14], v[15]); + + // stage 4 + u[0] = _mm_unpacklo_epi16(s[2], s[3]); + u[1] = _mm_unpackhi_epi16(s[2], s[3]); + u[2] = _mm_unpacklo_epi16(s[6], s[7]); + u[3] = _mm_unpackhi_epi16(s[6], s[7]); + u[4] = _mm_unpacklo_epi16(s[10], s[11]); + u[5] = _mm_unpackhi_epi16(s[10], s[11]); + u[6] = _mm_unpacklo_epi16(s[14], s[15]); + u[7] = _mm_unpackhi_epi16(s[14], s[15]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16); + v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16); + v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); + v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); + v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16); + v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16); + v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16); + v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16); + v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16); + v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16); + v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16); + v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16); + v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16); + v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16); + v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16); + v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); + u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); + u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); + u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); + u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); + u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); + u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); + u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); + + v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); + v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); + v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); + v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); + v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); + v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); + v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); + v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); + + in[0] = s[0]; + in[1] = _mm_sub_epi16(kZero, s[8]); + in[2] = s[12]; + in[3] = _mm_sub_epi16(kZero, s[4]); + in[4] = _mm_packs_epi32(v[4], v[5]); + in[5] = _mm_packs_epi32(v[12], v[13]); + in[6] = _mm_packs_epi32(v[8], v[9]); + in[7] = _mm_packs_epi32(v[0], v[1]); + in[8] = _mm_packs_epi32(v[2], v[3]); + in[9] = _mm_packs_epi32(v[10], v[11]); + in[10] = _mm_packs_epi32(v[14], v[15]); + in[11] = _mm_packs_epi32(v[6], v[7]); + in[12] = s[5]; + in[13] = _mm_sub_epi16(kZero, s[13]); + in[14] = s[9]; + in[15] = _mm_sub_epi16(kZero, s[1]); +} + +static void idct16_8col(__m128i *in) { + const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); + const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); + const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); + const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); + const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); + const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); + const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); + const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + __m128i v[16], u[16], s[16], t[16]; + + // stage 1 + s[0] = in[0]; + s[1] = in[8]; + s[2] = in[4]; + s[3] = in[12]; + s[4] = in[2]; + s[5] = in[10]; + s[6] = in[6]; + s[7] = in[14]; + s[8] = in[1]; + s[9] = in[9]; + s[10] = in[5]; + s[11] = in[13]; + s[12] = in[3]; + s[13] = in[11]; + s[14] = in[7]; + s[15] = in[15]; + + // stage 2 + u[0] = _mm_unpacklo_epi16(s[8], s[15]); + u[1] = _mm_unpackhi_epi16(s[8], s[15]); + u[2] = _mm_unpacklo_epi16(s[9], s[14]); + u[3] = _mm_unpackhi_epi16(s[9], s[14]); + u[4] = _mm_unpacklo_epi16(s[10], s[13]); + u[5] = _mm_unpackhi_epi16(s[10], s[13]); + u[6] = _mm_unpacklo_epi16(s[11], s[12]); + u[7] = _mm_unpackhi_epi16(s[11], s[12]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02); + v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02); + v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30); + v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30); + v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18); + v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18); + v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14); + v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14); + v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10); + v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10); + v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22); + v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22); + v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26); + v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26); + v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06); + v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); + u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); + u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); + u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); + u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); + u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); + u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); + u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); + u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); + u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); + u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); + u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); + u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); + u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); + + s[8] = _mm_packs_epi32(u[0], u[1]); + s[15] = _mm_packs_epi32(u[2], u[3]); + s[9] = _mm_packs_epi32(u[4], u[5]); + s[14] = _mm_packs_epi32(u[6], u[7]); + s[10] = _mm_packs_epi32(u[8], u[9]); + s[13] = _mm_packs_epi32(u[10], u[11]); + s[11] = _mm_packs_epi32(u[12], u[13]); + s[12] = _mm_packs_epi32(u[14], u[15]); + + // stage 3 + t[0] = s[0]; + t[1] = s[1]; + t[2] = s[2]; + t[3] = s[3]; + u[0] = _mm_unpacklo_epi16(s[4], s[7]); + u[1] = _mm_unpackhi_epi16(s[4], s[7]); + u[2] = _mm_unpacklo_epi16(s[5], s[6]); + u[3] = _mm_unpackhi_epi16(s[5], s[6]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04); + v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04); + v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28); + v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28); + v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20); + v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20); + v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12); + v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + + t[4] = _mm_packs_epi32(u[0], u[1]); + t[7] = _mm_packs_epi32(u[2], u[3]); + t[5] = _mm_packs_epi32(u[4], u[5]); + t[6] = _mm_packs_epi32(u[6], u[7]); + t[8] = _mm_add_epi16(s[8], s[9]); + t[9] = _mm_sub_epi16(s[8], s[9]); + t[10] = _mm_sub_epi16(s[11], s[10]); + t[11] = _mm_add_epi16(s[10], s[11]); + t[12] = _mm_add_epi16(s[12], s[13]); + t[13] = _mm_sub_epi16(s[12], s[13]); + t[14] = _mm_sub_epi16(s[15], s[14]); + t[15] = _mm_add_epi16(s[14], s[15]); + + // stage 4 + u[0] = _mm_unpacklo_epi16(t[0], t[1]); + u[1] = _mm_unpackhi_epi16(t[0], t[1]); + u[2] = _mm_unpacklo_epi16(t[2], t[3]); + u[3] = _mm_unpackhi_epi16(t[2], t[3]); + u[4] = _mm_unpacklo_epi16(t[9], t[14]); + u[5] = _mm_unpackhi_epi16(t[9], t[14]); + u[6] = _mm_unpacklo_epi16(t[10], t[13]); + u[7] = _mm_unpackhi_epi16(t[10], t[13]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16); + v[1] = _mm_madd_epi16(u[1], k__cospi_p16_p16); + v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); + v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); + v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08); + v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08); + v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); + v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); + v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24); + v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24); + v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08); + v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08); + v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08); + v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08); + v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24); + v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); + u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); + u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); + u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); + u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); + u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); + u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); + u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); + u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); + u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); + u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); + u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); + u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); + u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); + + s[0] = _mm_packs_epi32(u[0], u[1]); + s[1] = _mm_packs_epi32(u[2], u[3]); + s[2] = _mm_packs_epi32(u[4], u[5]); + s[3] = _mm_packs_epi32(u[6], u[7]); + s[4] = _mm_add_epi16(t[4], t[5]); + s[5] = _mm_sub_epi16(t[4], t[5]); + s[6] = _mm_sub_epi16(t[7], t[6]); + s[7] = _mm_add_epi16(t[6], t[7]); + s[8] = t[8]; + s[15] = t[15]; + s[9] = _mm_packs_epi32(u[8], u[9]); + s[14] = _mm_packs_epi32(u[10], u[11]); + s[10] = _mm_packs_epi32(u[12], u[13]); + s[13] = _mm_packs_epi32(u[14], u[15]); + s[11] = t[11]; + s[12] = t[12]; + + // stage 5 + t[0] = _mm_add_epi16(s[0], s[3]); + t[1] = _mm_add_epi16(s[1], s[2]); + t[2] = _mm_sub_epi16(s[1], s[2]); + t[3] = _mm_sub_epi16(s[0], s[3]); + t[4] = s[4]; + t[7] = s[7]; + + u[0] = _mm_unpacklo_epi16(s[5], s[6]); + u[1] = _mm_unpackhi_epi16(s[5], s[6]); + v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); + v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); + v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); + v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + t[5] = _mm_packs_epi32(u[0], u[1]); + t[6] = _mm_packs_epi32(u[2], u[3]); + + t[8] = _mm_add_epi16(s[8], s[11]); + t[9] = _mm_add_epi16(s[9], s[10]); + t[10] = _mm_sub_epi16(s[9], s[10]); + t[11] = _mm_sub_epi16(s[8], s[11]); + t[12] = _mm_sub_epi16(s[15], s[12]); + t[13] = _mm_sub_epi16(s[14], s[13]); + t[14] = _mm_add_epi16(s[13], s[14]); + t[15] = _mm_add_epi16(s[12], s[15]); + + // stage 6 + s[0] = _mm_add_epi16(t[0], t[7]); + s[1] = _mm_add_epi16(t[1], t[6]); + s[2] = _mm_add_epi16(t[2], t[5]); + s[3] = _mm_add_epi16(t[3], t[4]); + s[4] = _mm_sub_epi16(t[3], t[4]); + s[5] = _mm_sub_epi16(t[2], t[5]); + s[6] = _mm_sub_epi16(t[1], t[6]); + s[7] = _mm_sub_epi16(t[0], t[7]); + s[8] = t[8]; + s[9] = t[9]; + + u[0] = _mm_unpacklo_epi16(t[10], t[13]); + u[1] = _mm_unpackhi_epi16(t[10], t[13]); + u[2] = _mm_unpacklo_epi16(t[11], t[12]); + u[3] = _mm_unpackhi_epi16(t[11], t[12]); + + v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); + v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); + v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); + v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); + v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); + v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); + v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); + v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); + + u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); + u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); + u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); + u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); + u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); + u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); + u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); + u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); + + u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); + + s[10] = _mm_packs_epi32(u[0], u[1]); + s[13] = _mm_packs_epi32(u[2], u[3]); + s[11] = _mm_packs_epi32(u[4], u[5]); + s[12] = _mm_packs_epi32(u[6], u[7]); + s[14] = t[14]; + s[15] = t[15]; + + // stage 7 + in[0] = _mm_add_epi16(s[0], s[15]); + in[1] = _mm_add_epi16(s[1], s[14]); + in[2] = _mm_add_epi16(s[2], s[13]); + in[3] = _mm_add_epi16(s[3], s[12]); + in[4] = _mm_add_epi16(s[4], s[11]); + in[5] = _mm_add_epi16(s[5], s[10]); + in[6] = _mm_add_epi16(s[6], s[9]); + in[7] = _mm_add_epi16(s[7], s[8]); + in[8] = _mm_sub_epi16(s[7], s[8]); + in[9] = _mm_sub_epi16(s[6], s[9]); + in[10] = _mm_sub_epi16(s[5], s[10]); + in[11] = _mm_sub_epi16(s[4], s[11]); + in[12] = _mm_sub_epi16(s[3], s[12]); + in[13] = _mm_sub_epi16(s[2], s[13]); + in[14] = _mm_sub_epi16(s[1], s[14]); + in[15] = _mm_sub_epi16(s[0], s[15]); +} + +void idct16_sse2(__m128i *in0, __m128i *in1) { + array_transpose_16x16(in0, in1); + idct16_8col(in0); + idct16_8col(in1); +} + +void iadst16_sse2(__m128i *in0, __m128i *in1) { + array_transpose_16x16(in0, in1); + iadst16_8col(in0); + iadst16_8col(in1); +} + +void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); + const __m128i zero = _mm_setzero_si128(); + + const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); + + const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + + const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); + + const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + __m128i in[16], l[16]; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_8_0, stp1_12_0; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i; + // First 1-D inverse DCT + // Load input data. + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8 * 2); + in[2] = load_input_data(input + 8 * 4); + in[3] = load_input_data(input + 8 * 6); + + TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); + + // Stage2 + { + const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero); + const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); + + tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); + tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); + tmp5 = _mm_madd_epi16(lo_13_3, stg2_6); + tmp7 = _mm_madd_epi16(lo_13_3, stg2_7); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp5 = _mm_add_epi32(tmp5, rounding); + tmp7 = _mm_add_epi32(tmp7, rounding); + + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); + tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); + + stp2_8 = _mm_packs_epi32(tmp0, tmp2); + stp2_11 = _mm_packs_epi32(tmp5, tmp7); + } + + // Stage3 + { + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero); + + tmp0 = _mm_madd_epi16(lo_2_14, stg3_0); + tmp2 = _mm_madd_epi16(lo_2_14, stg3_1); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + + stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); + stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); + + stp1_4 = _mm_packs_epi32(tmp0, tmp2); + } + + // Stage4 + { + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); + + tmp0 = _mm_madd_epi16(lo_0_8, stg4_0); + tmp2 = _mm_madd_epi16(lo_0_8, stg4_1); + tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); + tmp3 = _mm_madd_epi16(lo_9_14, stg4_5); + tmp5 = _mm_madd_epi16(lo_10_13, stg4_6); + tmp7 = _mm_madd_epi16(lo_10_13, stg4_7); + + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp1 = _mm_add_epi32(tmp1, rounding); + tmp3 = _mm_add_epi32(tmp3, rounding); + tmp5 = _mm_add_epi32(tmp5, rounding); + tmp7 = _mm_add_epi32(tmp7, rounding); + + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); + tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); + tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); + + stp1_0 = _mm_packs_epi32(tmp0, tmp0); + stp1_1 = _mm_packs_epi32(tmp2, tmp2); + stp2_9 = _mm_packs_epi32(tmp1, tmp3); + stp2_10 = _mm_packs_epi32(tmp5, tmp7); + + stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); + } + + // Stage5 and Stage6 + { + tmp0 = _mm_add_epi16(stp2_8, stp2_11); + tmp1 = _mm_sub_epi16(stp2_8, stp2_11); + tmp2 = _mm_add_epi16(stp2_9, stp2_10); + tmp3 = _mm_sub_epi16(stp2_9, stp2_10); + + stp1_9 = _mm_unpacklo_epi64(tmp2, zero); + stp1_10 = _mm_unpacklo_epi64(tmp3, zero); + stp1_8 = _mm_unpacklo_epi64(tmp0, zero); + stp1_11 = _mm_unpacklo_epi64(tmp1, zero); + + stp1_13 = _mm_unpackhi_epi64(tmp3, zero); + stp1_14 = _mm_unpackhi_epi64(tmp2, zero); + stp1_12 = _mm_unpackhi_epi64(tmp1, zero); + stp1_15 = _mm_unpackhi_epi64(tmp0, zero); + } + + // Stage6 + { + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4); + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); + + tmp1 = _mm_madd_epi16(lo_6_5, stg4_1); + tmp3 = _mm_madd_epi16(lo_6_5, stg4_0); + tmp0 = _mm_madd_epi16(lo_10_13, stg6_0); + tmp2 = _mm_madd_epi16(lo_10_13, stg4_0); + tmp4 = _mm_madd_epi16(lo_11_12, stg6_0); + tmp6 = _mm_madd_epi16(lo_11_12, stg4_0); + + tmp1 = _mm_add_epi32(tmp1, rounding); + tmp3 = _mm_add_epi32(tmp3, rounding); + tmp0 = _mm_add_epi32(tmp0, rounding); + tmp2 = _mm_add_epi32(tmp2, rounding); + tmp4 = _mm_add_epi32(tmp4, rounding); + tmp6 = _mm_add_epi32(tmp6, rounding); + + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); + + stp1_6 = _mm_packs_epi32(tmp3, tmp1); + + stp2_10 = _mm_packs_epi32(tmp0, zero); + stp2_13 = _mm_packs_epi32(tmp2, zero); + stp2_11 = _mm_packs_epi32(tmp4, zero); + stp2_12 = _mm_packs_epi32(tmp6, zero); + + tmp0 = _mm_add_epi16(stp1_0, stp1_4); + tmp1 = _mm_sub_epi16(stp1_0, stp1_4); + tmp2 = _mm_add_epi16(stp1_1, stp1_6); + tmp3 = _mm_sub_epi16(stp1_1, stp1_6); + + stp2_0 = _mm_unpackhi_epi64(tmp0, zero); + stp2_1 = _mm_unpacklo_epi64(tmp2, zero); + stp2_2 = _mm_unpackhi_epi64(tmp2, zero); + stp2_3 = _mm_unpacklo_epi64(tmp0, zero); + stp2_4 = _mm_unpacklo_epi64(tmp1, zero); + stp2_5 = _mm_unpackhi_epi64(tmp3, zero); + stp2_6 = _mm_unpacklo_epi64(tmp3, zero); + stp2_7 = _mm_unpackhi_epi64(tmp1, zero); + } + + // Stage7. Left 8x16 only. + l[0] = _mm_add_epi16(stp2_0, stp1_15); + l[1] = _mm_add_epi16(stp2_1, stp1_14); + l[2] = _mm_add_epi16(stp2_2, stp2_13); + l[3] = _mm_add_epi16(stp2_3, stp2_12); + l[4] = _mm_add_epi16(stp2_4, stp2_11); + l[5] = _mm_add_epi16(stp2_5, stp2_10); + l[6] = _mm_add_epi16(stp2_6, stp1_9); + l[7] = _mm_add_epi16(stp2_7, stp1_8); + l[8] = _mm_sub_epi16(stp2_7, stp1_8); + l[9] = _mm_sub_epi16(stp2_6, stp1_9); + l[10] = _mm_sub_epi16(stp2_5, stp2_10); + l[11] = _mm_sub_epi16(stp2_4, stp2_11); + l[12] = _mm_sub_epi16(stp2_3, stp2_12); + l[13] = _mm_sub_epi16(stp2_2, stp2_13); + l[14] = _mm_sub_epi16(stp2_1, stp1_14); + l[15] = _mm_sub_epi16(stp2_0, stp1_15); + + // Second 1-D inverse transform, performed per 8x16 block + for (i = 0; i < 2; i++) { + int j; + array_transpose_4X8(l + 8 * i, in); + + IDCT16_10 + + // Stage7 + in[0] = _mm_add_epi16(stp2_0, stp1_15); + in[1] = _mm_add_epi16(stp2_1, stp1_14); + in[2] = _mm_add_epi16(stp2_2, stp2_13); + in[3] = _mm_add_epi16(stp2_3, stp2_12); + in[4] = _mm_add_epi16(stp2_4, stp2_11); + in[5] = _mm_add_epi16(stp2_5, stp2_10); + in[6] = _mm_add_epi16(stp2_6, stp1_9); + in[7] = _mm_add_epi16(stp2_7, stp1_8); + in[8] = _mm_sub_epi16(stp2_7, stp1_8); + in[9] = _mm_sub_epi16(stp2_6, stp1_9); + in[10] = _mm_sub_epi16(stp2_5, stp2_10); + in[11] = _mm_sub_epi16(stp2_4, stp2_11); + in[12] = _mm_sub_epi16(stp2_3, stp2_12); + in[13] = _mm_sub_epi16(stp2_2, stp2_13); + in[14] = _mm_sub_epi16(stp2_1, stp1_14); + in[15] = _mm_sub_epi16(stp2_0, stp1_15); + + for (j = 0; j < 16; ++j) { + // Final rounding and shift + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } + + dest += 8; + } +} + +#define LOAD_DQCOEFF(reg, input) \ + { \ + reg = load_input_data(input); \ + input += 8; \ + } \ + +#define IDCT32_34 \ +/* Stage1 */ \ +{ \ + const __m128i zero = _mm_setzero_si128();\ + const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \ + const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \ + \ + const __m128i lo_25_7= _mm_unpacklo_epi16(zero, in[7]); \ + const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \ + \ + const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \ + const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \ + \ + const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \ + const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \ + \ + MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, \ + stg1_1, stp1_16, stp1_31); \ + MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, \ + stg1_7, stp1_19, stp1_28); \ + MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, \ + stg1_9, stp1_20, stp1_27); \ + MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, \ + stg1_15, stp1_23, stp1_24); \ +} \ +\ +/* Stage2 */ \ +{ \ + const __m128i zero = _mm_setzero_si128();\ + const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \ + const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \ + \ + const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \ + const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \ + \ + MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, \ + stg2_1, stp2_8, stp2_15); \ + MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, \ + stg2_7, stp2_11, stp2_12); \ + \ + stp2_16 = stp1_16; \ + stp2_19 = stp1_19; \ + \ + stp2_20 = stp1_20; \ + stp2_23 = stp1_23; \ + \ + stp2_24 = stp1_24; \ + stp2_27 = stp1_27; \ + \ + stp2_28 = stp1_28; \ + stp2_31 = stp1_31; \ +} \ +\ +/* Stage3 */ \ +{ \ + const __m128i zero = _mm_setzero_si128();\ + const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \ + const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \ + \ + const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \ + const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp2_24); \ + \ + MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, \ + stg3_1, stp1_4, stp1_7); \ + \ + stp1_8 = stp2_8; \ + stp1_11 = stp2_11; \ + stp1_12 = stp2_12; \ + stp1_15 = stp2_15; \ + \ + MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ + stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ + stp1_18, stp1_29) \ + MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ + stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ + stp1_22, stp1_25) \ + \ + stp1_16 = stp2_16; \ + stp1_31 = stp2_31; \ + stp1_19 = stp2_19; \ + stp1_20 = stp2_20; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_27 = stp2_27; \ + stp1_28 = stp2_28; \ +} \ +\ +/* Stage4 */ \ +{ \ + const __m128i zero = _mm_setzero_si128();\ + const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \ + const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \ + \ + MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, \ + stg4_1, stp2_0, stp2_1); \ + \ + stp2_4 = stp1_4; \ + stp2_5 = stp1_4; \ + stp2_6 = stp1_7; \ + stp2_7 = stp1_7; \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ + stp2_10, stp2_13) \ + \ + stp2_8 = stp1_8; \ + stp2_15 = stp1_15; \ + stp2_11 = stp1_11; \ + stp2_12 = stp1_12; \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ + stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ + stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ + stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ + stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ + stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ + stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ + stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ + stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ +} \ +\ +/* Stage5 */ \ +{ \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ + const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + stp1_0 = stp2_0; \ + stp1_1 = stp2_1; \ + stp1_2 = stp2_1; \ + stp1_3 = stp2_0; \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_4 = stp2_4; \ + stp1_7 = stp2_7; \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + \ + MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ + stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ + stp1_19, stp1_28) \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ + stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + \ + stp1_22 = stp2_22; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_25 = stp2_25; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ +} \ +\ +/* Stage6 */ \ +{ \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ + \ + stp2_8 = stp1_8; \ + stp2_9 = stp1_9; \ + stp2_14 = stp1_14; \ + stp2_15 = stp1_15; \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ + stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ + stp2_13, stp2_11, stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ + stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ + stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ + stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ + \ + stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ + stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ + stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ + stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ +} \ +\ +/* Stage7 */ \ +{ \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ + const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ + stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ + stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ + stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ + stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ + stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ + stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ + stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ + stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ + stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + stp1_18 = stp2_18; \ + stp1_19 = stp2_19; \ + \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ + stp1_23, stp1_24) \ + \ + stp1_28 = stp2_28; \ + stp1_29 = stp2_29; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ +} + + +#define IDCT32 \ +/* Stage1 */ \ +{ \ + const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ + const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \ + const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \ + const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \ + \ + const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \ + const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \ + const __m128i lo_25_7= _mm_unpacklo_epi16(in[25], in[7]); \ + const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \ + \ + const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \ + const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \ + const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \ + const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \ + \ + const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \ + const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \ + const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \ + const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \ + stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, \ + stp1_17, stp1_30) \ + MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, \ + stg1_5, stg1_6, stg1_7, stp1_18, stp1_29, \ + stp1_19, stp1_28) \ + MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \ + stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \ + stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \ + stp1_23, stp1_24) \ +} \ +\ +/* Stage2 */ \ +{ \ + const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]); \ + const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \ + const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \ + const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \ + \ + const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \ + const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \ + const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \ + const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \ + \ + MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \ + stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \ + stp2_14) \ + MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \ + stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, \ + stp2_11, stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \ + stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \ + stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \ + \ + stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \ + stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \ + stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \ + stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \ + \ + stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \ + stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \ + stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \ +} \ +\ +/* Stage3 */ \ +{ \ + const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \ + const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \ + const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \ + const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \ + \ + const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \ + const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + \ + MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \ + stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \ + stp1_6) \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \ + stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ + stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ + stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \ + stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ + \ + MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ + stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ + stp1_18, stp1_29) \ + MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ + stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ + stp1_22, stp1_25) \ + \ + stp1_16 = stp2_16; \ + stp1_31 = stp2_31; \ + stp1_19 = stp2_19; \ + stp1_20 = stp2_20; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_27 = stp2_27; \ + stp1_28 = stp2_28; \ +} \ +\ +/* Stage4 */ \ +{ \ + const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \ + const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \ + const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \ + const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, \ + stg4_1, stg4_2, stg4_3, stp2_0, stp2_1, \ + stp2_2, stp2_3) \ + \ + stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ + stp2_10, stp2_13) \ + \ + stp2_8 = stp1_8; \ + stp2_15 = stp1_15; \ + stp2_11 = stp1_11; \ + stp2_12 = stp1_12; \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ + stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ + stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ + stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ + stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ + stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ + stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ + stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ + stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ +} \ +\ +/* Stage5 */ \ +{ \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ + const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_4 = stp2_4; \ + stp1_7 = stp2_7; \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + \ + MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ + stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ + stp1_19, stp1_28) \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ + stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + \ + stp1_22 = stp2_22; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_25 = stp2_25; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ +} \ +\ +/* Stage6 */ \ +{ \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ + \ + stp2_8 = stp1_8; \ + stp2_9 = stp1_9; \ + stp2_14 = stp1_14; \ + stp2_15 = stp1_15; \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ + stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ + stp2_13, stp2_11, stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ + stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ + stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ + stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ + \ + stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ + stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ + stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ + stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ +} \ +\ +/* Stage7 */ \ +{ \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ + const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ + stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ + stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ + stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ + stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ + stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ + stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ + stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ + stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ + stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + stp1_18 = stp2_18; \ + stp1_19 = stp2_19; \ + \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ + stp1_23, stp1_24) \ + \ + stp1_28 = stp2_28; \ + stp1_29 = stp2_29; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ +} + +// Only upper-left 8x8 has non-zero coeff +void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1<<5); + + // idct constants for each stage + const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); + const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); + const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); + const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); + const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); + const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); + const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); + const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); + + const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); + + const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); + const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); + const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); + const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); + + const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + + const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + + __m128i in[32], col[32]; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, + stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, + stp1_30, stp1_31; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, + stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, + stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, + stp2_30, stp2_31; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i; + + // Load input data. Only need to load the top left 8x8 block. + in[0] = load_input_data(input); + in[1] = load_input_data(input + 32); + in[2] = load_input_data(input + 64); + in[3] = load_input_data(input + 96); + in[4] = load_input_data(input + 128); + in[5] = load_input_data(input + 160); + in[6] = load_input_data(input + 192); + in[7] = load_input_data(input + 224); + + for (i = 8; i < 32; ++i) { + in[i] = _mm_setzero_si128(); + } + + array_transpose_8x8(in, in); + // TODO(hkuang): Following transposes are unnecessary. But remove them will + // lead to performance drop on some devices. + array_transpose_8x8(in + 8, in + 8); + array_transpose_8x8(in + 16, in + 16); + array_transpose_8x8(in + 24, in + 24); + + IDCT32_34 + + // 1_D: Store 32 intermediate results for each 8x32 block. + col[0] = _mm_add_epi16(stp1_0, stp1_31); + col[1] = _mm_add_epi16(stp1_1, stp1_30); + col[2] = _mm_add_epi16(stp1_2, stp1_29); + col[3] = _mm_add_epi16(stp1_3, stp1_28); + col[4] = _mm_add_epi16(stp1_4, stp1_27); + col[5] = _mm_add_epi16(stp1_5, stp1_26); + col[6] = _mm_add_epi16(stp1_6, stp1_25); + col[7] = _mm_add_epi16(stp1_7, stp1_24); + col[8] = _mm_add_epi16(stp1_8, stp1_23); + col[9] = _mm_add_epi16(stp1_9, stp1_22); + col[10] = _mm_add_epi16(stp1_10, stp1_21); + col[11] = _mm_add_epi16(stp1_11, stp1_20); + col[12] = _mm_add_epi16(stp1_12, stp1_19); + col[13] = _mm_add_epi16(stp1_13, stp1_18); + col[14] = _mm_add_epi16(stp1_14, stp1_17); + col[15] = _mm_add_epi16(stp1_15, stp1_16); + col[16] = _mm_sub_epi16(stp1_15, stp1_16); + col[17] = _mm_sub_epi16(stp1_14, stp1_17); + col[18] = _mm_sub_epi16(stp1_13, stp1_18); + col[19] = _mm_sub_epi16(stp1_12, stp1_19); + col[20] = _mm_sub_epi16(stp1_11, stp1_20); + col[21] = _mm_sub_epi16(stp1_10, stp1_21); + col[22] = _mm_sub_epi16(stp1_9, stp1_22); + col[23] = _mm_sub_epi16(stp1_8, stp1_23); + col[24] = _mm_sub_epi16(stp1_7, stp1_24); + col[25] = _mm_sub_epi16(stp1_6, stp1_25); + col[26] = _mm_sub_epi16(stp1_5, stp1_26); + col[27] = _mm_sub_epi16(stp1_4, stp1_27); + col[28] = _mm_sub_epi16(stp1_3, stp1_28); + col[29] = _mm_sub_epi16(stp1_2, stp1_29); + col[30] = _mm_sub_epi16(stp1_1, stp1_30); + col[31] = _mm_sub_epi16(stp1_0, stp1_31); + for (i = 0; i < 4; i++) { + int j; + const __m128i zero = _mm_setzero_si128(); + // Transpose 32x8 block to 8x32 block + array_transpose_8x8(col + i * 8, in); + IDCT32_34 + + // 2_D: Calculate the results and store them to destination. + in[0] = _mm_add_epi16(stp1_0, stp1_31); + in[1] = _mm_add_epi16(stp1_1, stp1_30); + in[2] = _mm_add_epi16(stp1_2, stp1_29); + in[3] = _mm_add_epi16(stp1_3, stp1_28); + in[4] = _mm_add_epi16(stp1_4, stp1_27); + in[5] = _mm_add_epi16(stp1_5, stp1_26); + in[6] = _mm_add_epi16(stp1_6, stp1_25); + in[7] = _mm_add_epi16(stp1_7, stp1_24); + in[8] = _mm_add_epi16(stp1_8, stp1_23); + in[9] = _mm_add_epi16(stp1_9, stp1_22); + in[10] = _mm_add_epi16(stp1_10, stp1_21); + in[11] = _mm_add_epi16(stp1_11, stp1_20); + in[12] = _mm_add_epi16(stp1_12, stp1_19); + in[13] = _mm_add_epi16(stp1_13, stp1_18); + in[14] = _mm_add_epi16(stp1_14, stp1_17); + in[15] = _mm_add_epi16(stp1_15, stp1_16); + in[16] = _mm_sub_epi16(stp1_15, stp1_16); + in[17] = _mm_sub_epi16(stp1_14, stp1_17); + in[18] = _mm_sub_epi16(stp1_13, stp1_18); + in[19] = _mm_sub_epi16(stp1_12, stp1_19); + in[20] = _mm_sub_epi16(stp1_11, stp1_20); + in[21] = _mm_sub_epi16(stp1_10, stp1_21); + in[22] = _mm_sub_epi16(stp1_9, stp1_22); + in[23] = _mm_sub_epi16(stp1_8, stp1_23); + in[24] = _mm_sub_epi16(stp1_7, stp1_24); + in[25] = _mm_sub_epi16(stp1_6, stp1_25); + in[26] = _mm_sub_epi16(stp1_5, stp1_26); + in[27] = _mm_sub_epi16(stp1_4, stp1_27); + in[28] = _mm_sub_epi16(stp1_3, stp1_28); + in[29] = _mm_sub_epi16(stp1_2, stp1_29); + in[30] = _mm_sub_epi16(stp1_1, stp1_30); + in[31] = _mm_sub_epi16(stp1_0, stp1_31); + + for (j = 0; j < 32; ++j) { + // Final rounding and shift + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } + + dest += 8; + } +} + +void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); + const __m128i zero = _mm_setzero_si128(); + + // idct constants for each stage + const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); + const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); + const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); + const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); + const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); + const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64); + const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); + const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); + const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); + const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); + const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); + const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); + const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); + const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); + const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); + const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); + + const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); + const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); + const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); + const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); + const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); + const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); + const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); + const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); + + const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); + const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); + const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); + const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); + const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); + const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); + const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); + const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); + const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); + + const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); + const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); + const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); + const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); + const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); + const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); + + const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); + + __m128i in[32], col[128], zero_idx[16]; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, + stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, + stp1_30, stp1_31; + __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, + stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, + stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, + stp2_30, stp2_31; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int i, j, i32; + + for (i = 0; i < 4; i++) { + i32 = (i << 5); + // First 1-D idct + // Load input data. + LOAD_DQCOEFF(in[0], input); + LOAD_DQCOEFF(in[8], input); + LOAD_DQCOEFF(in[16], input); + LOAD_DQCOEFF(in[24], input); + LOAD_DQCOEFF(in[1], input); + LOAD_DQCOEFF(in[9], input); + LOAD_DQCOEFF(in[17], input); + LOAD_DQCOEFF(in[25], input); + LOAD_DQCOEFF(in[2], input); + LOAD_DQCOEFF(in[10], input); + LOAD_DQCOEFF(in[18], input); + LOAD_DQCOEFF(in[26], input); + LOAD_DQCOEFF(in[3], input); + LOAD_DQCOEFF(in[11], input); + LOAD_DQCOEFF(in[19], input); + LOAD_DQCOEFF(in[27], input); + + LOAD_DQCOEFF(in[4], input); + LOAD_DQCOEFF(in[12], input); + LOAD_DQCOEFF(in[20], input); + LOAD_DQCOEFF(in[28], input); + LOAD_DQCOEFF(in[5], input); + LOAD_DQCOEFF(in[13], input); + LOAD_DQCOEFF(in[21], input); + LOAD_DQCOEFF(in[29], input); + LOAD_DQCOEFF(in[6], input); + LOAD_DQCOEFF(in[14], input); + LOAD_DQCOEFF(in[22], input); + LOAD_DQCOEFF(in[30], input); + LOAD_DQCOEFF(in[7], input); + LOAD_DQCOEFF(in[15], input); + LOAD_DQCOEFF(in[23], input); + LOAD_DQCOEFF(in[31], input); + + // checking if all entries are zero + zero_idx[0] = _mm_or_si128(in[0], in[1]); + zero_idx[1] = _mm_or_si128(in[2], in[3]); + zero_idx[2] = _mm_or_si128(in[4], in[5]); + zero_idx[3] = _mm_or_si128(in[6], in[7]); + zero_idx[4] = _mm_or_si128(in[8], in[9]); + zero_idx[5] = _mm_or_si128(in[10], in[11]); + zero_idx[6] = _mm_or_si128(in[12], in[13]); + zero_idx[7] = _mm_or_si128(in[14], in[15]); + zero_idx[8] = _mm_or_si128(in[16], in[17]); + zero_idx[9] = _mm_or_si128(in[18], in[19]); + zero_idx[10] = _mm_or_si128(in[20], in[21]); + zero_idx[11] = _mm_or_si128(in[22], in[23]); + zero_idx[12] = _mm_or_si128(in[24], in[25]); + zero_idx[13] = _mm_or_si128(in[26], in[27]); + zero_idx[14] = _mm_or_si128(in[28], in[29]); + zero_idx[15] = _mm_or_si128(in[30], in[31]); + + zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); + zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); + + zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); + zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); + zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); + zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); + zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); + zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); + zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); + + if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) { + col[i32 + 0] = _mm_setzero_si128(); + col[i32 + 1] = _mm_setzero_si128(); + col[i32 + 2] = _mm_setzero_si128(); + col[i32 + 3] = _mm_setzero_si128(); + col[i32 + 4] = _mm_setzero_si128(); + col[i32 + 5] = _mm_setzero_si128(); + col[i32 + 6] = _mm_setzero_si128(); + col[i32 + 7] = _mm_setzero_si128(); + col[i32 + 8] = _mm_setzero_si128(); + col[i32 + 9] = _mm_setzero_si128(); + col[i32 + 10] = _mm_setzero_si128(); + col[i32 + 11] = _mm_setzero_si128(); + col[i32 + 12] = _mm_setzero_si128(); + col[i32 + 13] = _mm_setzero_si128(); + col[i32 + 14] = _mm_setzero_si128(); + col[i32 + 15] = _mm_setzero_si128(); + col[i32 + 16] = _mm_setzero_si128(); + col[i32 + 17] = _mm_setzero_si128(); + col[i32 + 18] = _mm_setzero_si128(); + col[i32 + 19] = _mm_setzero_si128(); + col[i32 + 20] = _mm_setzero_si128(); + col[i32 + 21] = _mm_setzero_si128(); + col[i32 + 22] = _mm_setzero_si128(); + col[i32 + 23] = _mm_setzero_si128(); + col[i32 + 24] = _mm_setzero_si128(); + col[i32 + 25] = _mm_setzero_si128(); + col[i32 + 26] = _mm_setzero_si128(); + col[i32 + 27] = _mm_setzero_si128(); + col[i32 + 28] = _mm_setzero_si128(); + col[i32 + 29] = _mm_setzero_si128(); + col[i32 + 30] = _mm_setzero_si128(); + col[i32 + 31] = _mm_setzero_si128(); + continue; + } + + // Transpose 32x8 block to 8x32 block + array_transpose_8x8(in, in); + array_transpose_8x8(in + 8, in + 8); + array_transpose_8x8(in + 16, in + 16); + array_transpose_8x8(in + 24, in + 24); + + IDCT32 + + // 1_D: Store 32 intermediate results for each 8x32 block. + col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); + col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); + col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); + col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); + col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); + col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); + col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); + col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); + col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); + col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); + col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); + col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); + col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); + col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); + col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); + col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); + col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); + col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); + col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); + col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); + col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); + col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); + col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); + col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); + col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); + col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); + col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); + col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); + col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); + col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); + col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); + col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); + } + for (i = 0; i < 4; i++) { + // Second 1-D idct + j = i << 3; + + // Transpose 32x8 block to 8x32 block + array_transpose_8x8(col + j, in); + array_transpose_8x8(col + j + 32, in + 8); + array_transpose_8x8(col + j + 64, in + 16); + array_transpose_8x8(col + j + 96, in + 24); + + IDCT32 + + // 2_D: Calculate the results and store them to destination. + in[0] = _mm_add_epi16(stp1_0, stp1_31); + in[1] = _mm_add_epi16(stp1_1, stp1_30); + in[2] = _mm_add_epi16(stp1_2, stp1_29); + in[3] = _mm_add_epi16(stp1_3, stp1_28); + in[4] = _mm_add_epi16(stp1_4, stp1_27); + in[5] = _mm_add_epi16(stp1_5, stp1_26); + in[6] = _mm_add_epi16(stp1_6, stp1_25); + in[7] = _mm_add_epi16(stp1_7, stp1_24); + in[8] = _mm_add_epi16(stp1_8, stp1_23); + in[9] = _mm_add_epi16(stp1_9, stp1_22); + in[10] = _mm_add_epi16(stp1_10, stp1_21); + in[11] = _mm_add_epi16(stp1_11, stp1_20); + in[12] = _mm_add_epi16(stp1_12, stp1_19); + in[13] = _mm_add_epi16(stp1_13, stp1_18); + in[14] = _mm_add_epi16(stp1_14, stp1_17); + in[15] = _mm_add_epi16(stp1_15, stp1_16); + in[16] = _mm_sub_epi16(stp1_15, stp1_16); + in[17] = _mm_sub_epi16(stp1_14, stp1_17); + in[18] = _mm_sub_epi16(stp1_13, stp1_18); + in[19] = _mm_sub_epi16(stp1_12, stp1_19); + in[20] = _mm_sub_epi16(stp1_11, stp1_20); + in[21] = _mm_sub_epi16(stp1_10, stp1_21); + in[22] = _mm_sub_epi16(stp1_9, stp1_22); + in[23] = _mm_sub_epi16(stp1_8, stp1_23); + in[24] = _mm_sub_epi16(stp1_7, stp1_24); + in[25] = _mm_sub_epi16(stp1_6, stp1_25); + in[26] = _mm_sub_epi16(stp1_5, stp1_26); + in[27] = _mm_sub_epi16(stp1_4, stp1_27); + in[28] = _mm_sub_epi16(stp1_3, stp1_28); + in[29] = _mm_sub_epi16(stp1_2, stp1_29); + in[30] = _mm_sub_epi16(stp1_1, stp1_30); + in[31] = _mm_sub_epi16(stp1_0, stp1_31); + + for (j = 0; j < 32; ++j) { + // Final rounding and shift + in[j] = _mm_adds_epi16(in[j], final_rounding); + in[j] = _mm_srai_epi16(in[j], 6); + RECON_AND_STORE(dest + j * stride, in[j]); + } + + dest += 8; + } +} + +void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride) { + __m128i dc_value; + const __m128i zero = _mm_setzero_si128(); + int a, j; + + a = (int)dct_const_round_shift(input[0] * cospi_16_64); + a = (int)dct_const_round_shift(a * cospi_16_64); + a = ROUND_POWER_OF_TWO(a, 6); + + dc_value = _mm_set1_epi16(a); + + for (j = 0; j < 32; ++j) { + RECON_AND_STORE(dest + 0 + j * stride, dc_value); + RECON_AND_STORE(dest + 8 + j * stride, dc_value); + RECON_AND_STORE(dest + 16 + j * stride, dc_value); + RECON_AND_STORE(dest + 24 + j * stride, dc_value); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE __m128i clamp_high_sse2(__m128i value, int bd) { + __m128i ubounded, retval; + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi16(1); + const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one); + ubounded = _mm_cmpgt_epi16(value, max); + retval = _mm_andnot_si128(ubounded, value); + ubounded = _mm_and_si128(ubounded, max); + retval = _mm_or_si128(retval, ubounded); + retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero)); + return retval; +} + +void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + int i, j; + __m128i inptr[4]; + __m128i sign_bits[2]; + __m128i temp_mm, min_input, max_input; + int test; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + int optimised_cols = 0; + const __m128i zero = _mm_set1_epi16(0); + const __m128i eight = _mm_set1_epi16(8); + const __m128i max = _mm_set1_epi16(12043); + const __m128i min = _mm_set1_epi16(-12043); + // Load input into __m128i + inptr[0] = _mm_loadu_si128((const __m128i *)input); + inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4)); + inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8)); + inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12)); + + // Pack to 16 bits + inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]); + inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]); + + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp_mm = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp_mm); + + if (!test) { + // Do the row transform + idct4_sse2(inptr); + + // Check the min & max values + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp_mm = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp_mm); + + if (test) { + transpose_4x4(inptr); + sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero); + sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero); + inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]); + inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]); + inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]); + inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]); + _mm_storeu_si128((__m128i *)outptr, inptr[0]); + _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]); + _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]); + _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]); + } else { + // Set to use the optimised transform for the column + optimised_cols = 1; + } + } else { + // Run the un-optimised row transform + for (i = 0; i < 4; ++i) { + vpx_highbd_idct4_c(input, outptr, bd); + input += 4; + outptr += 4; + } + } + + if (optimised_cols) { + idct4_sse2(inptr); + + // Final round and shift + inptr[0] = _mm_add_epi16(inptr[0], eight); + inptr[1] = _mm_add_epi16(inptr[1], eight); + + inptr[0] = _mm_srai_epi16(inptr[0], 4); + inptr[1] = _mm_srai_epi16(inptr[1], 4); + + // Reconstruction and Store + { + __m128i d0 = _mm_loadl_epi64((const __m128i *)dest); + __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi64( + d0, _mm_loadl_epi64((const __m128i *)(dest + stride))); + d2 = _mm_unpacklo_epi64( + d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3))); + d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd); + d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd); + // store input0 + _mm_storel_epi64((__m128i *)dest, d0); + // store input1 + d0 = _mm_srli_si128(d0, 8); + _mm_storel_epi64((__m128i *)(dest + stride), d0); + // store input2 + _mm_storel_epi64((__m128i *)(dest + stride * 2), d2); + // store input3 + d2 = _mm_srli_si128(d2, 8); + _mm_storel_epi64((__m128i *)(dest + stride * 3), d2); + } + } else { + // Run the un-optimised column transform + tran_low_t temp_in[4], temp_out[4]; + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + vpx_highbd_idct4_c(temp_in, temp_out, bd); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + } + } + } +} + +void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + int i, j, test; + __m128i inptr[8]; + __m128i min_input, max_input, temp1, temp2, sign_bits; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + const __m128i zero = _mm_set1_epi16(0); + const __m128i sixteen = _mm_set1_epi16(16); + const __m128i max = _mm_set1_epi16(6201); + const __m128i min = _mm_set1_epi16(-6201); + int optimised_cols = 0; + + // Load input into __m128i & pack to 16 bits + for (i = 0; i < 8; i++) { + temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); + inptr[i] = _mm_packs_epi32(temp1, temp2); + } + + // Find the min & max for the row transform + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 8; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (!test) { + // Do the row transform + idct8_sse2(inptr); + + // Find the min & max for the column transform + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 8; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (test) { + array_transpose_8x8(inptr, inptr); + for (i = 0; i < 8; i++) { + sign_bits = _mm_cmplt_epi16(inptr[i], zero); + temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); + temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); + } + } else { + // Set to use the optimised transform for the column + optimised_cols = 1; + } + } else { + // Run the un-optimised row transform + for (i = 0; i < 8; ++i) { + vpx_highbd_idct8_c(input, outptr, bd); + input += 8; + outptr += 8; + } + } + + if (optimised_cols) { + idct8_sse2(inptr); + + // Final round & shift and Reconstruction and Store + { + __m128i d[8]; + for (i = 0; i < 8; i++) { + inptr[i] = _mm_add_epi16(inptr[i], sixteen); + d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + inptr[i] = _mm_srai_epi16(inptr[i], 5); + d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); + // Store + _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); + } + } + } else { + // Run the un-optimised column transform + tran_low_t temp_in[8], temp_out[8]; + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + vpx_highbd_idct8_c(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } + } +} + +void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[8 * 8] = { 0 }; + tran_low_t *outptr = out; + int i, j, test; + __m128i inptr[8]; + __m128i min_input, max_input, temp1, temp2, sign_bits; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + const __m128i zero = _mm_set1_epi16(0); + const __m128i sixteen = _mm_set1_epi16(16); + const __m128i max = _mm_set1_epi16(6201); + const __m128i min = _mm_set1_epi16(-6201); + int optimised_cols = 0; + + // Load input into __m128i & pack to 16 bits + for (i = 0; i < 8; i++) { + temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); + inptr[i] = _mm_packs_epi32(temp1, temp2); + } + + // Find the min & max for the row transform + // only first 4 row has non-zero coefs + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 4; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (!test) { + // Do the row transform + idct8_sse2(inptr); + + // Find the min & max for the column transform + // N.B. Only first 4 cols contain non-zero coeffs + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 8; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (test) { + // Use fact only first 4 rows contain non-zero coeffs + array_transpose_4X8(inptr, inptr); + for (i = 0; i < 4; i++) { + sign_bits = _mm_cmplt_epi16(inptr[i], zero); + temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); + temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); + } + } else { + // Set to use the optimised transform for the column + optimised_cols = 1; + } + } else { + // Run the un-optimised row transform + for (i = 0; i < 4; ++i) { + vpx_highbd_idct8_c(input, outptr, bd); + input += 8; + outptr += 8; + } + } + + if (optimised_cols) { + idct8_sse2(inptr); + + // Final round & shift and Reconstruction and Store + { + __m128i d[8]; + for (i = 0; i < 8; i++) { + inptr[i] = _mm_add_epi16(inptr[i], sixteen); + d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + inptr[i] = _mm_srai_epi16(inptr[i], 5); + d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); + // Store + _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); + } + } + } else { + // Run the un-optimised column transform + tran_low_t temp_in[8], temp_out[8]; + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + vpx_highbd_idct8_c(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } + } +} + +void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + int i, j, test; + __m128i inptr[32]; + __m128i min_input, max_input, temp1, temp2, sign_bits; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + const __m128i zero = _mm_set1_epi16(0); + const __m128i rounding = _mm_set1_epi16(32); + const __m128i max = _mm_set1_epi16(3155); + const __m128i min = _mm_set1_epi16(-3155); + int optimised_cols = 0; + + // Load input into __m128i & pack to 16 bits + for (i = 0; i < 16; i++) { + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); + inptr[i] = _mm_packs_epi32(temp1, temp2); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); + inptr[i + 16] = _mm_packs_epi32(temp1, temp2); + } + + // Find the min & max for the row transform + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 32; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (!test) { + // Do the row transform + idct16_sse2(inptr, inptr + 16); + + // Find the min & max for the column transform + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 32; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (test) { + array_transpose_16x16(inptr, inptr + 16); + for (i = 0; i < 16; i++) { + sign_bits = _mm_cmplt_epi16(inptr[i], zero); + temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); + sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); + temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); + } + } else { + // Set to use the optimised transform for the column + optimised_cols = 1; + } + } else { + // Run the un-optimised row transform + for (i = 0; i < 16; ++i) { + vpx_highbd_idct16_c(input, outptr, bd); + input += 16; + outptr += 16; + } + } + + if (optimised_cols) { + idct16_sse2(inptr, inptr + 16); + + // Final round & shift and Reconstruction and Store + { + __m128i d[2]; + for (i = 0; i < 16; i++) { + inptr[i ] = _mm_add_epi16(inptr[i ], rounding); + inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); + d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); + inptr[i ] = _mm_srai_epi16(inptr[i ], 6); + inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); + d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); + d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); + // Store + _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); + _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); + } + } + } else { + // Run the un-optimised column transform + tran_low_t temp_in[16], temp_out[16]; + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + vpx_highbd_idct16_c(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } + } +} + +void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, + int stride, int bd) { + tran_low_t out[16 * 16] = { 0 }; + tran_low_t *outptr = out; + int i, j, test; + __m128i inptr[32]; + __m128i min_input, max_input, temp1, temp2, sign_bits; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + const __m128i zero = _mm_set1_epi16(0); + const __m128i rounding = _mm_set1_epi16(32); + const __m128i max = _mm_set1_epi16(3155); + const __m128i min = _mm_set1_epi16(-3155); + int optimised_cols = 0; + + // Load input into __m128i & pack to 16 bits + for (i = 0; i < 16; i++) { + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); + inptr[i] = _mm_packs_epi32(temp1, temp2); + temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); + temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); + inptr[i + 16] = _mm_packs_epi32(temp1, temp2); + } + + // Find the min & max for the row transform + // Since all non-zero dct coefficients are in upper-left 4x4 area, + // we only need to consider first 4 rows here. + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 4; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (!test) { + // Do the row transform (N.B. This transposes inptr) + idct16_sse2(inptr, inptr + 16); + + // Find the min & max for the column transform + // N.B. Only first 4 cols contain non-zero coeffs + max_input = _mm_max_epi16(inptr[0], inptr[1]); + min_input = _mm_min_epi16(inptr[0], inptr[1]); + for (i = 2; i < 16; i++) { + max_input = _mm_max_epi16(max_input, inptr[i]); + min_input = _mm_min_epi16(min_input, inptr[i]); + } + max_input = _mm_cmpgt_epi16(max_input, max); + min_input = _mm_cmplt_epi16(min_input, min); + temp1 = _mm_or_si128(max_input, min_input); + test = _mm_movemask_epi8(temp1); + + if (test) { + // Use fact only first 4 rows contain non-zero coeffs + array_transpose_8x8(inptr, inptr); + array_transpose_8x8(inptr + 8, inptr + 16); + for (i = 0; i < 4; i++) { + sign_bits = _mm_cmplt_epi16(inptr[i], zero); + temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); + sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); + temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); + temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); + _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); + } + } else { + // Set to use the optimised transform for the column + optimised_cols = 1; + } + } else { + // Run the un-optimised row transform + for (i = 0; i < 4; ++i) { + vpx_highbd_idct16_c(input, outptr, bd); + input += 16; + outptr += 16; + } + } + + if (optimised_cols) { + idct16_sse2(inptr, inptr + 16); + + // Final round & shift and Reconstruction and Store + { + __m128i d[2]; + for (i = 0; i < 16; i++) { + inptr[i ] = _mm_add_epi16(inptr[i ], rounding); + inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); + d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); + inptr[i ] = _mm_srai_epi16(inptr[i ], 6); + inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); + d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); + d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); + // Store + _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); + _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); + } + } + } else { + // Run the un-optimised column transform + tran_low_t temp_in[16], temp_out[16]; + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + vpx_highbd_idct16_c(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h new file mode 100644 index 00000000000..bd520c18e56 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_ +#define VPX_DSP_X86_INV_TXFM_SSE2_H_ + +#include // SSE2 +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +// perform 8x8 transpose +static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { + const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); + const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); + const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]); + const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]); + const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); + const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); + const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]); + const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]); + + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5); + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); + const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5); + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3); + const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3); + const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); + + res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1); + res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1); + res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3); + res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3); + res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5); + res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5); + res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7); + res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); +} + +#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ + { \ + const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ + \ + in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ + in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ + } + +static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { + const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); + const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); + const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); + const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); + + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); + + out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4); + out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); + out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); + out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); +} + +static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { + __m128i tbuf[8]; + array_transpose_8x8(res0, res0); + array_transpose_8x8(res1, tbuf); + array_transpose_8x8(res0 + 8, res1); + array_transpose_8x8(res1 + 8, res1 + 8); + + res0[8] = tbuf[0]; + res0[9] = tbuf[1]; + res0[10] = tbuf[2]; + res0[11] = tbuf[3]; + res0[12] = tbuf[4]; + res0[13] = tbuf[5]; + res0[14] = tbuf[6]; + res0[15] = tbuf[7]; +} + +// Function to allow 8 bit optimisations to be used when profile 0 is used with +// highbitdepth enabled +static INLINE __m128i load_input_data(const tran_low_t *data) { +#if CONFIG_VP9_HIGHBITDEPTH + return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5], + data[6], data[7]); +#else + return _mm_load_si128((const __m128i *)data); +#endif +} + +static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) { + in[0] = load_input_data(input + 0 * 16); + in[1] = load_input_data(input + 1 * 16); + in[2] = load_input_data(input + 2 * 16); + in[3] = load_input_data(input + 3 * 16); + in[4] = load_input_data(input + 4 * 16); + in[5] = load_input_data(input + 5 * 16); + in[6] = load_input_data(input + 6 * 16); + in[7] = load_input_data(input + 7 * 16); + + in[8] = load_input_data(input + 8 * 16); + in[9] = load_input_data(input + 9 * 16); + in[10] = load_input_data(input + 10 * 16); + in[11] = load_input_data(input + 11 * 16); + in[12] = load_input_data(input + 12 * 16); + in[13] = load_input_data(input + 13 * 16); + in[14] = load_input_data(input + 14 * 16); + in[15] = load_input_data(input + 15 * 16); +} + +#define RECON_AND_STORE(dest, in_x) \ + { \ + __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ + d0 = _mm_unpacklo_epi8(d0, zero); \ + d0 = _mm_add_epi16(in_x, d0); \ + d0 = _mm_packus_epi16(d0, d0); \ + _mm_storel_epi64((__m128i *)(dest), d0); \ + } + +static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { + const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i zero = _mm_setzero_si128(); + // Final rounding and shift + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + in[8] = _mm_adds_epi16(in[8], final_rounding); + in[9] = _mm_adds_epi16(in[9], final_rounding); + in[10] = _mm_adds_epi16(in[10], final_rounding); + in[11] = _mm_adds_epi16(in[11], final_rounding); + in[12] = _mm_adds_epi16(in[12], final_rounding); + in[13] = _mm_adds_epi16(in[13], final_rounding); + in[14] = _mm_adds_epi16(in[14], final_rounding); + in[15] = _mm_adds_epi16(in[15], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 6); + in[1] = _mm_srai_epi16(in[1], 6); + in[2] = _mm_srai_epi16(in[2], 6); + in[3] = _mm_srai_epi16(in[3], 6); + in[4] = _mm_srai_epi16(in[4], 6); + in[5] = _mm_srai_epi16(in[5], 6); + in[6] = _mm_srai_epi16(in[6], 6); + in[7] = _mm_srai_epi16(in[7], 6); + in[8] = _mm_srai_epi16(in[8], 6); + in[9] = _mm_srai_epi16(in[9], 6); + in[10] = _mm_srai_epi16(in[10], 6); + in[11] = _mm_srai_epi16(in[11], 6); + in[12] = _mm_srai_epi16(in[12], 6); + in[13] = _mm_srai_epi16(in[13], 6); + in[14] = _mm_srai_epi16(in[14], 6); + in[15] = _mm_srai_epi16(in[15], 6); + + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * stride, in[2]); + RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); + RECON_AND_STORE(dest + 8 * stride, in[8]); + RECON_AND_STORE(dest + 9 * stride, in[9]); + RECON_AND_STORE(dest + 10 * stride, in[10]); + RECON_AND_STORE(dest + 11 * stride, in[11]); + RECON_AND_STORE(dest + 12 * stride, in[12]); + RECON_AND_STORE(dest + 13 * stride, in[13]); + RECON_AND_STORE(dest + 14 * stride, in[14]); + RECON_AND_STORE(dest + 15 * stride, in[15]); +} + +void idct4_sse2(__m128i *in); +void idct8_sse2(__m128i *in); +void idct16_sse2(__m128i *in0, __m128i *in1); +void iadst4_sse2(__m128i *in); +void iadst8_sse2(__m128i *in); +void iadst16_sse2(__m128i *in0, __m128i *in1); + +#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm new file mode 100644 index 00000000000..20baf820f6b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm @@ -0,0 +1,1793 @@ +; +; Copyright (c) 2014 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +; This file provides SSSE3 version of the inverse transformation. Part +; of the functions are originally derived from the ffmpeg project. +; Note that the current version applies to x86 64-bit only. + +SECTION_RODATA + +pw_11585x2: times 8 dw 23170 + +pw_m2404x2: times 8 dw -2404*2 +pw_m4756x2: times 8 dw -4756*2 +pw_m5520x2: times 8 dw -5520*2 +pw_m8423x2: times 8 dw -8423*2 +pw_m9102x2: times 8 dw -9102*2 +pw_m10394x2: times 8 dw -10394*2 +pw_m11003x2: times 8 dw -11003*2 + +pw_16364x2: times 8 dw 16364*2 +pw_16305x2: times 8 dw 16305*2 +pw_16207x2: times 8 dw 16207*2 +pw_16069x2: times 8 dw 16069*2 +pw_15893x2: times 8 dw 15893*2 +pw_15679x2: times 8 dw 15679*2 +pw_15426x2: times 8 dw 15426*2 +pw_15137x2: times 8 dw 15137*2 +pw_14811x2: times 8 dw 14811*2 +pw_14449x2: times 8 dw 14449*2 +pw_14053x2: times 8 dw 14053*2 +pw_13623x2: times 8 dw 13623*2 +pw_13160x2: times 8 dw 13160*2 +pw_12665x2: times 8 dw 12665*2 +pw_12140x2: times 8 dw 12140*2 +pw__9760x2: times 8 dw 9760*2 +pw__7723x2: times 8 dw 7723*2 +pw__7005x2: times 8 dw 7005*2 +pw__6270x2: times 8 dw 6270*2 +pw__3981x2: times 8 dw 3981*2 +pw__3196x2: times 8 dw 3196*2 +pw__1606x2: times 8 dw 1606*2 +pw___804x2: times 8 dw 804*2 + +pd_8192: times 4 dd 8192 +pw_32: times 8 dw 32 +pw_16: times 8 dw 16 + +%macro TRANSFORM_COEFFS 2 +pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 +pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1 +pw_m%1_m%2: dw -%1, -%2, -%1, -%2, -%1, -%2, -%1, -%2 +%endmacro + +TRANSFORM_COEFFS 6270, 15137 +TRANSFORM_COEFFS 3196, 16069 +TRANSFORM_COEFFS 13623, 9102 + +; constants for 32x32_34 +TRANSFORM_COEFFS 804, 16364 +TRANSFORM_COEFFS 15426, 5520 +TRANSFORM_COEFFS 3981, 15893 +TRANSFORM_COEFFS 16207, 2404 +TRANSFORM_COEFFS 1606, 16305 +TRANSFORM_COEFFS 15679, 4756 +TRANSFORM_COEFFS 11585, 11585 + +; constants for 32x32_1024 +TRANSFORM_COEFFS 12140, 11003 +TRANSFORM_COEFFS 7005, 14811 +TRANSFORM_COEFFS 14053, 8423 +TRANSFORM_COEFFS 9760, 13160 +TRANSFORM_COEFFS 12665, 10394 +TRANSFORM_COEFFS 7723, 14449 + +%macro PAIR_PP_COEFFS 2 +dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 +%endmacro + +%macro PAIR_MP_COEFFS 2 +dpw_m%1_%2: dw -%1, -%1, -%1, -%1, %2, %2, %2, %2 +%endmacro + +%macro PAIR_MM_COEFFS 2 +dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2 +%endmacro + +PAIR_PP_COEFFS 30274, 12540 +PAIR_PP_COEFFS 6392, 32138 +PAIR_MP_COEFFS 18204, 27246 + +PAIR_PP_COEFFS 12540, 12540 +PAIR_PP_COEFFS 30274, 30274 +PAIR_PP_COEFFS 6392, 6392 +PAIR_PP_COEFFS 32138, 32138 +PAIR_MM_COEFFS 18204, 18204 +PAIR_PP_COEFFS 27246, 27246 + +SECTION .text + +%if ARCH_X86_64 +%macro SUM_SUB 3 + psubw m%3, m%1, m%2 + paddw m%1, m%2 + SWAP %2, %3 +%endmacro + +; butterfly operation +%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2 + pmaddwd m%1, m%3, %5 + pmaddwd m%2, m%3, %6 + paddd m%1, %4 + paddd m%2, %4 + psrad m%1, 14 + psrad m%2, 14 +%endmacro + +%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 + punpckhwd m%6, m%2, m%1 + MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_%3_%4] + punpcklwd m%2, m%1 + MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_%3_%4] + packssdw m%1, m%7 + packssdw m%2, m%6 +%endmacro + +%macro BUTTERFLY_4Xmm 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 + punpckhwd m%6, m%2, m%1 + MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_m%3_m%4] + punpcklwd m%2, m%1 + MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_m%3_m%4] + packssdw m%1, m%7 + packssdw m%2, m%6 +%endmacro + +; matrix transpose +%macro INTERLEAVE_2X 4 + punpckh%1 m%4, m%2, m%3 + punpckl%1 m%2, m%3 + SWAP %3, %4 +%endmacro + +%macro TRANSPOSE8X8 9 + INTERLEAVE_2X wd, %1, %2, %9 + INTERLEAVE_2X wd, %3, %4, %9 + INTERLEAVE_2X wd, %5, %6, %9 + INTERLEAVE_2X wd, %7, %8, %9 + + INTERLEAVE_2X dq, %1, %3, %9 + INTERLEAVE_2X dq, %2, %4, %9 + INTERLEAVE_2X dq, %5, %7, %9 + INTERLEAVE_2X dq, %6, %8, %9 + + INTERLEAVE_2X qdq, %1, %5, %9 + INTERLEAVE_2X qdq, %3, %7, %9 + INTERLEAVE_2X qdq, %2, %6, %9 + INTERLEAVE_2X qdq, %4, %8, %9 + + SWAP %2, %5 + SWAP %4, %7 +%endmacro + +%macro IDCT8_1D 0 + SUM_SUB 0, 4, 9 + BUTTERFLY_4X 2, 6, 6270, 15137, m8, 9, 10 + pmulhrsw m0, m12 + pmulhrsw m4, m12 + BUTTERFLY_4X 1, 7, 3196, 16069, m8, 9, 10 + BUTTERFLY_4X 5, 3, 13623, 9102, m8, 9, 10 + + SUM_SUB 1, 5, 9 + SUM_SUB 7, 3, 9 + SUM_SUB 0, 6, 9 + SUM_SUB 4, 2, 9 + SUM_SUB 3, 5, 9 + pmulhrsw m3, m12 + pmulhrsw m5, m12 + + SUM_SUB 0, 7, 9 + SUM_SUB 4, 3, 9 + SUM_SUB 2, 5, 9 + SUM_SUB 6, 1, 9 + + SWAP 3, 6 + SWAP 1, 4 +%endmacro + +; This macro handles 8 pixels per line +%macro ADD_STORE_8P_2X 5; src1, src2, tmp1, tmp2, zero + paddw m%1, m11 + paddw m%2, m11 + psraw m%1, 5 + psraw m%2, 5 + + movh m%3, [outputq] + movh m%4, [outputq + strideq] + punpcklbw m%3, m%5 + punpcklbw m%4, m%5 + paddw m%3, m%1 + paddw m%4, m%2 + packuswb m%3, m%5 + packuswb m%4, m%5 + movh [outputq], m%3 + movh [outputq + strideq], m%4 +%endmacro + +INIT_XMM ssse3 +; full inverse 8x8 2D-DCT transform +cglobal idct8x8_64_add, 3, 5, 13, input, output, stride + mova m8, [pd_8192] + mova m11, [pw_16] + mova m12, [pw_11585x2] + + lea r3, [2 * strideq] +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] + mova m2, [inputq + 64] + packssdw m2, [inputq + 80] + mova m3, [inputq + 96] + packssdw m3, [inputq + 112] + mova m4, [inputq + 128] + packssdw m4, [inputq + 144] + mova m5, [inputq + 160] + packssdw m5, [inputq + 176] + mova m6, [inputq + 192] + packssdw m6, [inputq + 208] + mova m7, [inputq + 224] + packssdw m7, [inputq + 240] +%else + mova m0, [inputq + 0] + mova m1, [inputq + 16] + mova m2, [inputq + 32] + mova m3, [inputq + 48] + mova m4, [inputq + 64] + mova m5, [inputq + 80] + mova m6, [inputq + 96] + mova m7, [inputq + 112] +%endif + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + IDCT8_1D + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + IDCT8_1D + + pxor m12, m12 + ADD_STORE_8P_2X 0, 1, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 2, 3, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 4, 5, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 6, 7, 9, 10, 12 + + RET + +; inverse 8x8 2D-DCT transform with only first 10 coeffs non-zero +cglobal idct8x8_12_add, 3, 5, 13, input, output, stride + mova m8, [pd_8192] + mova m11, [pw_16] + mova m12, [pw_11585x2] + + lea r3, [2 * strideq] + +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] + mova m2, [inputq + 64] + packssdw m2, [inputq + 80] + mova m3, [inputq + 96] + packssdw m3, [inputq + 112] +%else + mova m0, [inputq + 0] + mova m1, [inputq + 16] + mova m2, [inputq + 32] + mova m3, [inputq + 48] +%endif + + punpcklwd m0, m1 + punpcklwd m2, m3 + punpckhdq m9, m0, m2 + punpckldq m0, m2 + SWAP 2, 9 + + ; m0 -> [0], [0] + ; m1 -> [1], [1] + ; m2 -> [2], [2] + ; m3 -> [3], [3] + punpckhqdq m10, m0, m0 + punpcklqdq m0, m0 + punpckhqdq m9, m2, m2 + punpcklqdq m2, m2 + SWAP 1, 10 + SWAP 3, 9 + + pmulhrsw m0, m12 + pmulhrsw m2, [dpw_30274_12540] + pmulhrsw m1, [dpw_6392_32138] + pmulhrsw m3, [dpw_m18204_27246] + + SUM_SUB 0, 2, 9 + SUM_SUB 1, 3, 9 + + punpcklqdq m9, m3, m3 + punpckhqdq m5, m3, m9 + + SUM_SUB 3, 5, 9 + punpckhqdq m5, m3 + pmulhrsw m5, m12 + + punpckhqdq m9, m1, m5 + punpcklqdq m1, m5 + SWAP 5, 9 + + SUM_SUB 0, 5, 9 + SUM_SUB 2, 1, 9 + + punpckhqdq m3, m0, m0 + punpckhqdq m4, m1, m1 + punpckhqdq m6, m5, m5 + punpckhqdq m7, m2, m2 + + punpcklwd m0, m3 + punpcklwd m7, m2 + punpcklwd m1, m4 + punpcklwd m6, m5 + + punpckhdq m4, m0, m7 + punpckldq m0, m7 + punpckhdq m10, m1, m6 + punpckldq m5, m1, m6 + + punpckhqdq m1, m0, m5 + punpcklqdq m0, m5 + punpckhqdq m3, m4, m10 + punpcklqdq m2, m4, m10 + + + pmulhrsw m0, m12 + pmulhrsw m6, m2, [dpw_30274_30274] + pmulhrsw m4, m2, [dpw_12540_12540] + + pmulhrsw m7, m1, [dpw_32138_32138] + pmulhrsw m1, [dpw_6392_6392] + pmulhrsw m5, m3, [dpw_m18204_m18204] + pmulhrsw m3, [dpw_27246_27246] + + mova m2, m0 + SUM_SUB 0, 6, 9 + SUM_SUB 2, 4, 9 + SUM_SUB 1, 5, 9 + SUM_SUB 7, 3, 9 + + SUM_SUB 3, 5, 9 + pmulhrsw m3, m12 + pmulhrsw m5, m12 + + SUM_SUB 0, 7, 9 + SUM_SUB 2, 3, 9 + SUM_SUB 4, 5, 9 + SUM_SUB 6, 1, 9 + + SWAP 3, 6 + SWAP 1, 2 + SWAP 2, 4 + + + pxor m12, m12 + ADD_STORE_8P_2X 0, 1, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 2, 3, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 4, 5, 9, 10, 12 + lea outputq, [outputq + r3] + ADD_STORE_8P_2X 6, 7, 9, 10, 12 + + RET + +%define idx0 16 * 0 +%define idx1 16 * 1 +%define idx2 16 * 2 +%define idx3 16 * 3 +%define idx4 16 * 4 +%define idx5 16 * 5 +%define idx6 16 * 6 +%define idx7 16 * 7 +%define idx8 16 * 0 +%define idx9 16 * 1 +%define idx10 16 * 2 +%define idx11 16 * 3 +%define idx12 16 * 4 +%define idx13 16 * 5 +%define idx14 16 * 6 +%define idx15 16 * 7 +%define idx16 16 * 0 +%define idx17 16 * 1 +%define idx18 16 * 2 +%define idx19 16 * 3 +%define idx20 16 * 4 +%define idx21 16 * 5 +%define idx22 16 * 6 +%define idx23 16 * 7 +%define idx24 16 * 0 +%define idx25 16 * 1 +%define idx26 16 * 2 +%define idx27 16 * 3 +%define idx28 16 * 4 +%define idx29 16 * 5 +%define idx30 16 * 6 +%define idx31 16 * 7 + +; FROM idct32x32_add_neon.asm +; +; Instead of doing the transforms stage by stage, it is done by loading +; some input values and doing as many stages as possible to minimize the +; storing/loading of intermediate results. To fit within registers, the +; final coefficients are cut into four blocks: +; BLOCK A: 16-19,28-31 +; BLOCK B: 20-23,24-27 +; BLOCK C: 8-11,12-15 +; BLOCK D: 0-3,4-7 +; Blocks A and C are straight calculation through the various stages. In +; block B, further calculations are performed using the results from +; block A. In block D, further calculations are performed using the results +; from block C and then the final calculations are done using results from +; block A and B which have been combined at the end of block B. +; + +%macro IDCT32X32_34 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, m1 + pmulhrsw m1, [pw___804x2] ; stp1_16 + mova [r4 + 0], m0 + pmulhrsw m11, [pw_16364x2] ; stp2_31 + mova [r4 + 16 * 2], m2 + mova m12, m7 + pmulhrsw m7, [pw_15426x2] ; stp1_28 + mova [r4 + 16 * 4], m4 + pmulhrsw m12, [pw_m5520x2] ; stp2_19 + mova [r4 + 16 * 6], m6 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m2, m1 ; stp1_16 + mova m0, m11 ; stp1_31 + mova m4, m7 ; stp1_28 + mova m15, m12 ; stp1_19 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 15, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 15, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 15, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m6, m5 + pmulhrsw m5, [pw__3981x2] ; stp1_20 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m15 + pmulhrsw m6, [pw_15893x2] ; stp2_27 + mova [stp + %4 + idx30], m2 + mova m2, m3 + pmulhrsw m3, [pw_m2404x2] ; stp1_23 + mova [stp + %4 + idx31], m11 + pmulhrsw m2, [pw_16207x2] ; stp2_24 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m13, m5 ; stp1_20 + mova m14, m6 ; stp1_27 + mova m15, m3 ; stp1_23 + mova m11, m2 ; stp1_24 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 11, 15, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 15, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 11, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 0, 15, 9 ; stp2_17, stp2_22 + SUM_SUB 4, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 7, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m1 + mova [stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m10, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 10, 11, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m10 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 11, 15, 9 + pmulhrsw m11, m10 ; stp1_25 + pmulhrsw m15, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_24 + pmulhrsw m3, m10 ; stp1_23 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 11, 15, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 11, 15 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 2, 3 +%endif + + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m11 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m6, [rsp + transposed_in + 16 * 6] + + mova m1, m0 + pmulhrsw m0, [pw__1606x2] ; stp1_8 + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + pmulhrsw m1, [pw_16305x2] ; stp2_15 + mova [stp + %3 + idx22], m15 + mova m7, m6 + pmulhrsw m7, [pw_m4756x2] ; stp2_11 + mova [stp + %3 + idx23], m3 + pmulhrsw m6, [pw_15679x2] ; stp1_12 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m3, m0 ; stp1_8 + mova m2, m1 ; stp1_15 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + mova m4, m7 ; stp1_11 + mova m5, m6 ; stp1_12 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + + ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, m11 + pmulhrsw m11, [pw__3196x2] ; stp1_4 + pmulhrsw m12, [pw_16069x2] ; stp1_7 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m10, [pw_11585x2] + pmulhrsw m0, m10 ; stp1_1 + + mova m14, m11 ; stp1_4 + mova m13, m12 ; stp1_7 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + mova m7, m0 ; stp1_0 = stp1_1 + mova m4, m0 ; stp1_1 + mova m2, m7 ; stp1_0 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 7, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 + SUM_SUB 4, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 1, 9 ; stp1_0, stp1_15 + SUM_SUB 7, 3, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 4, 6, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m15, [stp + %4 + idx30] + mova m10, [stp + %4 + idx31] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 7, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m7 + mova [stp + %4 + idx30], m15 + mova [stp + %4 + idx31], m10 + mova m7, [stp + %4 + idx28] + mova m0, [stp + %4 + idx29] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 4, 7, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m4 + mova [stp + %4 + idx28], m7 + mova [stp + %4 + idx29], m0 + + ; 12-15, 16-19 final stage + mova m0, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m4, [stp + %3 + idx19] + SUM_SUB 1, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 3, 7, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 6, 4, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m4 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m0, [stp + %4 + idx27] + mova m1, [stp + %4 + idx26] + mova m2, [stp + %4 + idx25] + mova m3, [stp + %4 + idx24] + SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + mova [stp + %4 + idx27], m0 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx24], m3 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 + mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +%macro RECON_AND_STORE 1 + mova m11, [pw_32] + lea stp, [rsp + %1] + mov r6, 32 + pxor m8, m8 +%%recon_and_store: + mova m0, [stp + 16 * 32 * 0] + mova m1, [stp + 16 * 32 * 1] + mova m2, [stp + 16 * 32 * 2] + mova m3, [stp + 16 * 32 * 3] + add stp, 16 + + paddw m0, m11 + paddw m1, m11 + paddw m2, m11 + paddw m3, m11 + psraw m0, 6 + psraw m1, 6 + psraw m2, 6 + psraw m3, 6 + movh m4, [outputq + 0] + movh m5, [outputq + 8] + movh m6, [outputq + 16] + movh m7, [outputq + 24] + punpcklbw m4, m8 + punpcklbw m5, m8 + punpcklbw m6, m8 + punpcklbw m7, m8 + paddw m0, m4 + paddw m1, m5 + paddw m2, m6 + paddw m3, m7 + packuswb m0, m1 + packuswb m2, m3 + mova [outputq + 0], m0 + mova [outputq + 16], m2 + lea outputq, [outputq + strideq] + dec r6 + jnz %%recon_and_store +%endmacro + +%define i32x32_size 16*32*5 +%define pass_two_start 16*32*0 +%define transposed_in 16*32*4 +%define pass_one_start 16*32*0 +%define stp r8 + +INIT_XMM ssse3 +cglobal idct32x32_34_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + lea stp, [rsp + pass_one_start] + +idct32x32_34: + mov r3, inputq + lea r4, [rsp + transposed_in] + +idct32x32_34_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + IDCT32X32_34 16*0, 16*32, 16*64, 16*96 + lea stp, [stp + 16 * 8] + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_34_2: + lea r4, [rsp + transposed_in] + mov r3, r9 + +idct32x32_34_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + IDCT32X32_34 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_34_2 + + RECON_AND_STORE pass_two_start + + RET + +%macro IDCT32X32_135 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m1, [rsp + transposed_in + 16 * 1] + mova m11, m1 + pmulhrsw m1, [pw___804x2] ; stp1_16 + pmulhrsw m11, [pw_16364x2] ; stp2_31 + + mova m7, [rsp + transposed_in + 16 * 7] + mova m12, m7 + pmulhrsw m7, [pw_15426x2] ; stp1_28 + pmulhrsw m12, [pw_m5520x2] ; stp2_19 + + mova m3, [rsp + transposed_in + 16 * 9] + mova m4, m3 + pmulhrsw m3, [pw__7005x2] ; stp1_18 + pmulhrsw m4, [pw_14811x2] ; stp2_29 + + mova m0, [rsp + transposed_in + 16 * 15] + mova m2, m0 + pmulhrsw m0, [pw_12140x2] ; stp1_30 + pmulhrsw m2, [pw_m11003x2] ; stp2_17 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 + SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 + SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 + SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + mova [stp + %3 + idx16], m1 + mova [stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m3 + mova [stp + %4 + idx30], m2 + mova [stp + %4 + idx31], m11 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m2, [rsp + transposed_in + 16 * 3] + mova m3, m2 + pmulhrsw m3, [pw_m2404x2] ; stp1_23 + pmulhrsw m2, [pw_16207x2] ; stp2_24 + + mova m5, [rsp + transposed_in + 16 * 5] + mova m6, m5 + pmulhrsw m5, [pw__3981x2] ; stp1_20 + pmulhrsw m6, [pw_15893x2] ; stp2_27 + + mova m14, [rsp + transposed_in + 16 * 11] + mova m13, m14 + pmulhrsw m13, [pw_m8423x2] ; stp1_21 + pmulhrsw m14, [pw_14053x2] ; stp2_26 + + mova m0, [rsp + transposed_in + 16 * 13] + mova m1, m0 + pmulhrsw m0, [pw__9760x2] ; stp1_22 + pmulhrsw m1, [pw_13160x2] ; stp2_25 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 + SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 + SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 + SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m11, [stp + %3 + idx18] + mova m12, [stp + %3 + idx19] + SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 + SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m4 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m11 + mova [stp + %3 + idx19], m12 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m11, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m11 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 1, 0, 9 + pmulhrsw m1, m10 ; stp1_25 + pmulhrsw m0, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_25 + pmulhrsw m3, m10 ; stp1_22 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 1, 0 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 2, 3 +%endif + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + mova [stp + %3 + idx22], m0 + mova [stp + %3 + idx23], m3 + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m1 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m1, m0 + pmulhrsw m0, [pw__1606x2] ; stp1_8 + pmulhrsw m1, [pw_16305x2] ; stp2_15 + + mova m6, [rsp + transposed_in + 16 * 6] + mova m7, m6 + pmulhrsw m7, [pw_m4756x2] ; stp2_11 + pmulhrsw m6, [pw_15679x2] ; stp1_12 + + mova m4, [rsp + transposed_in + 16 * 10] + mova m5, m4 + pmulhrsw m4, [pw__7723x2] ; stp1_10 + pmulhrsw m5, [pw_14449x2] ; stp2_13 + + mova m2, [rsp + transposed_in + 16 * 14] + mova m3, m2 + pmulhrsw m3, [pw_m10394x2] ; stp1_9 + pmulhrsw m2, [pw_12665x2] ; stp2_14 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 + SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 + SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 + SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + + ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, m11 + pmulhrsw m11, [pw__3196x2] ; stp1_4 + pmulhrsw m12, [pw_16069x2] ; stp1_7 + + mova m13, [rsp + transposed_in + 16 * 12] + mova m14, m13 + pmulhrsw m13, [pw_13623x2] ; stp1_6 + pmulhrsw m14, [pw_m9102x2] ; stp1_5 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m2, [rsp + transposed_in + 16 * 8] + pmulhrsw m0, [pw_11585x2] ; stp1_1 + mova m3, m2 + pmulhrsw m2, [pw__6270x2] ; stp1_2 + pmulhrsw m3, [pw_15137x2] ; stp1_3 + + SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 + SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + mova m1, m0 ; stp1_0 = stp1_1 + SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 + SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 + SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %2 + idx12] + mova m5, [stp + %2 + idx13] + mova m6, [stp + %2 + idx14] + mova m7, [stp + %2 + idx15] + SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 + SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m10, [stp + %4 + idx31] + mova m15, [stp + %4 + idx30] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m1 + mova [stp + %4 + idx31], m10 + mova [stp + %4 + idx30], m15 + mova m0, [stp + %4 + idx29] + mova m1, [stp + %4 + idx28] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m3 + mova [stp + %4 + idx29], m0 + mova [stp + %4 + idx28], m1 + + ; 12-15, 16-19 final stage + mova m0, [stp + %3 + idx16] + mova m1, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m3, [stp + %3 + idx19] + SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m4 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m6 + mova [stp + %2 + idx15], m7 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m1 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m3 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m3, [stp + %4 + idx24] + mova m2, [stp + %4 + idx25] + mova m1, [stp + %4 + idx26] + mova m0, [stp + %4 + idx27] + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 + mova [stp + %4 + idx24], m3 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx27], m0 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 + mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +INIT_XMM ssse3 +cglobal idct32x32_135_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + mov r6, 2 + lea stp, [rsp + pass_one_start] + +idct32x32_135: + mov r3, inputq + lea r4, [rsp + transposed_in] + mov r7, 2 + +idct32x32_135_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + +%if CONFIG_VP9_HIGHBITDEPTH + add r3, 32 +%else + add r3, 16 +%endif + add r4, 16 * 8 + dec r7 + jne idct32x32_135_transpose + + IDCT32X32_135 16*0, 16*32, 16*64, 16*96 + lea stp, [stp + 16 * 8] +%if CONFIG_VP9_HIGHBITDEPTH + lea inputq, [inputq + 32 * 32] +%else + lea inputq, [inputq + 16 * 32] +%endif + dec r6 + jnz idct32x32_135 + + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_135_2: + lea r4, [rsp + transposed_in] + mov r3, r9 + mov r7, 2 + +idct32x32_135_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + + add r3, 16 * 8 + add r4, 16 * 8 + dec r7 + jne idct32x32_135_transpose_2 + + IDCT32X32_135 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_135_2 + + RECON_AND_STORE pass_two_start + + RET + +%macro IDCT32X32_1024 4 + ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m1, [rsp + transposed_in + 16 * 1] + mova m11, [rsp + transposed_in + 16 * 31] + BUTTERFLY_4X 1, 11, 804, 16364, m8, 9, 10 ; stp1_16, stp1_31 + + mova m0, [rsp + transposed_in + 16 * 15] + mova m2, [rsp + transposed_in + 16 * 17] + BUTTERFLY_4X 2, 0, 12140, 11003, m8, 9, 10 ; stp1_17, stp1_30 + + mova m7, [rsp + transposed_in + 16 * 7] + mova m12, [rsp + transposed_in + 16 * 25] + BUTTERFLY_4X 12, 7, 15426, 5520, m8, 9, 10 ; stp1_19, stp1_28 + + mova m3, [rsp + transposed_in + 16 * 9] + mova m4, [rsp + transposed_in + 16 * 23] + BUTTERFLY_4X 3, 4, 7005, 14811, m8, 9, 10 ; stp1_18, stp1_29 + + ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 + SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 + SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 + SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 + + ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 + BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 + + ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 + SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 + SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 + SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 + + ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 + BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 + + mova [stp + %3 + idx16], m1 + mova [stp + %3 + idx17], m0 + mova [stp + %3 + idx18], m4 + mova [stp + %3 + idx19], m7 + mova [stp + %4 + idx28], m12 + mova [stp + %4 + idx29], m3 + mova [stp + %4 + idx30], m2 + mova [stp + %4 + idx31], m11 + + ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m5, [rsp + transposed_in + 16 * 5] + mova m6, [rsp + transposed_in + 16 * 27] + BUTTERFLY_4X 5, 6, 3981, 15893, m8, 9, 10 ; stp1_20, stp1_27 + + mova m13, [rsp + transposed_in + 16 * 21] + mova m14, [rsp + transposed_in + 16 * 11] + BUTTERFLY_4X 13, 14, 14053, 8423, m8, 9, 10 ; stp1_21, stp1_26 + + mova m0, [rsp + transposed_in + 16 * 13] + mova m1, [rsp + transposed_in + 16 * 19] + BUTTERFLY_4X 0, 1, 9760, 13160, m8, 9, 10 ; stp1_22, stp1_25 + + mova m2, [rsp + transposed_in + 16 * 3] + mova m3, [rsp + transposed_in + 16 * 29] + BUTTERFLY_4X 3, 2, 16207, 2404, m8, 9, 10 ; stp1_23, stp1_24 + + ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 + SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 + SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 + SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 + + ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 + BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 + + ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 + SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 + SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 + SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 + + ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 + BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 + + ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %3 + idx16] + mova m7, [stp + %3 + idx17] + mova m11, [stp + %3 + idx18] + mova m12, [stp + %3 + idx19] + SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 + SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 + SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 + SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 + mova [stp + %3 + idx16], m4 + mova [stp + %3 + idx17], m7 + mova [stp + %3 + idx18], m11 + mova [stp + %3 + idx19], m12 + + mova m4, [stp + %4 + idx28] + mova m7, [stp + %4 + idx29] + mova m11, [stp + %4 + idx30] + mova m12, [stp + %4 + idx31] + SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 + SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 + SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 + SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 + mova [stp + %4 + idx28], m4 + mova [stp + %4 + idx29], m7 + mova [stp + %4 + idx30], m11 + mova [stp + %4 + idx31], m12 + + ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 6, 5, 9 + pmulhrsw m6, m10 ; stp1_27 + pmulhrsw m5, m10 ; stp1_20 + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_26 + pmulhrsw m14, m10 ; stp1_21 + SUM_SUB 1, 0, 9 + pmulhrsw m1, m10 ; stp1_25 + pmulhrsw m0, m10 ; stp1_22 + SUM_SUB 2, 3, 9 + pmulhrsw m2, m10 ; stp1_25 + pmulhrsw m3, m10 ; stp1_22 +%else + BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 + SWAP 6, 5 + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 + SWAP 13, 14 + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 + SWAP 1, 0 + BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 + SWAP 2, 3 +%endif + mova [stp + %3 + idx20], m5 + mova [stp + %3 + idx21], m14 + mova [stp + %3 + idx22], m0 + mova [stp + %3 + idx23], m3 + mova [stp + %4 + idx24], m2 + mova [stp + %4 + idx25], m1 + mova [stp + %4 + idx26], m13 + mova [stp + %4 + idx27], m6 + + ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 2] + mova m1, [rsp + transposed_in + 16 * 30] + BUTTERFLY_4X 0, 1, 1606, 16305, m8, 9, 10 ; stp1_8, stp1_15 + + mova m2, [rsp + transposed_in + 16 * 14] + mova m3, [rsp + transposed_in + 16 * 18] + BUTTERFLY_4X 3, 2, 12665, 10394, m8, 9, 10 ; stp1_9, stp1_14 + + mova m4, [rsp + transposed_in + 16 * 10] + mova m5, [rsp + transposed_in + 16 * 22] + BUTTERFLY_4X 4, 5, 7723, 14449, m8, 9, 10 ; stp1_10, stp1_13 + + mova m6, [rsp + transposed_in + 16 * 6] + mova m7, [rsp + transposed_in + 16 * 26] + BUTTERFLY_4X 7, 6, 15679, 4756, m8, 9, 10 ; stp1_11, stp1_12 + + ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 + SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 + SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 + SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 + + ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 + BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 + + ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 + SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 + SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 + SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 + + ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 5, 4, 9 + pmulhrsw m5, m10 ; stp1_13 + pmulhrsw m4, m10 ; stp1_10 + SUM_SUB 6, 7, 9 + pmulhrsw m6, m10 ; stp1_12 + pmulhrsw m7, m10 ; stp1_11 +%else + BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 + SWAP 5, 4 + BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 + SWAP 6, 7 +%endif + ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova [stp + %2 + idx8], m0 + mova [stp + %2 + idx9], m2 + mova [stp + %2 + idx10], m4 + mova [stp + %2 + idx11], m7 + mova [stp + %2 + idx12], m6 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m3 + mova [stp + %2 + idx15], m1 + + ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ; + ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m11, [rsp + transposed_in + 16 * 4] + mova m12, [rsp + transposed_in + 16 * 28] + BUTTERFLY_4X 11, 12, 3196, 16069, m8, 9, 10 ; stp1_4, stp1_7 + + mova m13, [rsp + transposed_in + 16 * 12] + mova m14, [rsp + transposed_in + 16 * 20] + BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_5, stp1_6 + + ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m0, [rsp + transposed_in + 16 * 0] + mova m1, [rsp + transposed_in + 16 * 16] + +%if 0 ; overflow occurs in SUM_SUB when using test streams + mova m10, [pw_11585x2] + SUM_SUB 0, 1, 9 + pmulhrsw m0, m10 ; stp1_1 + pmulhrsw m1, m10 ; stp1_0 +%else + BUTTERFLY_4X 0, 1, 11585, 11585, m8, 9, 10 ; stp1_1, stp1_0 + SWAP 0, 1 +%endif + mova m2, [rsp + transposed_in + 16 * 8] + mova m3, [rsp + transposed_in + 16 * 24] + BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_2, stp1_3 + + mova m10, [pw_11585x2] + SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 + SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 + + ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +%if 0 ; overflow occurs in SUM_SUB when using test streams + SUM_SUB 13, 14, 9 + pmulhrsw m13, m10 ; stp1_6 + pmulhrsw m14, m10 ; stp1_5 +%else + BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 + SWAP 13, 14 +%endif + SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 + SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 + + ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 + SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 + SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 + SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 + + ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + mova m4, [stp + %2 + idx12] + mova m5, [stp + %2 + idx13] + mova m6, [stp + %2 + idx14] + mova m7, [stp + %2 + idx15] + SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 + SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 + SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 + SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 + + ; 0-3, 28-31 final stage + mova m10, [stp + %4 + idx31] + mova m15, [stp + %4 + idx30] + SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 + SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 + mova [stp + %1 + idx0], m0 + mova [stp + %1 + idx1], m1 + mova [stp + %4 + idx31], m10 + mova [stp + %4 + idx30], m15 + mova m0, [stp + %4 + idx29] + mova m1, [stp + %4 + idx28] + SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 + SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 + mova [stp + %1 + idx2], m2 + mova [stp + %1 + idx3], m3 + mova [stp + %4 + idx29], m0 + mova [stp + %4 + idx28], m1 + + ; 12-15, 16-19 final stage + mova m0, [stp + %3 + idx16] + mova m1, [stp + %3 + idx17] + mova m2, [stp + %3 + idx18] + mova m3, [stp + %3 + idx19] + SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 + SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 + SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 + SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 + mova [stp + %2 + idx12], m4 + mova [stp + %2 + idx13], m5 + mova [stp + %2 + idx14], m6 + mova [stp + %2 + idx15], m7 + mova [stp + %3 + idx16], m0 + mova [stp + %3 + idx17], m1 + mova [stp + %3 + idx18], m2 + mova [stp + %3 + idx19], m3 + + mova m4, [stp + %2 + idx8] + mova m5, [stp + %2 + idx9] + mova m6, [stp + %2 + idx10] + mova m7, [stp + %2 + idx11] + SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 + SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 + SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 + SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 + + ; 4-7, 24-27 final stage + mova m3, [stp + %4 + idx24] + mova m2, [stp + %4 + idx25] + mova m1, [stp + %4 + idx26] + mova m0, [stp + %4 + idx27] + SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 + SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 + SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 + SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 + mova [stp + %4 + idx24], m3 + mova [stp + %4 + idx25], m2 + mova [stp + %4 + idx26], m1 + mova [stp + %4 + idx27], m0 + mova [stp + %1 + idx4], m11 + mova [stp + %1 + idx5], m14 + mova [stp + %1 + idx6], m13 + mova [stp + %1 + idx7], m12 + + ; 8-11, 20-23 final stage + mova m0, [stp + %3 + idx20] + mova m1, [stp + %3 + idx21] + mova m2, [stp + %3 + idx22] + mova m3, [stp + %3 + idx23] + SUM_SUB 7, 0, 9 ; stp1_11, stp_20 + SUM_SUB 6, 1, 9 ; stp1_10, stp_21 + SUM_SUB 5, 2, 9 ; stp1_9, stp_22 + SUM_SUB 4, 3, 9 ; stp1_8, stp_23 + mova [stp + %2 + idx8], m4 + mova [stp + %2 + idx9], m5 + mova [stp + %2 + idx10], m6 + mova [stp + %2 + idx11], m7 + mova [stp + %3 + idx20], m0 + mova [stp + %3 + idx21], m1 + mova [stp + %3 + idx22], m2 + mova [stp + %3 + idx23], m3 +%endmacro + +INIT_XMM ssse3 +cglobal idct32x32_1024_add, 3, 11, 16, i32x32_size, input, output, stride + mova m8, [pd_8192] + mov r6, 4 + lea stp, [rsp + pass_one_start] + +idct32x32_1024: + mov r3, inputq + lea r4, [rsp + transposed_in] + mov r7, 4 + +idct32x32_1024_transpose: +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [r3 + 0] + packssdw m0, [r3 + 16] + mova m1, [r3 + 32 * 4] + packssdw m1, [r3 + 32 * 4 + 16] + mova m2, [r3 + 32 * 8] + packssdw m2, [r3 + 32 * 8 + 16] + mova m3, [r3 + 32 * 12] + packssdw m3, [r3 + 32 * 12 + 16] + mova m4, [r3 + 32 * 16] + packssdw m4, [r3 + 32 * 16 + 16] + mova m5, [r3 + 32 * 20] + packssdw m5, [r3 + 32 * 20 + 16] + mova m6, [r3 + 32 * 24] + packssdw m6, [r3 + 32 * 24 + 16] + mova m7, [r3 + 32 * 28] + packssdw m7, [r3 + 32 * 28 + 16] +%else + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 4] + mova m2, [r3 + 16 * 8] + mova m3, [r3 + 16 * 12] + mova m4, [r3 + 16 * 16] + mova m5, [r3 + 16 * 20] + mova m6, [r3 + 16 * 24] + mova m7, [r3 + 16 * 28] +%endif + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 +%if CONFIG_VP9_HIGHBITDEPTH + add r3, 32 +%else + add r3, 16 +%endif + add r4, 16 * 8 + dec r7 + jne idct32x32_1024_transpose + + IDCT32X32_1024 16*0, 16*32, 16*64, 16*96 + + lea stp, [stp + 16 * 8] +%if CONFIG_VP9_HIGHBITDEPTH + lea inputq, [inputq + 32 * 32] +%else + lea inputq, [inputq + 16 * 32] +%endif + dec r6 + jnz idct32x32_1024 + + mov r6, 4 + lea stp, [rsp + pass_one_start] + lea r9, [rsp + pass_one_start] + +idct32x32_1024_2: + lea r4, [rsp + transposed_in] + mov r3, r9 + mov r7, 4 + +idct32x32_1024_transpose_2: + mova m0, [r3 + 0] + mova m1, [r3 + 16 * 1] + mova m2, [r3 + 16 * 2] + mova m3, [r3 + 16 * 3] + mova m4, [r3 + 16 * 4] + mova m5, [r3 + 16 * 5] + mova m6, [r3 + 16 * 6] + mova m7, [r3 + 16 * 7] + + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 + + mova [r4 + 0], m0 + mova [r4 + 16 * 1], m1 + mova [r4 + 16 * 2], m2 + mova [r4 + 16 * 3], m3 + mova [r4 + 16 * 4], m4 + mova [r4 + 16 * 5], m5 + mova [r4 + 16 * 6], m6 + mova [r4 + 16 * 7], m7 + + add r3, 16 * 8 + add r4, 16 * 8 + dec r7 + jne idct32x32_1024_transpose_2 + + IDCT32X32_1024 16*0, 16*8, 16*16, 16*24 + + lea stp, [stp + 16 * 32] + add r9, 16 * 32 + dec r6 + jnz idct32x32_1024_2 + + RECON_AND_STORE pass_two_start + + RET +%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm new file mode 100644 index 00000000000..fbbcd76bd7b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm @@ -0,0 +1,109 @@ +; +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +%macro REORDER_INPUTS 0 + ; a c d b to a b c d + SWAP 1, 3, 2 +%endmacro + +%macro TRANSFORM_COLS 0 + ; input: + ; m0 a + ; m1 b + ; m2 c + ; m3 d + paddw m0, m2 + psubw m3, m1 + + ; wide subtract + punpcklwd m4, m0 + punpcklwd m5, m3 + psrad m4, 16 + psrad m5, 16 + psubd m4, m5 + psrad m4, 1 + packssdw m4, m4 ; e + + psubw m5, m4, m1 ; b + psubw m4, m2 ; c + psubw m0, m5 + paddw m3, m4 + ; m0 a + SWAP 1, 5 ; m1 b + SWAP 2, 4 ; m2 c + ; m3 d +%endmacro + +%macro TRANSPOSE_4X4 0 + punpcklwd m0, m2 + punpcklwd m1, m3 + mova m2, m0 + punpcklwd m0, m1 + punpckhwd m2, m1 + pshufd m1, m0, 0x0e + pshufd m3, m2, 0x0e +%endmacro + +; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3 +%macro TRANSPOSE_4X4_WIDE 0 + mova m3, m0 + punpcklwd m0, m1 + punpckhwd m3, m1 + mova m2, m0 + punpcklwd m0, m3 + punpckhwd m2, m3 + pshufd m1, m0, 0x0e + pshufd m3, m2, 0x0e +%endmacro + +%macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero + movd m%3, [outputq] + movd m%4, [outputq + strideq] + punpcklbw m%3, m%5 + punpcklbw m%4, m%5 + paddw m%1, m%3 + paddw m%2, m%4 + packuswb m%1, m%5 + packuswb m%2, m%5 + movd [outputq], m%1 + movd [outputq + strideq], m%2 +%endmacro + +INIT_XMM sse2 +cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride +%if CONFIG_VP9_HIGHBITDEPTH + mova m0, [inputq + 0] + packssdw m0, [inputq + 16] + mova m1, [inputq + 32] + packssdw m1, [inputq + 48] +%else + mova m0, [inputq + 0] + mova m1, [inputq + 16] +%endif + psraw m0, 2 + psraw m1, 2 + + TRANSPOSE_4X4_WIDE + REORDER_INPUTS + TRANSFORM_COLS + TRANSPOSE_4X4 + REORDER_INPUTS + TRANSFORM_COLS + + pxor m4, m4 + ADD_STORE_4P_2X 0, 1, 5, 6, 4 + lea outputq, [outputq + 2 * strideq] + ADD_STORE_4P_2X 2, 3, 5, 6, 4 + + RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c new file mode 100644 index 00000000000..be1087c1e95 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c @@ -0,0 +1,979 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include /* AVX2 */ + +#include "./vpx_dsp_rtcd.h" +#include "vpx_ports/mem.h" + +void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + __m128i mask, hev, flat, flat2; + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi8(1); + __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; + __m128i abs_p1p0; + + const __m128i thresh = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _thresh[0])); + const __m128i limit = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _limit[0])); + const __m128i blimit = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _blimit[0])); + + q4p4 = _mm_loadl_epi64((__m128i *) (s - 5 * p)); + q4p4 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *) (s + 4 * p))); + q3p3 = _mm_loadl_epi64((__m128i *) (s - 4 * p)); + q3p3 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *) (s + 3 * p))); + q2p2 = _mm_loadl_epi64((__m128i *) (s - 3 * p)); + q2p2 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *) (s + 2 * p))); + q1p1 = _mm_loadl_epi64((__m128i *) (s - 2 * p)); + q1p1 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *) (s + 1 * p))); + p1q1 = _mm_shuffle_epi32(q1p1, 78); + q0p0 = _mm_loadl_epi64((__m128i *) (s - 1 * p)); + q0p0 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *) (s - 0 * p))); + p0q0 = _mm_shuffle_epi32(q0p0, 78); + + { + __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; + abs_p1p0 = _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), + _mm_subs_epu8(q0p0, q1p1)); + abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); + fe = _mm_set1_epi8(0xfe); + ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + abs_p0q0 = _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), + _mm_subs_epu8(p0q0, q0p0)); + abs_p1q1 = _mm_or_si128(_mm_subs_epu8(q1p1, p1q1), + _mm_subs_epu8(p1q1, q1p1)); + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(abs_p1p0, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), + _mm_subs_epu8(q1p1, q2p2)), + _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), + _mm_subs_epu8(q2p2, q3p3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + } + + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t1 = _mm_set1_epi16(0x1); + __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); + __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); + __m128i qs0 = _mm_xor_si128(p0q0, t80); + __m128i qs1 = _mm_xor_si128(p1q1, t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; + __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; + + filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, qs0ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + filter1 = _mm_unpacklo_epi8(zero, filter1); + filter1 = _mm_srai_epi16(filter1, 0xB); + filter2 = _mm_unpacklo_epi8(zero, filter2); + filter2 = _mm_srai_epi16(filter2, 0xB); + + /* Filter1 >> 3 */ + filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); + qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); + + /* filt >> 1 */ + filt = _mm_adds_epi16(filter1, t1); + filt = _mm_srai_epi16(filt, 1); + filt = _mm_andnot_si128( + _mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), filt); + filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); + qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); + // loopfilter done + + { + __m128i work; + flat = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), + _mm_subs_epu8(q0p0, q2p2)), + _mm_or_si128(_mm_subs_epu8(q3p3, q0p0), + _mm_subs_epu8(q0p0, q3p3))); + flat = _mm_max_epu8(abs_p1p0, flat); + flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + + q5p5 = _mm_loadl_epi64((__m128i *) (s - 6 * p)); + q5p5 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q5p5), + (__m64 *) (s + 5 * p))); + + q6p6 = _mm_loadl_epi64((__m128i *) (s - 7 * p)); + q6p6 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q6p6), + (__m64 *) (s + 6 * p))); + + flat2 = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(q4p4, q0p0), + _mm_subs_epu8(q0p0, q4p4)), + _mm_or_si128(_mm_subs_epu8(q5p5, q0p0), + _mm_subs_epu8(q0p0, q5p5))); + + q7p7 = _mm_loadl_epi64((__m128i *) (s - 8 * p)); + q7p7 = _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(q7p7), + (__m64 *) (s + 7 * p))); + + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(q6p6, q0p0), + _mm_subs_epu8(q0p0, q6p6)), + _mm_or_si128(_mm_subs_epu8(q7p7, q0p0), + _mm_subs_epu8(q0p0, q7p7))); + + flat2 = _mm_max_epu8(work, flat2); + flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); + flat2 = _mm_subs_epu8(flat2, one); + flat2 = _mm_cmpeq_epi8(flat2, zero); + flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // flat and wide flat calculations + { + const __m128i eight = _mm_set1_epi16(8); + const __m128i four = _mm_set1_epi16(4); + __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; + __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; + __m128i pixelFilter_p, pixelFilter_q; + __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; + __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; + + p7_16 = _mm_unpacklo_epi8(q7p7, zero); + p6_16 = _mm_unpacklo_epi8(q6p6, zero); + p5_16 = _mm_unpacklo_epi8(q5p5, zero); + p4_16 = _mm_unpacklo_epi8(q4p4, zero); + p3_16 = _mm_unpacklo_epi8(q3p3, zero); + p2_16 = _mm_unpacklo_epi8(q2p2, zero); + p1_16 = _mm_unpacklo_epi8(q1p1, zero); + p0_16 = _mm_unpacklo_epi8(q0p0, zero); + q0_16 = _mm_unpackhi_epi8(q0p0, zero); + q1_16 = _mm_unpackhi_epi8(q1p1, zero); + q2_16 = _mm_unpackhi_epi8(q2p2, zero); + q3_16 = _mm_unpackhi_epi8(q3p3, zero); + q4_16 = _mm_unpackhi_epi8(q4p4, zero); + q5_16 = _mm_unpackhi_epi8(q5p5, zero); + q6_16 = _mm_unpackhi_epi8(q6p6, zero); + q7_16 = _mm_unpackhi_epi8(q7p7, zero); + + pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), + _mm_add_epi16(p4_16, p3_16)); + pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), + _mm_add_epi16(q4_16, q3_16)); + + pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, + _mm_add_epi16(p2_16, p1_16)); + pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); + + pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, + _mm_add_epi16(q2_16, q1_16)); + pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); + pixelFilter_p = _mm_add_epi16(eight, + _mm_add_epi16(pixelFilter_p, pixelFilter_q)); + pixetFilter_p2p1p0 = _mm_add_epi16(four, + _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7_16, p0_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7_16, q0_16)), + 4); + flat2_q0p0 = _mm_packus_epi16(res_p, res_q); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(p3_16, p0_16)), 3); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(q3_16, q0_16)), 3); + + flat_q0p0 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(p7_16, p7_16); + sum_q7 = _mm_add_epi16(q7_16, q7_16); + sum_p3 = _mm_add_epi16(p3_16, p3_16); + sum_q3 = _mm_add_epi16(q3_16, q3_16); + + pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1_16)), + 4); + flat2_q1p1 = _mm_packus_epi16(res_p, res_q); + + pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); + pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(sum_p3, p1_16)), 3); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_q2q1q0, + _mm_add_epi16(sum_q3, q1_16)), 3); + flat_q1p1 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + sum_p3 = _mm_add_epi16(sum_p3, p3_16); + sum_q3 = _mm_add_epi16(sum_q3, q3_16); + + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2_16)), + 4); + flat2_q2p2 = _mm_packus_epi16(res_p, res_q); + + pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); + pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); + + res_p = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(sum_p3, p2_16)), 3); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixetFilter_q2q1q0, + _mm_add_epi16(sum_q3, q2_16)), 3); + flat_q2p2 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3_16)), + 4); + flat2_q3p3 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4_16)), + 4); + flat2_q4p4 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5_16)), + 4); + flat2_q5p5 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); + res_p = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6_16)), + 4); + res_q = _mm_srli_epi16( + _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6_16)), + 4); + flat2_q6p6 = _mm_packus_epi16(res_p, res_q); + } + // wide flat + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + flat = _mm_shuffle_epi32(flat, 68); + flat2 = _mm_shuffle_epi32(flat2, 68); + + q2p2 = _mm_andnot_si128(flat, q2p2); + flat_q2p2 = _mm_and_si128(flat, flat_q2p2); + q2p2 = _mm_or_si128(q2p2, flat_q2p2); + + qs1ps1 = _mm_andnot_si128(flat, qs1ps1); + flat_q1p1 = _mm_and_si128(flat, flat_q1p1); + q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); + + qs0ps0 = _mm_andnot_si128(flat, qs0ps0); + flat_q0p0 = _mm_and_si128(flat, flat_q0p0); + q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); + + q6p6 = _mm_andnot_si128(flat2, q6p6); + flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); + q6p6 = _mm_or_si128(q6p6, flat2_q6p6); + _mm_storel_epi64((__m128i *) (s - 7 * p), q6p6); + _mm_storeh_pi((__m64 *) (s + 6 * p), _mm_castsi128_ps(q6p6)); + + q5p5 = _mm_andnot_si128(flat2, q5p5); + flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); + q5p5 = _mm_or_si128(q5p5, flat2_q5p5); + _mm_storel_epi64((__m128i *) (s - 6 * p), q5p5); + _mm_storeh_pi((__m64 *) (s + 5 * p), _mm_castsi128_ps(q5p5)); + + q4p4 = _mm_andnot_si128(flat2, q4p4); + flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); + q4p4 = _mm_or_si128(q4p4, flat2_q4p4); + _mm_storel_epi64((__m128i *) (s - 5 * p), q4p4); + _mm_storeh_pi((__m64 *) (s + 4 * p), _mm_castsi128_ps(q4p4)); + + q3p3 = _mm_andnot_si128(flat2, q3p3); + flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); + q3p3 = _mm_or_si128(q3p3, flat2_q3p3); + _mm_storel_epi64((__m128i *) (s - 4 * p), q3p3); + _mm_storeh_pi((__m64 *) (s + 3 * p), _mm_castsi128_ps(q3p3)); + + q2p2 = _mm_andnot_si128(flat2, q2p2); + flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); + q2p2 = _mm_or_si128(q2p2, flat2_q2p2); + _mm_storel_epi64((__m128i *) (s - 3 * p), q2p2); + _mm_storeh_pi((__m64 *) (s + 2 * p), _mm_castsi128_ps(q2p2)); + + q1p1 = _mm_andnot_si128(flat2, q1p1); + flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); + q1p1 = _mm_or_si128(q1p1, flat2_q1p1); + _mm_storel_epi64((__m128i *) (s - 2 * p), q1p1); + _mm_storeh_pi((__m64 *) (s + 1 * p), _mm_castsi128_ps(q1p1)); + + q0p0 = _mm_andnot_si128(flat2, q0p0); + flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); + q0p0 = _mm_or_si128(q0p0, flat2_q0p0); + _mm_storel_epi64((__m128i *) (s - 1 * p), q0p0); + _mm_storeh_pi((__m64 *) (s - 0 * p), _mm_castsi128_ps(q0p0)); + } +} + +DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { + 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128, + 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 +}; + +void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + __m128i mask, hev, flat, flat2; + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi8(1); + __m128i p7, p6, p5; + __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; + __m128i q5, q6, q7; + __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, + q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, + p256_0, q256_0; + + const __m128i thresh = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _thresh[0])); + const __m128i limit = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _limit[0])); + const __m128i blimit = _mm_broadcastb_epi8( + _mm_cvtsi32_si128((int) _blimit[0])); + + p256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 5 * p))); + p256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 4 * p))); + p256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 3 * p))); + p256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 2 * p))); + p256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 1 * p))); + q256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 0 * p))); + q256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 1 * p))); + q256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 2 * p))); + q256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 3 * p))); + q256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 4 * p))); + + p4 = _mm256_castsi256_si128(p256_4); + p3 = _mm256_castsi256_si128(p256_3); + p2 = _mm256_castsi256_si128(p256_2); + p1 = _mm256_castsi256_si128(p256_1); + p0 = _mm256_castsi256_si128(p256_0); + q0 = _mm256_castsi256_si128(q256_0); + q1 = _mm256_castsi256_si128(q256_1); + q2 = _mm256_castsi256_si128(q256_2); + q3 = _mm256_castsi256_si128(q256_3); + q4 = _mm256_castsi256_si128(q256_4); + + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), + _mm_subs_epu8(q0, p0)); + __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), + _mm_subs_epu8(q1, p1)); + __m128i work; + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(flat, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), + _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), + _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + } + + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + + __m128i ps1 = _mm_xor_si128(p1, t80); + __m128i ps0 = _mm_xor_si128(p0, t80); + __m128i qs0 = _mm_xor_si128(q0, t80); + __m128i qs1 = _mm_xor_si128(q1, t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + __m128i flat2_p6, flat2_p5, flat2_p4, flat2_p3, flat2_p2, flat2_p1, + flat2_p0, flat2_q0, flat2_q1, flat2_q2, flat2_q3, flat2_q4, + flat2_q5, flat2_q6, flat_p2, flat_p1, flat_p0, flat_q0, flat_q1, + flat_q2; + + filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + /* Filter1 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + + /* Filter2 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + + /* filt >> 1 */ + filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, filt); + filt = _mm_srli_epi16(filt, 1); + work_a = _mm_and_si128(work_a, t80); + filt = _mm_and_si128(filt, t7f); + filt = _mm_or_si128(filt, work_a); + filt = _mm_andnot_si128(hev, filt); + ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); + qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); + // loopfilter done + + { + __m128i work; + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p2, p0), _mm_subs_epu8(p0, p2)), + _mm_or_si128(_mm_subs_epu8(q2, q0), _mm_subs_epu8(q0, q2))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p3, p0), _mm_subs_epu8(p0, p3)), + _mm_or_si128(_mm_subs_epu8(q3, q0), _mm_subs_epu8(q0, q3))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p4, p0), _mm_subs_epu8(p0, p4)), + _mm_or_si128(_mm_subs_epu8(q4, q0), _mm_subs_epu8(q0, q4))); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + + p256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 6 * p))); + q256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 5 * p))); + p5 = _mm256_castsi256_si128(p256_5); + q5 = _mm256_castsi256_si128(q256_5); + flat2 = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)), + _mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5))); + + flat2 = _mm_max_epu8(work, flat2); + p256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 7 * p))); + q256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 6 * p))); + p6 = _mm256_castsi256_si128(p256_6); + q6 = _mm256_castsi256_si128(q256_6); + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)), + _mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6))); + + flat2 = _mm_max_epu8(work, flat2); + + p256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s - 8 * p))); + q256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( + (__m128d const *)(s + 7 * p))); + p7 = _mm256_castsi256_si128(p256_7); + q7 = _mm256_castsi256_si128(q256_7); + work = _mm_max_epu8( + _mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)), + _mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7))); + + flat2 = _mm_max_epu8(work, flat2); + flat2 = _mm_subs_epu8(flat2, one); + flat2 = _mm_cmpeq_epi8(flat2, zero); + flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // flat and wide flat calculations + { + const __m256i eight = _mm256_set1_epi16(8); + const __m256i four = _mm256_set1_epi16(4); + __m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0, + pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p, + res_q; + + const __m256i filter = _mm256_load_si256( + (__m256i const *)filt_loopfilter_avx2); + p256_7 = _mm256_shuffle_epi8(p256_7, filter); + p256_6 = _mm256_shuffle_epi8(p256_6, filter); + p256_5 = _mm256_shuffle_epi8(p256_5, filter); + p256_4 = _mm256_shuffle_epi8(p256_4, filter); + p256_3 = _mm256_shuffle_epi8(p256_3, filter); + p256_2 = _mm256_shuffle_epi8(p256_2, filter); + p256_1 = _mm256_shuffle_epi8(p256_1, filter); + p256_0 = _mm256_shuffle_epi8(p256_0, filter); + q256_0 = _mm256_shuffle_epi8(q256_0, filter); + q256_1 = _mm256_shuffle_epi8(q256_1, filter); + q256_2 = _mm256_shuffle_epi8(q256_2, filter); + q256_3 = _mm256_shuffle_epi8(q256_3, filter); + q256_4 = _mm256_shuffle_epi8(q256_4, filter); + q256_5 = _mm256_shuffle_epi8(q256_5, filter); + q256_6 = _mm256_shuffle_epi8(q256_6, filter); + q256_7 = _mm256_shuffle_epi8(q256_7, filter); + + pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), + _mm256_add_epi16(p256_4, p256_3)); + pixelFilter_q = _mm256_add_epi16(_mm256_add_epi16(q256_6, q256_5), + _mm256_add_epi16(q256_4, q256_3)); + + pixetFilter_p2p1p0 = _mm256_add_epi16(p256_0, + _mm256_add_epi16(p256_2, p256_1)); + pixelFilter_p = _mm256_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); + + pixetFilter_q2q1q0 = _mm256_add_epi16(q256_0, + _mm256_add_epi16(q256_2, q256_1)); + pixelFilter_q = _mm256_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); + + pixelFilter_p = _mm256_add_epi16(eight, + _mm256_add_epi16(pixelFilter_p, pixelFilter_q)); + + pixetFilter_p2p1p0 = _mm256_add_epi16(four, + _mm256_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(p256_7, p256_0)), 4); + + flat2_p0 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(q256_7, q256_0)), 4); + + flat2_q0 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_p2p1p0, + _mm256_add_epi16(p256_3, p256_0)), 3); + + flat_p0 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_p2p1p0, + _mm256_add_epi16(q256_3, q256_0)), 3); + + flat_q0 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(p256_7, p256_7); + + sum_q7 = _mm256_add_epi16(q256_7, q256_7); + + sum_p3 = _mm256_add_epi16(p256_3, p256_3); + + sum_q3 = _mm256_add_epi16(q256_3, q256_3); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_p, p256_6); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_6); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_1)), 4); + + flat2_p1 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_1)), 4); + + flat2_q1 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_p2p1p0, p256_2); + + pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_2); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_p2p1p0, + _mm256_add_epi16(sum_p3, p256_1)), 3); + + flat_p1 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_q2q1q0, + _mm256_add_epi16(sum_q3, q256_1)), 3); + + flat_q1 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(sum_p7, p256_7); + + sum_q7 = _mm256_add_epi16(sum_q7, q256_7); + + sum_p3 = _mm256_add_epi16(sum_p3, p256_3); + + sum_q3 = _mm256_add_epi16(sum_q3, q256_3); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_5); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_5); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_2)), 4); + + flat2_p2 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_2)), 4); + + flat2_q2 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_1); + + pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_q2q1q0, p256_1); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_p2p1p0, + _mm256_add_epi16(sum_p3, p256_2)), 3); + + flat_p2 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixetFilter_q2q1q0, + _mm256_add_epi16(sum_q3, q256_2)), 3); + + flat_q2 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(sum_p7, p256_7); + + sum_q7 = _mm256_add_epi16(sum_q7, q256_7); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_4); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_4); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_3)), 4); + + flat2_p3 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_3)), 4); + + flat2_q3 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(sum_p7, p256_7); + + sum_q7 = _mm256_add_epi16(sum_q7, q256_7); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_3); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_3); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_4)), 4); + + flat2_p4 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_4)), 4); + + flat2_q4 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(sum_p7, p256_7); + + sum_q7 = _mm256_add_epi16(sum_q7, q256_7); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_2); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_2); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_5)), 4); + + flat2_p5 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_5)), 4); + + flat2_q5 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + + sum_p7 = _mm256_add_epi16(sum_p7, p256_7); + + sum_q7 = _mm256_add_epi16(sum_q7, q256_7); + + pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_1); + + pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_1); + + res_p = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_p, + _mm256_add_epi16(sum_p7, p256_6)), 4); + + flat2_p6 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), + 168)); + + res_q = _mm256_srli_epi16( + _mm256_add_epi16(pixelFilter_q, + _mm256_add_epi16(sum_q7, q256_6)), 4); + + flat2_q6 = _mm256_castsi256_si128( + _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), + 168)); + } + + // wide flat + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + p2 = _mm_andnot_si128(flat, p2); + flat_p2 = _mm_and_si128(flat, flat_p2); + p2 = _mm_or_si128(flat_p2, p2); + + p1 = _mm_andnot_si128(flat, ps1); + flat_p1 = _mm_and_si128(flat, flat_p1); + p1 = _mm_or_si128(flat_p1, p1); + + p0 = _mm_andnot_si128(flat, ps0); + flat_p0 = _mm_and_si128(flat, flat_p0); + p0 = _mm_or_si128(flat_p0, p0); + + q0 = _mm_andnot_si128(flat, qs0); + flat_q0 = _mm_and_si128(flat, flat_q0); + q0 = _mm_or_si128(flat_q0, q0); + + q1 = _mm_andnot_si128(flat, qs1); + flat_q1 = _mm_and_si128(flat, flat_q1); + q1 = _mm_or_si128(flat_q1, q1); + + q2 = _mm_andnot_si128(flat, q2); + flat_q2 = _mm_and_si128(flat, flat_q2); + q2 = _mm_or_si128(flat_q2, q2); + + p6 = _mm_andnot_si128(flat2, p6); + flat2_p6 = _mm_and_si128(flat2, flat2_p6); + p6 = _mm_or_si128(flat2_p6, p6); + _mm_storeu_si128((__m128i *) (s - 7 * p), p6); + + p5 = _mm_andnot_si128(flat2, p5); + flat2_p5 = _mm_and_si128(flat2, flat2_p5); + p5 = _mm_or_si128(flat2_p5, p5); + _mm_storeu_si128((__m128i *) (s - 6 * p), p5); + + p4 = _mm_andnot_si128(flat2, p4); + flat2_p4 = _mm_and_si128(flat2, flat2_p4); + p4 = _mm_or_si128(flat2_p4, p4); + _mm_storeu_si128((__m128i *) (s - 5 * p), p4); + + p3 = _mm_andnot_si128(flat2, p3); + flat2_p3 = _mm_and_si128(flat2, flat2_p3); + p3 = _mm_or_si128(flat2_p3, p3); + _mm_storeu_si128((__m128i *) (s - 4 * p), p3); + + p2 = _mm_andnot_si128(flat2, p2); + flat2_p2 = _mm_and_si128(flat2, flat2_p2); + p2 = _mm_or_si128(flat2_p2, p2); + _mm_storeu_si128((__m128i *) (s - 3 * p), p2); + + p1 = _mm_andnot_si128(flat2, p1); + flat2_p1 = _mm_and_si128(flat2, flat2_p1); + p1 = _mm_or_si128(flat2_p1, p1); + _mm_storeu_si128((__m128i *) (s - 2 * p), p1); + + p0 = _mm_andnot_si128(flat2, p0); + flat2_p0 = _mm_and_si128(flat2, flat2_p0); + p0 = _mm_or_si128(flat2_p0, p0); + _mm_storeu_si128((__m128i *) (s - 1 * p), p0); + + q0 = _mm_andnot_si128(flat2, q0); + flat2_q0 = _mm_and_si128(flat2, flat2_q0); + q0 = _mm_or_si128(flat2_q0, q0); + _mm_storeu_si128((__m128i *) (s - 0 * p), q0); + + q1 = _mm_andnot_si128(flat2, q1); + flat2_q1 = _mm_and_si128(flat2, flat2_q1); + q1 = _mm_or_si128(flat2_q1, q1); + _mm_storeu_si128((__m128i *) (s + 1 * p), q1); + + q2 = _mm_andnot_si128(flat2, q2); + flat2_q2 = _mm_and_si128(flat2, flat2_q2); + q2 = _mm_or_si128(flat2_q2, q2); + _mm_storeu_si128((__m128i *) (s + 2 * p), q2); + + q3 = _mm_andnot_si128(flat2, q3); + flat2_q3 = _mm_and_si128(flat2, flat2_q3); + q3 = _mm_or_si128(flat2_q3, q3); + _mm_storeu_si128((__m128i *) (s + 3 * p), q3); + + q4 = _mm_andnot_si128(flat2, q4); + flat2_q4 = _mm_and_si128(flat2, flat2_q4); + q4 = _mm_or_si128(flat2_q4, q4); + _mm_storeu_si128((__m128i *) (s + 4 * p), q4); + + q5 = _mm_andnot_si128(flat2, q5); + flat2_q5 = _mm_and_si128(flat2, flat2_q5); + q5 = _mm_or_si128(flat2_q5, q5); + _mm_storeu_si128((__m128i *) (s + 5 * p), q5); + + q6 = _mm_andnot_si128(flat2, q6); + flat2_q6 = _mm_and_si128(flat2, flat2_q6); + q6 = _mm_or_si128(flat2_q6, q6); + _mm_storeu_si128((__m128i *) (s + 6 * p), q6); + } +} diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c new file mode 100644 index 00000000000..739adf31d06 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c @@ -0,0 +1,1776 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include // SSE2 + +#include "./vpx_dsp_rtcd.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/emmintrin_compat.h" + +static INLINE __m128i abs_diff(__m128i a, __m128i b) { + return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); +} + +// filter_mask and hev_mask +#define FILTER_HEV_MASK do { \ + /* (abs(q1 - q0), abs(p1 - p0) */ \ + __m128i flat = abs_diff(q1p1, q0p0); \ + /* abs(p1 - q1), abs(p0 - q0) */ \ + const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \ + __m128i abs_p0q0, abs_p1q1, work; \ + \ + /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \ + hev = _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ + hev = _mm_cmpgt_epi16(hev, thresh); \ + hev = _mm_packs_epi16(hev, hev); \ + \ + /* const int8_t mask = filter_mask(*limit, *blimit, */ \ + /* p3, p2, p1, p0, q0, q1, q2, q3); */ \ + abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */\ + abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */\ + abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \ + abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \ + /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \ + mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \ + /* abs(p3 - p2), abs(p2 - p1) */ \ + work = abs_diff(p3p2, p2p1); \ + flat = _mm_max_epu8(work, flat); \ + /* abs(q3 - q2), abs(q2 - q1) */ \ + work = abs_diff(q3q2, q2q1); \ + flat = _mm_max_epu8(work, flat); \ + flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ + mask = _mm_unpacklo_epi64(mask, flat); \ + mask = _mm_subs_epu8(mask, limit); \ + mask = _mm_cmpeq_epi8(mask, zero); \ + mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \ +} while (0) + +#define FILTER4 do { \ + const __m128i t3t4 = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, \ + 4, 4, 4, 4, 4, 4, 4, 4); \ + const __m128i t80 = _mm_set1_epi8(0x80); \ + __m128i filter, filter2filter1, work; \ + \ + ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \ + qs1qs0 = _mm_xor_si128(q1q0, t80); \ + \ + /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \ + work = _mm_subs_epi8(ps1ps0, qs1qs0); \ + filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \ + /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \ + filter = _mm_subs_epi8(filter, work); \ + filter = _mm_subs_epi8(filter, work); \ + filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \ + filter = _mm_and_si128(filter, mask); /* & mask */ \ + filter = _mm_unpacklo_epi64(filter, filter); \ + \ + /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \ + /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \ + filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \ + filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \ + filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \ + filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \ + filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \ + filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \ + \ + /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \ + filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \ + filter = _mm_unpacklo_epi8(filter, filter); \ + filter = _mm_srai_epi16(filter, 9); /* round */ \ + filter = _mm_packs_epi16(filter, filter); \ + filter = _mm_andnot_si128(hev, filter); \ + \ + hev = _mm_unpackhi_epi64(filter2filter1, filter); \ + filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \ + \ + /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \ + qs1qs0 = _mm_subs_epi8(qs1qs0, filter2filter1); \ + /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \ + ps1ps0 = _mm_adds_epi8(ps1ps0, hev); \ + qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \ + ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ +} while (0) + +void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, + const uint8_t *_blimit, const uint8_t *_limit, + const uint8_t *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i limit = + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), + _mm_loadl_epi64((const __m128i *)_limit)); + const __m128i thresh = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i ff = _mm_cmpeq_epi8(zero, zero); + __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; + __m128i mask, hev; + + p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), + _mm_loadl_epi64((__m128i *)(s - 4 * p))); + q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), + _mm_loadl_epi64((__m128i *)(s + 1 * p))); + q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), + _mm_loadl_epi64((__m128i *)(s + 0 * p))); + q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)), + _mm_loadl_epi64((__m128i *)(s + 3 * p))); + p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); + p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); + q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); + q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); + + FILTER_HEV_MASK; + FILTER4; + + _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1 + _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0 + _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0 + _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1 +} + +void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, + const uint8_t *_blimit, const uint8_t *_limit, + const uint8_t *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i limit = + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), + _mm_loadl_epi64((const __m128i *)_limit)); + const __m128i thresh = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i ff = _mm_cmpeq_epi8(zero, zero); + __m128i x0, x1, x2, x3; + __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; + __m128i mask, hev; + + // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 + q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 1 * p - 4))); + + // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 + x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 3 * p - 4))); + + // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 + x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 5 * p - 4))); + + // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 + x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)), + _mm_loadl_epi64((__m128i *)(s + 7 * p - 4))); + + // Transpose 8x8 + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + p1p0 = _mm_unpacklo_epi16(q1q0, x1); + // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 + x0 = _mm_unpacklo_epi16(x2, x3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + p3p2 = _mm_unpacklo_epi32(p1p0, x0); + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + p1p0 = _mm_unpackhi_epi32(p1p0, x0); + p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8)); // swap lo and high + p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8)); // swap lo and high + + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + q1q0 = _mm_unpackhi_epi16(q1q0, x1); + // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 + x2 = _mm_unpackhi_epi16(x2, x3); + // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 + q3q2 = _mm_unpackhi_epi32(q1q0, x2); + // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 + q1q0 = _mm_unpacklo_epi32(q1q0, x2); + + q0p0 = _mm_unpacklo_epi64(p1p0, q1q0); + q1p1 = _mm_unpackhi_epi64(p1p0, q1q0); + p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); + p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); + q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); + + FILTER_HEV_MASK; + FILTER4; + + // Transpose 8x4 to 4x8 + // qs1qs0: 20 21 22 23 24 25 26 27 30 31 32 33 34 34 36 37 + // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07 + // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 + ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8)); + // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37 + x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0); + // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27 + ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0); + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0); + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); + + *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + ps1ps0 = _mm_srli_si128(ps1ps0, 4); + *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + + *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + qs1qs0 = _mm_srli_si128(qs1qs0, 4); + *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); +} + +void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi8(1); + const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); + const __m128i limit = _mm_load_si128((const __m128i *)_limit); + const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + __m128i mask, hev, flat, flat2; + __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; + __m128i abs_p1p0; + + q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p)); + q4p4 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q4p4), + (__m64 *)(s + 4 * p))); + q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p)); + q3p3 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q3p3), + (__m64 *)(s + 3 * p))); + q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p)); + q2p2 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q2p2), + (__m64 *)(s + 2 * p))); + q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p)); + q1p1 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q1p1), + (__m64 *)(s + 1 * p))); + p1q1 = _mm_shuffle_epi32(q1p1, 78); + q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p)); + q0p0 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q0p0), + (__m64 *)(s - 0 * p))); + p0q0 = _mm_shuffle_epi32(q0p0, 78); + + { + __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; + abs_p1p0 = abs_diff(q1p1, q0p0); + abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); + fe = _mm_set1_epi8(0xfe); + ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + abs_p0q0 = abs_diff(q0p0, p0q0); + abs_p1q1 = abs_diff(q1p1, p1q1); + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(abs_p1p0, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + + work = _mm_max_epu8(abs_diff(q2p2, q1p1), + abs_diff(q3p3, q2p2)); + mask = _mm_max_epu8(work, mask); + mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + } + + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t1 = _mm_set1_epi16(0x1); + __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); + __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); + __m128i qs0 = _mm_xor_si128(p0q0, t80); + __m128i qs1 = _mm_xor_si128(p1q1, t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; + __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; + + filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, qs0ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + // (vpx_filter + 3 * (qs0 - ps0)) & mask + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + filter1 = _mm_unpacklo_epi8(zero, filter1); + filter1 = _mm_srai_epi16(filter1, 0xB); + filter2 = _mm_unpacklo_epi8(zero, filter2); + filter2 = _mm_srai_epi16(filter2, 0xB); + + // Filter1 >> 3 + filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); + qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); + + // filt >> 1 + filt = _mm_adds_epi16(filter1, t1); + filt = _mm_srai_epi16(filt, 1); + filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), + filt); + filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); + qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); + // loopfilter done + + { + __m128i work; + flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); + flat = _mm_max_epu8(abs_p1p0, flat); + flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + + q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * p)); + q5p5 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q5p5), + (__m64 *)(s + 5 * p))); + + q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * p)); + q6p6 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q6p6), + (__m64 *)(s + 6 * p))); + flat2 = _mm_max_epu8(abs_diff(q4p4, q0p0), abs_diff(q5p5, q0p0)); + + q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * p)); + q7p7 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q7p7), + (__m64 *)(s + 7 * p))); + work = _mm_max_epu8(abs_diff(q6p6, q0p0), abs_diff(q7p7, q0p0)); + flat2 = _mm_max_epu8(work, flat2); + flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); + flat2 = _mm_subs_epu8(flat2, one); + flat2 = _mm_cmpeq_epi8(flat2, zero); + flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // flat and wide flat calculations + { + const __m128i eight = _mm_set1_epi16(8); + const __m128i four = _mm_set1_epi16(4); + __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; + __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; + __m128i pixelFilter_p, pixelFilter_q; + __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; + __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; + + p7_16 = _mm_unpacklo_epi8(q7p7, zero);; + p6_16 = _mm_unpacklo_epi8(q6p6, zero); + p5_16 = _mm_unpacklo_epi8(q5p5, zero); + p4_16 = _mm_unpacklo_epi8(q4p4, zero); + p3_16 = _mm_unpacklo_epi8(q3p3, zero); + p2_16 = _mm_unpacklo_epi8(q2p2, zero); + p1_16 = _mm_unpacklo_epi8(q1p1, zero); + p0_16 = _mm_unpacklo_epi8(q0p0, zero); + q0_16 = _mm_unpackhi_epi8(q0p0, zero); + q1_16 = _mm_unpackhi_epi8(q1p1, zero); + q2_16 = _mm_unpackhi_epi8(q2p2, zero); + q3_16 = _mm_unpackhi_epi8(q3p3, zero); + q4_16 = _mm_unpackhi_epi8(q4p4, zero); + q5_16 = _mm_unpackhi_epi8(q5p5, zero); + q6_16 = _mm_unpackhi_epi8(q6p6, zero); + q7_16 = _mm_unpackhi_epi8(q7p7, zero); + + pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), + _mm_add_epi16(p4_16, p3_16)); + pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), + _mm_add_epi16(q4_16, q3_16)); + + pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); + pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); + + pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); + pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); + pixelFilter_p = _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, + pixelFilter_q)); + pixetFilter_p2p1p0 = _mm_add_epi16(four, + _mm_add_epi16(pixetFilter_p2p1p0, + pixetFilter_q2q1q0)); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(p7_16, p0_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(q7_16, q0_16)), 4); + flat2_q0p0 = _mm_packus_epi16(res_p, res_q); + res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(p3_16, p0_16)), 3); + res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(q3_16, q0_16)), 3); + + flat_q0p0 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(p7_16, p7_16); + sum_q7 = _mm_add_epi16(q7_16, q7_16); + sum_p3 = _mm_add_epi16(p3_16, p3_16); + sum_q3 = _mm_add_epi16(q3_16, q3_16); + + pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p1_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q1_16)), 4); + flat2_q1p1 = _mm_packus_epi16(res_p, res_q); + + pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); + pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(sum_p3, p1_16)), 3); + res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, + _mm_add_epi16(sum_q3, q1_16)), 3); + flat_q1p1 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + sum_p3 = _mm_add_epi16(sum_p3, p3_16); + sum_q3 = _mm_add_epi16(sum_q3, q3_16); + + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p2_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q2_16)), 4); + flat2_q2p2 = _mm_packus_epi16(res_p, res_q); + + pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); + pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); + + res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, + _mm_add_epi16(sum_p3, p2_16)), 3); + res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, + _mm_add_epi16(sum_q3, q2_16)), 3); + flat_q2p2 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p3_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q3_16)), 4); + flat2_q3p3 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p4_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q4_16)), 4); + flat2_q4p4 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p5_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q5_16)), 4); + flat2_q5p5 = _mm_packus_epi16(res_p, res_q); + + sum_p7 = _mm_add_epi16(sum_p7, p7_16); + sum_q7 = _mm_add_epi16(sum_q7, q7_16); + pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); + pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); + res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, + _mm_add_epi16(sum_p7, p6_16)), 4); + res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, + _mm_add_epi16(sum_q7, q6_16)), 4); + flat2_q6p6 = _mm_packus_epi16(res_p, res_q); + } + // wide flat + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + flat = _mm_shuffle_epi32(flat, 68); + flat2 = _mm_shuffle_epi32(flat2, 68); + + q2p2 = _mm_andnot_si128(flat, q2p2); + flat_q2p2 = _mm_and_si128(flat, flat_q2p2); + q2p2 = _mm_or_si128(q2p2, flat_q2p2); + + qs1ps1 = _mm_andnot_si128(flat, qs1ps1); + flat_q1p1 = _mm_and_si128(flat, flat_q1p1); + q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); + + qs0ps0 = _mm_andnot_si128(flat, qs0ps0); + flat_q0p0 = _mm_and_si128(flat, flat_q0p0); + q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); + + q6p6 = _mm_andnot_si128(flat2, q6p6); + flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); + q6p6 = _mm_or_si128(q6p6, flat2_q6p6); + _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6); + _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6)); + + q5p5 = _mm_andnot_si128(flat2, q5p5); + flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); + q5p5 = _mm_or_si128(q5p5, flat2_q5p5); + _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5); + _mm_storeh_pi((__m64 *)(s + 5 * p), _mm_castsi128_ps(q5p5)); + + q4p4 = _mm_andnot_si128(flat2, q4p4); + flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); + q4p4 = _mm_or_si128(q4p4, flat2_q4p4); + _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4); + _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4)); + + q3p3 = _mm_andnot_si128(flat2, q3p3); + flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); + q3p3 = _mm_or_si128(q3p3, flat2_q3p3); + _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3); + _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3)); + + q2p2 = _mm_andnot_si128(flat2, q2p2); + flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); + q2p2 = _mm_or_si128(q2p2, flat2_q2p2); + _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2); + _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2)); + + q1p1 = _mm_andnot_si128(flat2, q1p1); + flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); + q1p1 = _mm_or_si128(q1p1, flat2_q1p1); + _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1); + _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1)); + + q0p0 = _mm_andnot_si128(flat2, q0p0); + flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); + q0p0 = _mm_or_si128(q0p0, flat2_q0p0); + _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0); + _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0)); + } +} + +static INLINE __m128i filter_add2_sub2(const __m128i *const total, + const __m128i *const a1, + const __m128i *const a2, + const __m128i *const s1, + const __m128i *const s2) { + __m128i x = _mm_add_epi16(*a1, *total); + x = _mm_add_epi16(_mm_sub_epi16(x, _mm_add_epi16(*s1, *s2)), *a2); + return x; +} + +static INLINE __m128i filter8_mask(const __m128i *const flat, + const __m128i *const other_filt, + const __m128i *const f8_lo, + const __m128i *const f8_hi) { + const __m128i f8 = _mm_packus_epi16(_mm_srli_epi16(*f8_lo, 3), + _mm_srli_epi16(*f8_hi, 3)); + const __m128i result = _mm_and_si128(*flat, f8); + return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); +} + +static INLINE __m128i filter16_mask(const __m128i *const flat, + const __m128i *const other_filt, + const __m128i *const f_lo, + const __m128i *const f_hi) { + const __m128i f = _mm_packus_epi16(_mm_srli_epi16(*f_lo, 4), + _mm_srli_epi16(*f_hi, 4)); + const __m128i result = _mm_and_si128(*flat, f); + return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); +} + +void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + const __m128i zero = _mm_set1_epi16(0); + const __m128i one = _mm_set1_epi8(1); + const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); + const __m128i limit = _mm_load_si128((const __m128i *)_limit); + const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + __m128i mask, hev, flat, flat2; + __m128i p7, p6, p5; + __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; + __m128i q5, q6, q7; + + __m128i op2, op1, op0, oq0, oq1, oq2; + + __m128i max_abs_p1p0q1q0; + + p7 = _mm_loadu_si128((__m128i *)(s - 8 * p)); + p6 = _mm_loadu_si128((__m128i *)(s - 7 * p)); + p5 = _mm_loadu_si128((__m128i *)(s - 6 * p)); + p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); + q5 = _mm_loadu_si128((__m128i *)(s + 5 * p)); + q6 = _mm_loadu_si128((__m128i *)(s + 6 * p)); + q7 = _mm_loadu_si128((__m128i *)(s + 7 * p)); + + { + const __m128i abs_p1p0 = abs_diff(p1, p0); + const __m128i abs_q1q0 = abs_diff(q1, q0); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(zero, zero); + __m128i abs_p0q0 = abs_diff(p0, q0); + __m128i abs_p1q1 = abs_diff(p1, q1); + __m128i work; + max_abs_p1p0q1q0 = _mm_max_epu8(abs_p1p0, abs_q1q0); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(max_abs_p1p0q1q0, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8(abs_diff(p2, p1), abs_diff(p3, p2)); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8(abs_diff(q2, q1), abs_diff(q3, q2)); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + } + + { + __m128i work; + work = _mm_max_epu8(abs_diff(p2, p0), abs_diff(q2, q0)); + flat = _mm_max_epu8(work, max_abs_p1p0q1q0); + work = _mm_max_epu8(abs_diff(p3, p0), abs_diff(q3, q0)); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8(abs_diff(p4, p0), abs_diff(q4, q0)); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + flat2 = _mm_max_epu8(abs_diff(p5, p0), abs_diff(q5, q0)); + flat2 = _mm_max_epu8(work, flat2); + work = _mm_max_epu8(abs_diff(p6, p0), abs_diff(q6, q0)); + flat2 = _mm_max_epu8(work, flat2); + work = _mm_max_epu8(abs_diff(p7, p0), abs_diff(q7, q0)); + flat2 = _mm_max_epu8(work, flat2); + flat2 = _mm_subs_epu8(flat2, one); + flat2 = _mm_cmpeq_epi8(flat2, zero); + flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // filter4 + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + const __m128i ff = _mm_cmpeq_epi8(t4, t4); + + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + + op1 = _mm_xor_si128(p1, t80); + op0 = _mm_xor_si128(p0, t80); + oq0 = _mm_xor_si128(q0, t80); + oq1 = _mm_xor_si128(q1, t80); + + hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + filt = _mm_and_si128(_mm_subs_epi8(op1, oq1), hev); + + work_a = _mm_subs_epi8(oq0, op0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + // (vpx_filter + 3 * (qs0 - ps0)) & mask + filt = _mm_and_si128(filt, mask); + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + // Filter1 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + oq0 = _mm_xor_si128(_mm_subs_epi8(oq0, filter1), t80); + + // Filter2 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + op0 = _mm_xor_si128(_mm_adds_epi8(op0, filter2), t80); + + // filt >> 1 + filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, filt); + filt = _mm_srli_epi16(filt, 1); + work_a = _mm_and_si128(work_a, t80); + filt = _mm_and_si128(filt, t7f); + filt = _mm_or_si128(filt, work_a); + filt = _mm_andnot_si128(hev, filt); + op1 = _mm_xor_si128(_mm_adds_epi8(op1, filt), t80); + oq1 = _mm_xor_si128(_mm_subs_epi8(oq1, filt), t80); + // loopfilter done + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // filter8 + { + const __m128i four = _mm_set1_epi16(4); + const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); + const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); + const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); + const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); + const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); + const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); + const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); + const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); + + const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); + const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); + const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); + const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); + const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); + const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); + const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); + const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); + __m128i f8_lo, f8_hi; + + f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, four), + _mm_add_epi16(p3_lo, p2_lo)); + f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f8_lo), + _mm_add_epi16(p2_lo, p1_lo)); + f8_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f8_lo); + + f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, four), + _mm_add_epi16(p3_hi, p2_hi)); + f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f8_hi), + _mm_add_epi16(p2_hi, p1_hi)); + f8_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f8_hi); + + op2 = filter8_mask(&flat, &p2, &f8_lo, &f8_hi); + + f8_lo = filter_add2_sub2(&f8_lo, &q1_lo, &p1_lo, &p2_lo, &p3_lo); + f8_hi = filter_add2_sub2(&f8_hi, &q1_hi, &p1_hi, &p2_hi, &p3_hi); + op1 = filter8_mask(&flat, &op1, &f8_lo, &f8_hi); + + f8_lo = filter_add2_sub2(&f8_lo, &q2_lo, &p0_lo, &p1_lo, &p3_lo); + f8_hi = filter_add2_sub2(&f8_hi, &q2_hi, &p0_hi, &p1_hi, &p3_hi); + op0 = filter8_mask(&flat, &op0, &f8_lo, &f8_hi); + + f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q0_lo, &p0_lo, &p3_lo); + f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q0_hi, &p0_hi, &p3_hi); + oq0 = filter8_mask(&flat, &oq0, &f8_lo, &f8_hi); + + f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q1_lo, &q0_lo, &p2_lo); + f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q1_hi, &q0_hi, &p2_hi); + oq1 = filter8_mask(&flat, &oq1, &f8_lo, &f8_hi); + + f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q2_lo, &q1_lo, &p1_lo); + f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q2_hi, &q1_hi, &p1_hi); + oq2 = filter8_mask(&flat, &q2, &f8_lo, &f8_hi); + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // wide flat calculations + { + const __m128i eight = _mm_set1_epi16(8); + const __m128i p7_lo = _mm_unpacklo_epi8(p7, zero); + const __m128i p6_lo = _mm_unpacklo_epi8(p6, zero); + const __m128i p5_lo = _mm_unpacklo_epi8(p5, zero); + const __m128i p4_lo = _mm_unpacklo_epi8(p4, zero); + const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); + const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); + const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); + const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); + const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); + const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); + const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); + const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); + const __m128i q4_lo = _mm_unpacklo_epi8(q4, zero); + const __m128i q5_lo = _mm_unpacklo_epi8(q5, zero); + const __m128i q6_lo = _mm_unpacklo_epi8(q6, zero); + const __m128i q7_lo = _mm_unpacklo_epi8(q7, zero); + + const __m128i p7_hi = _mm_unpackhi_epi8(p7, zero); + const __m128i p6_hi = _mm_unpackhi_epi8(p6, zero); + const __m128i p5_hi = _mm_unpackhi_epi8(p5, zero); + const __m128i p4_hi = _mm_unpackhi_epi8(p4, zero); + const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); + const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); + const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); + const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); + const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); + const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); + const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); + const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); + const __m128i q4_hi = _mm_unpackhi_epi8(q4, zero); + const __m128i q5_hi = _mm_unpackhi_epi8(q5, zero); + const __m128i q6_hi = _mm_unpackhi_epi8(q6, zero); + const __m128i q7_hi = _mm_unpackhi_epi8(q7, zero); + + __m128i f_lo; + __m128i f_hi; + + f_lo = _mm_sub_epi16(_mm_slli_epi16(p7_lo, 3), p7_lo); // p7 * 7 + f_lo = _mm_add_epi16(_mm_slli_epi16(p6_lo, 1), + _mm_add_epi16(p4_lo, f_lo)); + f_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f_lo), + _mm_add_epi16(p2_lo, p1_lo)); + f_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f_lo); + f_lo = _mm_add_epi16(_mm_add_epi16(p5_lo, eight), f_lo); + + f_hi = _mm_sub_epi16(_mm_slli_epi16(p7_hi, 3), p7_hi); // p7 * 7 + f_hi = _mm_add_epi16(_mm_slli_epi16(p6_hi, 1), + _mm_add_epi16(p4_hi, f_hi)); + f_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f_hi), + _mm_add_epi16(p2_hi, p1_hi)); + f_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f_hi); + f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi); + + p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 7 * p), p6); + + f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q1_hi, &p5_hi, &p6_hi, &p7_hi); + p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 6 * p), p5); + + f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi); + p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 5 * p), p4); + + f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi); + p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 4 * p), p3); + + f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi); + op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 3 * p), op2); + + f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi); + op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 2 * p), op1); + + f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi); + op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 1 * p), op0); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi); + oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s - 0 * p), oq0); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi); + oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 1 * p), oq1); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi); + oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 2 * p), oq2); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi); + q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 3 * p), q3); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi); + q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 4 * p), q4); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi); + q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 5 * p), q5); + + f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo); + f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi); + q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi); + _mm_storeu_si128((__m128i *)(s + 6 * p), q6); + } + // wide flat + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + } +} + +void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); + const __m128i zero = _mm_set1_epi16(0); + const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); + const __m128i limit = _mm_load_si128((const __m128i *)_limit); + const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + __m128i mask, hev, flat; + __m128i p3, p2, p1, p0, q0, q1, q2, q3; + __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; + + q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), + _mm_loadl_epi64((__m128i *)(s + 3 * p))); + q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), + _mm_loadl_epi64((__m128i *)(s + 2 * p))); + q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), + _mm_loadl_epi64((__m128i *)(s + 1 * p))); + q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), + _mm_loadl_epi64((__m128i *)(s - 0 * p))); + p1q1 = _mm_shuffle_epi32(q1p1, 78); + p0q0 = _mm_shuffle_epi32(q0p0, 78); + + { + // filter_mask and hev_mask + const __m128i one = _mm_set1_epi8(1); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(fe, fe); + __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; + abs_p1p0 = abs_diff(q1p1, q0p0); + abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); + + abs_p0q0 = abs_diff(q0p0, p0q0); + abs_p1q1 = abs_diff(q1p1, p1q1); + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(abs_p1p0, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + + work = _mm_max_epu8(abs_diff(q2p2, q1p1), + abs_diff(q3p3, q2p2)); + mask = _mm_max_epu8(work, mask); + mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + + // flat_mask4 + + flat = _mm_max_epu8(abs_diff(q2p2, q0p0), + abs_diff(q3p3, q0p0)); + flat = _mm_max_epu8(abs_p1p0, flat); + flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + } + + { + const __m128i four = _mm_set1_epi16(4); + unsigned char *src = s; + { + __m128i workp_a, workp_b, workp_shft; + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + + workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); + workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); + workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op2[0], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op1[0], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op0[0], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq0[0], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq1[0], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq2[0], + _mm_packus_epi16(workp_shft, workp_shft)); + } + } + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i ps1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * p)), + t80); + const __m128i ps0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * p)), + t80); + const __m128i qs0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * p)), + t80); + const __m128i qs1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * p)), + t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + + filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + // (vpx_filter + 3 * (qs0 - ps0)) & mask + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + // Filter1 >> 3 + filter1 = _mm_unpacklo_epi8(zero, filter1); + filter1 = _mm_srai_epi16(filter1, 11); + filter1 = _mm_packs_epi16(filter1, filter1); + + // Filter2 >> 3 + filter2 = _mm_unpacklo_epi8(zero, filter2); + filter2 = _mm_srai_epi16(filter2, 11); + filter2 = _mm_packs_epi16(filter2, zero); + + // filt >> 1 + filt = _mm_adds_epi8(filter1, t1); + filt = _mm_unpacklo_epi8(zero, filt); + filt = _mm_srai_epi16(filt, 9); + filt = _mm_packs_epi16(filt, zero); + + filt = _mm_andnot_si128(hev, filt); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + q0 = _mm_loadl_epi64((__m128i *)flat_oq0); + work_a = _mm_andnot_si128(flat, work_a); + q0 = _mm_and_si128(flat, q0); + q0 = _mm_or_si128(work_a, q0); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); + q1 = _mm_loadl_epi64((__m128i *)flat_oq1); + work_a = _mm_andnot_si128(flat, work_a); + q1 = _mm_and_si128(flat, q1); + q1 = _mm_or_si128(work_a, q1); + + work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q2 = _mm_loadl_epi64((__m128i *)flat_oq2); + work_a = _mm_andnot_si128(flat, work_a); + q2 = _mm_and_si128(flat, q2); + q2 = _mm_or_si128(work_a, q2); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + p0 = _mm_loadl_epi64((__m128i *)flat_op0); + work_a = _mm_andnot_si128(flat, work_a); + p0 = _mm_and_si128(flat, p0); + p0 = _mm_or_si128(work_a, p0); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); + p1 = _mm_loadl_epi64((__m128i *)flat_op1); + work_a = _mm_andnot_si128(flat, work_a); + p1 = _mm_and_si128(flat, p1); + p1 = _mm_or_si128(work_a, p1); + + work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p2 = _mm_loadl_epi64((__m128i *)flat_op2); + work_a = _mm_andnot_si128(flat, work_a); + p2 = _mm_and_si128(flat, p2); + p2 = _mm_or_si128(work_a, p2); + + _mm_storel_epi64((__m128i *)(s - 3 * p), p2); + _mm_storel_epi64((__m128i *)(s - 2 * p), p1); + _mm_storel_epi64((__m128i *)(s - 1 * p), p0); + _mm_storel_epi64((__m128i *)(s + 0 * p), q0); + _mm_storel_epi64((__m128i *)(s + 1 * p), q1); + _mm_storel_epi64((__m128i *)(s + 2 * p), q2); + } +} + +void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, + const uint8_t *_blimit0, + const uint8_t *_limit0, + const uint8_t *_thresh0, + const uint8_t *_blimit1, + const uint8_t *_limit1, + const uint8_t *_thresh1) { + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); + const __m128i zero = _mm_set1_epi16(0); + const __m128i blimit = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), + _mm_load_si128((const __m128i *)_blimit1)); + const __m128i limit = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), + _mm_load_si128((const __m128i *)_limit1)); + const __m128i thresh = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), + _mm_load_si128((const __m128i *)_thresh1)); + + __m128i mask, hev, flat; + __m128i p3, p2, p1, p0, q0, q1, q2, q3; + + p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i one = _mm_set1_epi8(1); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), + _mm_subs_epu8(q0, p0)); + __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), + _mm_subs_epu8(q1, p1)); + __m128i work; + + // filter_mask and hev_mask + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(flat, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), + _mm_subs_epu8(p1, p2)), + _mm_or_si128(_mm_subs_epu8(p3, p2), + _mm_subs_epu8(p2, p3))); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), + _mm_subs_epu8(q1, q2)), + _mm_or_si128(_mm_subs_epu8(q3, q2), + _mm_subs_epu8(q2, q3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + + // flat_mask4 + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), + _mm_subs_epu8(p0, p2)), + _mm_or_si128(_mm_subs_epu8(q2, q0), + _mm_subs_epu8(q0, q2))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), + _mm_subs_epu8(p0, p3)), + _mm_or_si128(_mm_subs_epu8(q3, q0), + _mm_subs_epu8(q0, q3))); + flat = _mm_max_epu8(work, flat); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + } + { + const __m128i four = _mm_set1_epi16(4); + unsigned char *src = s; + int i = 0; + + do { + __m128i workp_a, workp_b, workp_shft; + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + + workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); + workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); + workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op2[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op1[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op0[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq0[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq1[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq2[i * 8], + _mm_packus_epi16(workp_shft, workp_shft)); + + src += 8; + } while (++i < 2); + } + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + + const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), + t80); + const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), + t80); + const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), + t80); + const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), + t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + + filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + // (vpx_filter + 3 * (qs0 - ps0)) & mask + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + // Filter1 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + + // Filter2 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + + // filt >> 1 + filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, filt); + filt = _mm_srli_epi16(filt, 1); + work_a = _mm_and_si128(work_a, t80); + filt = _mm_and_si128(filt, t7f); + filt = _mm_or_si128(filt, work_a); + + filt = _mm_andnot_si128(hev, filt); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + q0 = _mm_load_si128((__m128i *)flat_oq0); + work_a = _mm_andnot_si128(flat, work_a); + q0 = _mm_and_si128(flat, q0); + q0 = _mm_or_si128(work_a, q0); + + work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); + q1 = _mm_load_si128((__m128i *)flat_oq1); + work_a = _mm_andnot_si128(flat, work_a); + q1 = _mm_and_si128(flat, q1); + q1 = _mm_or_si128(work_a, q1); + + work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q2 = _mm_load_si128((__m128i *)flat_oq2); + work_a = _mm_andnot_si128(flat, work_a); + q2 = _mm_and_si128(flat, q2); + q2 = _mm_or_si128(work_a, q2); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + p0 = _mm_load_si128((__m128i *)flat_op0); + work_a = _mm_andnot_si128(flat, work_a); + p0 = _mm_and_si128(flat, p0); + p0 = _mm_or_si128(work_a, p0); + + work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); + p1 = _mm_load_si128((__m128i *)flat_op1); + work_a = _mm_andnot_si128(flat, work_a); + p1 = _mm_and_si128(flat, p1); + p1 = _mm_or_si128(work_a, p1); + + work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p2 = _mm_load_si128((__m128i *)flat_op2); + work_a = _mm_andnot_si128(flat, work_a); + p2 = _mm_and_si128(flat, p2); + p2 = _mm_or_si128(work_a, p2); + + _mm_storeu_si128((__m128i *)(s - 3 * p), p2); + _mm_storeu_si128((__m128i *)(s - 2 * p), p1); + _mm_storeu_si128((__m128i *)(s - 1 * p), p0); + _mm_storeu_si128((__m128i *)(s + 0 * p), q0); + _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + _mm_storeu_si128((__m128i *)(s + 2 * p), q2); + } +} + +void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, + const unsigned char *_blimit0, + const unsigned char *_limit0, + const unsigned char *_thresh0, + const unsigned char *_blimit1, + const unsigned char *_limit1, + const unsigned char *_thresh1) { + const __m128i blimit = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), + _mm_load_si128((const __m128i *)_blimit1)); + const __m128i limit = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), + _mm_load_si128((const __m128i *)_limit1)); + const __m128i thresh = + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), + _mm_load_si128((const __m128i *)_thresh1)); + const __m128i zero = _mm_set1_epi16(0); + __m128i p3, p2, p1, p0, q0, q1, q2, q3; + __m128i mask, hev, flat; + + p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + + // filter_mask and hev_mask + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), + _mm_subs_epu8(q0, p0)); + __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), + _mm_subs_epu8(q1, p1)); + __m128i work; + + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(flat, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), + _mm_subs_epu8(p1, p2)), + _mm_or_si128(_mm_subs_epu8(p3, p2), + _mm_subs_epu8(p2, p3))); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), + _mm_subs_epu8(q1, q2)), + _mm_or_si128(_mm_subs_epu8(q3, q2), + _mm_subs_epu8(q2, q3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + } + + // filter4 + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + + const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), + t80); + const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), + t80); + const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), + t80); + const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), + t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + + filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + // (vpx_filter + 3 * (qs0 - ps0)) & mask + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + // Filter1 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + + // Filter2 >> 3 + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + + // filt >> 1 + filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, filt); + filt = _mm_srli_epi16(filt, 1); + work_a = _mm_and_si128(work_a, t80); + filt = _mm_and_si128(filt, t7f); + filt = _mm_or_si128(filt, work_a); + + filt = _mm_andnot_si128(hev, filt); + + q0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + q1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); + p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); + + _mm_storeu_si128((__m128i *)(s - 2 * p), p1); + _mm_storeu_si128((__m128i *)(s - 1 * p), p0); + _mm_storeu_si128((__m128i *)(s + 0 * p), q0); + _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + } +} + +static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, + int in_p, unsigned char *out, int out_p) { + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + __m128i x8, x9, x10, x11, x12, x13, x14, x15; + + // 2-way interleave w/hoisting of unpacks + x0 = _mm_loadl_epi64((__m128i *)in0); // 1 + x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p)); // 3 + x0 = _mm_unpacklo_epi8(x0, x1); // 1 + + x2 = _mm_loadl_epi64((__m128i *)(in0 + 2 * in_p)); // 5 + x3 = _mm_loadl_epi64((__m128i *)(in0 + 3*in_p)); // 7 + x1 = _mm_unpacklo_epi8(x2, x3); // 2 + + x4 = _mm_loadl_epi64((__m128i *)(in0 + 4*in_p)); // 9 + x5 = _mm_loadl_epi64((__m128i *)(in0 + 5*in_p)); // 11 + x2 = _mm_unpacklo_epi8(x4, x5); // 3 + + x6 = _mm_loadl_epi64((__m128i *)(in0 + 6*in_p)); // 13 + x7 = _mm_loadl_epi64((__m128i *)(in0 + 7*in_p)); // 15 + x3 = _mm_unpacklo_epi8(x6, x7); // 4 + x4 = _mm_unpacklo_epi16(x0, x1); // 9 + + x8 = _mm_loadl_epi64((__m128i *)in1); // 2 + x9 = _mm_loadl_epi64((__m128i *)(in1 + in_p)); // 4 + x8 = _mm_unpacklo_epi8(x8, x9); // 5 + x5 = _mm_unpacklo_epi16(x2, x3); // 10 + + x10 = _mm_loadl_epi64((__m128i *)(in1 + 2 * in_p)); // 6 + x11 = _mm_loadl_epi64((__m128i *)(in1 + 3*in_p)); // 8 + x9 = _mm_unpacklo_epi8(x10, x11); // 6 + + x12 = _mm_loadl_epi64((__m128i *)(in1 + 4*in_p)); // 10 + x13 = _mm_loadl_epi64((__m128i *)(in1 + 5*in_p)); // 12 + x10 = _mm_unpacklo_epi8(x12, x13); // 7 + x12 = _mm_unpacklo_epi16(x8, x9); // 11 + + x14 = _mm_loadl_epi64((__m128i *)(in1 + 6*in_p)); // 14 + x15 = _mm_loadl_epi64((__m128i *)(in1 + 7*in_p)); // 16 + x11 = _mm_unpacklo_epi8(x14, x15); // 8 + x13 = _mm_unpacklo_epi16(x10, x11); // 12 + + x6 = _mm_unpacklo_epi32(x4, x5); // 13 + x7 = _mm_unpackhi_epi32(x4, x5); // 14 + x14 = _mm_unpacklo_epi32(x12, x13); // 15 + x15 = _mm_unpackhi_epi32(x12, x13); // 16 + + // Store first 4-line result + _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14)); + _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14)); + _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15)); + _mm_storeu_si128((__m128i *)(out + 3 * out_p), _mm_unpackhi_epi64(x7, x15)); + + x4 = _mm_unpackhi_epi16(x0, x1); + x5 = _mm_unpackhi_epi16(x2, x3); + x12 = _mm_unpackhi_epi16(x8, x9); + x13 = _mm_unpackhi_epi16(x10, x11); + + x6 = _mm_unpacklo_epi32(x4, x5); + x7 = _mm_unpackhi_epi32(x4, x5); + x14 = _mm_unpacklo_epi32(x12, x13); + x15 = _mm_unpackhi_epi32(x12, x13); + + // Store second 4-line result + _mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14)); + _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14)); + _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15)); + _mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15)); +} + +static INLINE void transpose(unsigned char *src[], int in_p, + unsigned char *dst[], int out_p, + int num_8x8_to_transpose) { + int idx8x8 = 0; + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + do { + unsigned char *in = src[idx8x8]; + unsigned char *out = dst[idx8x8]; + + x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07 + x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17 + // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 + x0 = _mm_unpacklo_epi8(x0, x1); + + x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27 + x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37 + // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 + x1 = _mm_unpacklo_epi8(x2, x3); + + x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47 + x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57 + // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 + x2 = _mm_unpacklo_epi8(x4, x5); + + x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67 + x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77 + // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 + x3 = _mm_unpacklo_epi8(x6, x7); + + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + x4 = _mm_unpacklo_epi16(x0, x1); + // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 + x5 = _mm_unpacklo_epi16(x2, x3); + // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + x6 = _mm_unpacklo_epi32(x4, x5); + _mm_storel_pd((double *)(out + 0*out_p), + _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70 + _mm_storeh_pd((double *)(out + 1*out_p), + _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71 + // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + x7 = _mm_unpackhi_epi32(x4, x5); + _mm_storel_pd((double *)(out + 2*out_p), + _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72 + _mm_storeh_pd((double *)(out + 3*out_p), + _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73 + + // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + x4 = _mm_unpackhi_epi16(x0, x1); + // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 + x5 = _mm_unpackhi_epi16(x2, x3); + // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 + x6 = _mm_unpacklo_epi32(x4, x5); + _mm_storel_pd((double *)(out + 4*out_p), + _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74 + _mm_storeh_pd((double *)(out + 5*out_p), + _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75 + // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 + x7 = _mm_unpackhi_epi32(x4, x5); + + _mm_storel_pd((double *)(out + 6*out_p), + _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76 + _mm_storeh_pd((double *)(out + 7*out_p), + _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77 + } while (++idx8x8 < num_8x8_to_transpose); +} + +void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); + unsigned char *src[2]; + unsigned char *dst[2]; + + // Transpose 8x16 + transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + + // Loop filtering + vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + blimit1, limit1, thresh1); + src[0] = t_dst; + src[1] = t_dst + 8; + dst[0] = s - 4; + dst[1] = s - 4 + p * 8; + + // Transpose back + transpose(src, 16, dst, p, 2); +} + +void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { + DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); + unsigned char *src[1]; + unsigned char *dst[1]; + + // Transpose 8x8 + src[0] = s - 4; + dst[0] = t_dst; + + transpose(src, p, dst, 8, 1); + + // Loop filtering + vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); + + src[0] = t_dst; + dst[0] = s - 4; + + // Transpose back + transpose(src, 8, dst, p, 1); +} + +void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); + unsigned char *src[2]; + unsigned char *dst[2]; + + // Transpose 8x16 + transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + + // Loop filtering + vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + blimit1, limit1, thresh1); + src[0] = t_dst; + src[1] = t_dst + 8; + + dst[0] = s - 4; + dst[1] = s - 4 + p * 8; + + // Transpose back + transpose(src, 16, dst, p, 2); +} + +void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { + DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 16]); + unsigned char *src[2]; + unsigned char *dst[2]; + + src[0] = s - 8; + src[1] = s; + dst[0] = t_dst; + dst[1] = t_dst + 8 * 8; + + // Transpose 16x8 + transpose(src, p, dst, 8, 2); + + // Loop filtering + vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); + + src[0] = t_dst; + src[1] = t_dst + 8 * 8; + dst[0] = s - 8; + dst[1] = s; + + // Transpose back + transpose(src, 8, dst, p, 2); +} + +void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh) { + DECLARE_ALIGNED(16, unsigned char, t_dst[256]); + + // Transpose 16x16 + transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); + transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); + + // Loop filtering + vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); + + // Transpose back + transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); + transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); +} diff --git a/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h new file mode 100644 index 00000000000..536b206876c --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_ +#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_ + +#include +#include "vpx/vpx_integer.h" + +#define pair_set_epi16(a, b) \ + _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) + +#define dual_set_epi16(a, b) \ + _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ + (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) + +#define octa_set_epi16(a, b, c, d, e, f, g, h) \ + _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ + (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) + +#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c new file mode 100644 index 00000000000..422b0fc422d --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/x86/convolve.h" + +#if HAVE_SSE2 +filter8_1dfunction vpx_filter_block1d16_v8_sse2; +filter8_1dfunction vpx_filter_block1d16_h8_sse2; +filter8_1dfunction vpx_filter_block1d8_v8_sse2; +filter8_1dfunction vpx_filter_block1d8_h8_sse2; +filter8_1dfunction vpx_filter_block1d4_v8_sse2; +filter8_1dfunction vpx_filter_block1d4_h8_sse2; +filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; + +filter8_1dfunction vpx_filter_block1d16_v2_sse2; +filter8_1dfunction vpx_filter_block1d16_h2_sse2; +filter8_1dfunction vpx_filter_block1d8_v2_sse2; +filter8_1dfunction vpx_filter_block1d8_h2_sse2; +filter8_1dfunction vpx_filter_block1d4_v2_sse2; +filter8_1dfunction vpx_filter_block1d4_h2_sse2; +filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; + +// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); + +// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, sse2); +FUN_CONV_2D(avg_ , sse2); + +#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; + +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; + +// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); +HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); +HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); +HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, + sse2); + +// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h, int bd); +HIGH_FUN_CONV_2D(, sse2); +HIGH_FUN_CONV_2D(avg_ , sse2); +#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 +#endif // HAVE_SSE2 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm new file mode 100644 index 00000000000..abc02706555 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm @@ -0,0 +1,228 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +%macro convolve_fn 1-2 +%ifidn %1, avg +%define AUX_XMM_REGS 4 +%else +%define AUX_XMM_REGS 0 +%endif +%ifidn %2, highbd +%define pavg pavgw +cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ + dst, dst_stride, \ + fx, fxs, fy, fys, w, h, bd +%else +%define pavg pavgb +cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ + dst, dst_stride, \ + fx, fxs, fy, fys, w, h +%endif + mov r4d, dword wm +%ifidn %2, highbd + shl r4d, 1 + shl srcq, 1 + shl src_strideq, 1 + shl dstq, 1 + shl dst_strideq, 1 +%else + cmp r4d, 4 + je .w4 +%endif + cmp r4d, 8 + je .w8 + cmp r4d, 16 + je .w16 + cmp r4d, 32 + je .w32 +%ifidn %2, highbd + cmp r4d, 64 + je .w64 + + mov r4d, dword hm +.loop128: + movu m0, [srcq] + movu m1, [srcq+16] + movu m2, [srcq+32] + movu m3, [srcq+48] +%ifidn %1, avg + pavg m0, [dstq] + pavg m1, [dstq+16] + pavg m2, [dstq+32] + pavg m3, [dstq+48] +%endif + mova [dstq ], m0 + mova [dstq+16], m1 + mova [dstq+32], m2 + mova [dstq+48], m3 + movu m0, [srcq+64] + movu m1, [srcq+80] + movu m2, [srcq+96] + movu m3, [srcq+112] + add srcq, src_strideq +%ifidn %1, avg + pavg m0, [dstq+64] + pavg m1, [dstq+80] + pavg m2, [dstq+96] + pavg m3, [dstq+112] +%endif + mova [dstq+64], m0 + mova [dstq+80], m1 + mova [dstq+96], m2 + mova [dstq+112], m3 + add dstq, dst_strideq + dec r4d + jnz .loop128 + RET +%endif + +.w64 + mov r4d, dword hm +.loop64: + movu m0, [srcq] + movu m1, [srcq+16] + movu m2, [srcq+32] + movu m3, [srcq+48] + add srcq, src_strideq +%ifidn %1, avg + pavg m0, [dstq] + pavg m1, [dstq+16] + pavg m2, [dstq+32] + pavg m3, [dstq+48] +%endif + mova [dstq ], m0 + mova [dstq+16], m1 + mova [dstq+32], m2 + mova [dstq+48], m3 + add dstq, dst_strideq + dec r4d + jnz .loop64 + RET + +.w32: + mov r4d, dword hm +.loop32: + movu m0, [srcq] + movu m1, [srcq+16] + movu m2, [srcq+src_strideq] + movu m3, [srcq+src_strideq+16] + lea srcq, [srcq+src_strideq*2] +%ifidn %1, avg + pavg m0, [dstq] + pavg m1, [dstq +16] + pavg m2, [dstq+dst_strideq] + pavg m3, [dstq+dst_strideq+16] +%endif + mova [dstq ], m0 + mova [dstq +16], m1 + mova [dstq+dst_strideq ], m2 + mova [dstq+dst_strideq+16], m3 + lea dstq, [dstq+dst_strideq*2] + sub r4d, 2 + jnz .loop32 + RET + +.w16: + mov r4d, dword hm + lea r5q, [src_strideq*3] + lea r6q, [dst_strideq*3] +.loop16: + movu m0, [srcq] + movu m1, [srcq+src_strideq] + movu m2, [srcq+src_strideq*2] + movu m3, [srcq+r5q] + lea srcq, [srcq+src_strideq*4] +%ifidn %1, avg + pavg m0, [dstq] + pavg m1, [dstq+dst_strideq] + pavg m2, [dstq+dst_strideq*2] + pavg m3, [dstq+r6q] +%endif + mova [dstq ], m0 + mova [dstq+dst_strideq ], m1 + mova [dstq+dst_strideq*2], m2 + mova [dstq+r6q ], m3 + lea dstq, [dstq+dst_strideq*4] + sub r4d, 4 + jnz .loop16 + RET + +.w8: + mov r4d, dword hm + lea r5q, [src_strideq*3] + lea r6q, [dst_strideq*3] +.loop8: + movh m0, [srcq] + movh m1, [srcq+src_strideq] + movh m2, [srcq+src_strideq*2] + movh m3, [srcq+r5q] + lea srcq, [srcq+src_strideq*4] +%ifidn %1, avg + movh m4, [dstq] + movh m5, [dstq+dst_strideq] + movh m6, [dstq+dst_strideq*2] + movh m7, [dstq+r6q] + pavg m0, m4 + pavg m1, m5 + pavg m2, m6 + pavg m3, m7 +%endif + movh [dstq ], m0 + movh [dstq+dst_strideq ], m1 + movh [dstq+dst_strideq*2], m2 + movh [dstq+r6q ], m3 + lea dstq, [dstq+dst_strideq*4] + sub r4d, 4 + jnz .loop8 + RET + +%ifnidn %2, highbd +.w4: + mov r4d, dword hm + lea r5q, [src_strideq*3] + lea r6q, [dst_strideq*3] +.loop4: + movd m0, [srcq] + movd m1, [srcq+src_strideq] + movd m2, [srcq+src_strideq*2] + movd m3, [srcq+r5q] + lea srcq, [srcq+src_strideq*4] +%ifidn %1, avg + movd m4, [dstq] + movd m5, [dstq+dst_strideq] + movd m6, [dstq+dst_strideq*2] + movd m7, [dstq+r6q] + pavg m0, m4 + pavg m1, m5 + pavg m2, m6 + pavg m3, m7 +%endif + movd [dstq ], m0 + movd [dstq+dst_strideq ], m1 + movd [dstq+dst_strideq*2], m2 + movd [dstq+r6q ], m3 + lea dstq, [dstq+dst_strideq*4] + sub r4d, 4 + jnz .loop4 + RET +%endif +%endmacro + +INIT_XMM sse2 +convolve_fn copy +convolve_fn avg +%if CONFIG_VP9_HIGHBITDEPTH +convolve_fn copy, highbd +convolve_fn avg, highbd +%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c new file mode 100644 index 00000000000..b7186785379 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// immintrin.h. + +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/x86/convolve.h" +#include "vpx_ports/mem.h" + +// filters for 16_h8 and 16_v8 +DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { + 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +}; + +DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { + 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, + 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +}; + +DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { + 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, + 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 +}; + +DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 +}; + +#if defined(__clang__) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ + (defined(__APPLE__) && \ + ((__clang_major__ == 4 && __clang_minor__ <= 2) || \ + (__clang_major__ == 5 && __clang_minor__ == 0))) + +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# else // clang > 3.3, and not 5.0 on macosx. +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // clang <= 3.3 +#elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 +# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) +# else // gcc > 4.7 +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // gcc <= 4.6 +#else // !(gcc || clang) +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +#endif // __clang__ + +static void vpx_filter_block1d16_h8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { + __m128i filtersReg; + __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; + __m256i firstFilters, secondFilters, thirdFilters, forthFilters; + __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; + __m256i srcReg32b1, srcReg32b2, filtersReg32; + unsigned int i; + ptrdiff_t src_stride, dst_stride; + + // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 + addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); + filtersReg = _mm_loadu_si128((const __m128i *)filter); + // converting the 16 bit (short) to 8 bit (byte) and have the same data + // in both lanes of 128 bit register. + filtersReg =_mm_packs_epi16(filtersReg, filtersReg); + // have the same data in both lanes of a 256 bit register + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); + + // duplicate only the first 16 bits (first and second byte) + // across 256 bit register + firstFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x100u)); + // duplicate only the second 16 bits (third and forth byte) + // across 256 bit register + secondFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x302u)); + // duplicate only the third 16 bits (fifth and sixth byte) + // across 256 bit register + thirdFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x504u)); + // duplicate only the forth 16 bits (seventh and eighth byte) + // across 256 bit register + forthFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x706u)); + + filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2); + filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2); + filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2); + filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2); + + // multiple the size of the source and destination stride by two + src_stride = src_pixels_per_line << 1; + dst_stride = output_pitch << 1; + for (i = output_height; i > 1; i-=2) { + // load the 2 strides of source + srcReg32b1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr - 3))); + srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, + _mm_loadu_si128((const __m128i *) + (src_ptr+src_pixels_per_line-3)), 1); + + // filter the source buffer + srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg); + srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); + + // add and saturate the results together + srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2); + + // filter the source buffer + srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); + srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); + + // add and saturate the results together + srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, + _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); + + // reading 2 strides of the next 16 bytes + // (part of it was being read by earlier read) + srcReg32b2 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + 5))); + srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, + _mm_loadu_si128((const __m128i *) + (src_ptr+src_pixels_per_line+5)), 1); + + // add and saturate the results together + srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, + _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); + + // filter the source buffer + srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg); + srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); + + // add and saturate the results together + srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2); + + // filter the source buffer + srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg); + srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); + srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); + + // add and saturate the results together + srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, + _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); + srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, + _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); + + + srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64); + + srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64); + + // shift by 7 bit each 16 bit + srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7); + srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7); + + // shrink to 8 bit each 16 bits, the first lane contain the first + // convolve result and the second lane contain the second convolve + // result + srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, + srcRegFilt32b2_1); + + src_ptr+=src_stride; + + // save 16 bytes + _mm_store_si128((__m128i*)output_ptr, + _mm256_castsi256_si128(srcRegFilt32b1_1)); + + // save the next 16 bits + _mm_store_si128((__m128i*)(output_ptr+output_pitch), + _mm256_extractf128_si256(srcRegFilt32b1_1, 1)); + output_ptr+=dst_stride; + } + + // if the number of strides is odd. + // process only 16 bytes + if (i > 0) { + __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; + __m128i srcRegFilt2, srcRegFilt3; + + srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); + + // filter the source buffer + srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, + _mm256_castsi256_si128(filt1Reg)); + srcRegFilt2 = _mm_shuffle_epi8(srcReg1, + _mm256_castsi256_si128(filt4Reg)); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, + _mm256_castsi256_si128(firstFilters)); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, + _mm256_castsi256_si128(forthFilters)); + + // add and saturate the results together + srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); + + // filter the source buffer + srcRegFilt3= _mm_shuffle_epi8(srcReg1, + _mm256_castsi256_si128(filt2Reg)); + srcRegFilt2= _mm_shuffle_epi8(srcReg1, + _mm256_castsi256_si128(filt3Reg)); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, + _mm256_castsi256_si128(secondFilters)); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, + _mm256_castsi256_si128(thirdFilters)); + + // add and saturate the results together + srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, + _mm_min_epi16(srcRegFilt3, srcRegFilt2)); + + // reading the next 16 bytes + // (part of it was being read by earlier read) + srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); + + // add and saturate the results together + srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, + _mm_max_epi16(srcRegFilt3, srcRegFilt2)); + + // filter the source buffer + srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, + _mm256_castsi256_si128(filt1Reg)); + srcRegFilt2 = _mm_shuffle_epi8(srcReg2, + _mm256_castsi256_si128(filt4Reg)); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, + _mm256_castsi256_si128(firstFilters)); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, + _mm256_castsi256_si128(forthFilters)); + + // add and saturate the results together + srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); + + // filter the source buffer + srcRegFilt3 = _mm_shuffle_epi8(srcReg2, + _mm256_castsi256_si128(filt2Reg)); + srcRegFilt2 = _mm_shuffle_epi8(srcReg2, + _mm256_castsi256_si128(filt3Reg)); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, + _mm256_castsi256_si128(secondFilters)); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, + _mm256_castsi256_si128(thirdFilters)); + + // add and saturate the results together + srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, + _mm_min_epi16(srcRegFilt3, srcRegFilt2)); + srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, + _mm_max_epi16(srcRegFilt3, srcRegFilt2)); + + + srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, + _mm256_castsi256_si128(addFilterReg64)); + + srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, + _mm256_castsi256_si128(addFilterReg64)); + + // shift by 7 bit each 16 bit + srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); + srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); + + // shrink to 8 bit each 16 bits, the first lane contain the first + // convolve result and the second lane contain the second convolve + // result + srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); + + // save 16 bytes + _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); + } +} + +static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { + __m128i filtersReg; + __m256i addFilterReg64; + __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; + __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10; + __m256i srcReg32b11, srcReg32b12, filtersReg32; + __m256i firstFilters, secondFilters, thirdFilters, forthFilters; + unsigned int i; + ptrdiff_t src_stride, dst_stride; + + // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 + addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); + filtersReg = _mm_loadu_si128((const __m128i *)filter); + // converting the 16 bit (short) to 8 bit (byte) and have the + // same data in both lanes of 128 bit register. + filtersReg =_mm_packs_epi16(filtersReg, filtersReg); + // have the same data in both lanes of a 256 bit register + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); + + // duplicate only the first 16 bits (first and second byte) + // across 256 bit register + firstFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x100u)); + // duplicate only the second 16 bits (third and forth byte) + // across 256 bit register + secondFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x302u)); + // duplicate only the third 16 bits (fifth and sixth byte) + // across 256 bit register + thirdFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x504u)); + // duplicate only the forth 16 bits (seventh and eighth byte) + // across 256 bit register + forthFilters = _mm256_shuffle_epi8(filtersReg32, + _mm256_set1_epi16(0x706u)); + + // multiple the size of the source and destination stride by two + src_stride = src_pitch << 1; + dst_stride = out_pitch << 1; + + // load 16 bytes 7 times in stride of src_pitch + srcReg32b1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr))); + srcReg32b2 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch))); + srcReg32b3 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2))); + srcReg32b4 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3))); + srcReg32b5 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4))); + srcReg32b6 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5))); + srcReg32b7 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6))); + + // have each consecutive loads on the same 256 register + srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, + _mm256_castsi256_si128(srcReg32b2), 1); + srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, + _mm256_castsi256_si128(srcReg32b3), 1); + srcReg32b3 = _mm256_inserti128_si256(srcReg32b3, + _mm256_castsi256_si128(srcReg32b4), 1); + srcReg32b4 = _mm256_inserti128_si256(srcReg32b4, + _mm256_castsi256_si128(srcReg32b5), 1); + srcReg32b5 = _mm256_inserti128_si256(srcReg32b5, + _mm256_castsi256_si128(srcReg32b6), 1); + srcReg32b6 = _mm256_inserti128_si256(srcReg32b6, + _mm256_castsi256_si128(srcReg32b7), 1); + + // merge every two consecutive registers except the last one + srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2); + srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2); + + // save + srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4); + + // save + srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4); + + // save + srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6); + + // save + srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6); + + + for (i = output_height; i > 1; i-=2) { + // load the last 2 loads of 16 bytes and have every two + // consecutive loads in the same 256 bit register + srcReg32b8 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7))); + srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, + _mm256_castsi256_si128(srcReg32b8), 1); + srcReg32b9 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8))); + srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, + _mm256_castsi256_si128(srcReg32b9), 1); + + // merge every two consecutive registers + // save + srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8); + srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8); + + // multiply 2 adjacent elements with the filter and add the result + srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters); + srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters); + + // add and saturate the results together + srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6); + + // multiply 2 adjacent elements with the filter and add the result + srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters); + srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters); + + // add and saturate the results together + srcReg32b10 = _mm256_adds_epi16(srcReg32b10, + _mm256_min_epi16(srcReg32b8, srcReg32b12)); + srcReg32b10 = _mm256_adds_epi16(srcReg32b10, + _mm256_max_epi16(srcReg32b8, srcReg32b12)); + + // multiply 2 adjacent elements with the filter and add the result + srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); + srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); + + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6); + + // multiply 2 adjacent elements with the filter and add the result + srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); + srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); + + // add and saturate the results together + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, + _mm256_min_epi16(srcReg32b8, srcReg32b12)); + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, + _mm256_max_epi16(srcReg32b8, srcReg32b12)); + + srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64); + srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64); + + // shift by 7 bit each 16 bit + srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7); + srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7); + + // shrink to 8 bit each 16 bits, the first lane contain the first + // convolve result and the second lane contain the second convolve + // result + srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1); + + src_ptr+=src_stride; + + // save 16 bytes + _mm_store_si128((__m128i*)output_ptr, + _mm256_castsi256_si128(srcReg32b1)); + + // save the next 16 bits + _mm_store_si128((__m128i*)(output_ptr+out_pitch), + _mm256_extractf128_si256(srcReg32b1, 1)); + + output_ptr+=dst_stride; + + // save part of the registers for next strides + srcReg32b10 = srcReg32b11; + srcReg32b1 = srcReg32b3; + srcReg32b11 = srcReg32b2; + srcReg32b3 = srcReg32b5; + srcReg32b2 = srcReg32b4; + srcReg32b5 = srcReg32b7; + srcReg32b7 = srcReg32b9; + } + if (i > 0) { + __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; + __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; + // load the last 16 bytes + srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); + + // merge the last 2 results together + srcRegFilt4 = _mm_unpacklo_epi8( + _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); + srcRegFilt7 = _mm_unpackhi_epi8( + _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10), + _mm256_castsi256_si128(firstFilters)); + srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, + _mm256_castsi256_si128(forthFilters)); + srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1), + _mm256_castsi256_si128(firstFilters)); + srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, + _mm256_castsi256_si128(forthFilters)); + + // add and saturate the results together + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); + srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7); + + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11), + _mm256_castsi256_si128(secondFilters)); + srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3), + _mm256_castsi256_si128(secondFilters)); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2), + _mm256_castsi256_si128(thirdFilters)); + srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5), + _mm256_castsi256_si128(thirdFilters)); + + // add and saturate the results together + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, + _mm_min_epi16(srcRegFilt4, srcRegFilt6)); + srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, + _mm_min_epi16(srcRegFilt5, srcRegFilt7)); + + // add and saturate the results together + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, + _mm_max_epi16(srcRegFilt4, srcRegFilt6)); + srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, + _mm_max_epi16(srcRegFilt5, srcRegFilt7)); + + + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, + _mm256_castsi256_si128(addFilterReg64)); + srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, + _mm256_castsi256_si128(addFilterReg64)); + + // shift by 7 bit each 16 bit + srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); + srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7); + + // shrink to 8 bit each 16 bits, the first lane contain the first + // convolve result and the second lane contain the second convolve + // result + srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); + + // save 16 bytes + _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); + } +} + +#if HAVE_AVX2 && HAVE_SSSE3 +filter8_1dfunction vpx_filter_block1d4_v8_ssse3; +#if ARCH_X86_64 +filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; +#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3 +#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3 +#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3 +#else // ARCH_X86 +filter8_1dfunction vpx_filter_block1d8_v8_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_ssse3; +filter8_1dfunction vpx_filter_block1d4_h8_ssse3; +#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3 +#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3 +#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3 +#endif // ARCH_X86_64 +filter8_1dfunction vpx_filter_block1d16_v2_ssse3; +filter8_1dfunction vpx_filter_block1d16_h2_ssse3; +filter8_1dfunction vpx_filter_block1d8_v2_ssse3; +filter8_1dfunction vpx_filter_block1d8_h2_ssse3; +filter8_1dfunction vpx_filter_block1d4_v2_ssse3; +filter8_1dfunction vpx_filter_block1d4_h2_ssse3; +#define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3 +#define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3 +#define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3 +#define vpx_filter_block1d8_v2_avx2 vpx_filter_block1d8_v2_ssse3 +#define vpx_filter_block1d8_h2_avx2 vpx_filter_block1d8_h2_ssse3 +#define vpx_filter_block1d4_v2_avx2 vpx_filter_block1d4_v2_ssse3 +#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3 +// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); + +// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, avx2); +#endif // HAVE_AX2 && HAVE_SSSE3 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c new file mode 100644 index 00000000000..6fd52087c7b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -0,0 +1,915 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Due to a header conflict between math.h and intrinsics includes with ceil() +// in certain configurations under vs9 this include needs to precede +// tmmintrin.h. + +#include + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_dsp/x86/convolve.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/emmintrin_compat.h" + +// filters only for the 4_h8 convolution +DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = { + 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6 +}; + +DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = { + 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10 +}; + +// filters for 8_h8 and 16_h8 +DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = { + 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +}; + +DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = { + 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +}; + +DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = { + 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 +}; + +DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 +}; + +// These are reused by the avx2 intrinsics. +filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; + +void vpx_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { + __m128i firstFilters, secondFilters, shuffle1, shuffle2; + __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; + __m128i addFilterReg64, filtersReg, srcReg, minReg; + unsigned int i; + + // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 + addFilterReg64 =_mm_set1_epi32((int)0x0400040u); + filtersReg = _mm_loadu_si128((const __m128i *)filter); + // converting the 16 bit (short) to 8 bit (byte) and have the same data + // in both lanes of 128 bit register. + filtersReg =_mm_packs_epi16(filtersReg, filtersReg); + + // duplicate only the first 16 bits in the filter into the first lane + firstFilters = _mm_shufflelo_epi16(filtersReg, 0); + // duplicate only the third 16 bit in the filter into the first lane + secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); + // duplicate only the seconds 16 bits in the filter into the second lane + // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3 + firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); + // duplicate only the forth 16 bits in the filter into the second lane + // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7 + secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); + + // loading the local filters + shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8); + shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); + + for (i = 0; i < output_height; i++) { + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); + + // filter the source buffer + srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); + srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); + + // extract the higher half of the lane + srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8); + srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8); + + minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2); + + // add and saturate all the results together + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); + srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); + + // shift by 7 bit each 16 bits + srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); + + // shrink to 8 bit each 16 bits + srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); + src_ptr+=src_pixels_per_line; + + // save only 4 bytes + *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); + + output_ptr+=output_pitch; + } +} + +void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pixels_per_line, + uint8_t *output_ptr, + ptrdiff_t output_pitch, + uint32_t output_height, + const int16_t *filter) { + __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; + __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; + __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; + __m128i addFilterReg64, filtersReg, minReg; + unsigned int i; + + // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 + addFilterReg64 = _mm_set1_epi32((int)0x0400040u); + filtersReg = _mm_loadu_si128((const __m128i *)filter); + // converting the 16 bit (short) to 8 bit (byte) and have the same data + // in both lanes of 128 bit register. + filtersReg =_mm_packs_epi16(filtersReg, filtersReg); + + // duplicate only the first 16 bits (first and second byte) + // across 128 bit register + firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); + // duplicate only the second 16 bits (third and forth byte) + // across 128 bit register + secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); + // duplicate only the third 16 bits (fifth and sixth byte) + // across 128 bit register + thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); + // duplicate only the forth 16 bits (seventh and eighth byte) + // across 128 bit register + forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); + + filt1Reg = _mm_load_si128((__m128i const *)filt1_global); + filt2Reg = _mm_load_si128((__m128i const *)filt2_global); + filt3Reg = _mm_load_si128((__m128i const *)filt3_global); + filt4Reg = _mm_load_si128((__m128i const *)filt4_global); + + for (i = 0; i < output_height; i++) { + srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); + + // filter the source buffer + srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); + srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); + + // filter the source buffer + srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg); + srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters); + srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters); + + // add and saturate all the results together + minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); + + srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); + + // shift by 7 bit each 16 bits + srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); + + // shrink to 8 bit each 16 bits + srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); + + src_ptr+=src_pixels_per_line; + + // save only 8 bytes + _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); + + output_ptr+=output_pitch; + } +} + +void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_pitch, + uint8_t *output_ptr, + ptrdiff_t out_pitch, + uint32_t output_height, + const int16_t *filter) { + __m128i addFilterReg64, filtersReg, minReg; + __m128i firstFilters, secondFilters, thirdFilters, forthFilters; + __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5; + __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7; + __m128i srcReg8; + unsigned int i; + + // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 + addFilterReg64 = _mm_set1_epi32((int)0x0400040u); + filtersReg = _mm_loadu_si128((const __m128i *)filter); + // converting the 16 bit (short) to 8 bit (byte) and have the same data + // in both lanes of 128 bit register. + filtersReg =_mm_packs_epi16(filtersReg, filtersReg); + + // duplicate only the first 16 bits in the filter + firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); + // duplicate only the second 16 bits in the filter + secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); + // duplicate only the third 16 bits in the filter + thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); + // duplicate only the forth 16 bits in the filter + forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); + + // load the first 7 rows of 8 bytes + srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr); + srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); + srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); + srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); + srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); + srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); + srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); + + for (i = 0; i < output_height; i++) { + // load the last 8 bytes + srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); + + // merge the result together + srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2); + srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4); + + // merge the result together + srcRegFilt2 = _mm_unpacklo_epi8(srcReg5, srcReg6); + srcRegFilt5 = _mm_unpacklo_epi8(srcReg7, srcReg8); + + // multiply 2 adjacent elements with the filter and add the result + srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); + srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); + srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); + srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters); + + // add and saturate the results together + minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5); + srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); + srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); + + // shift by 7 bit each 16 bit + srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); + + // shrink to 8 bit each 16 bits + srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); + + src_ptr+=src_pitch; + + // shift down a row + srcReg1 = srcReg2; + srcReg2 = srcReg3; + srcReg3 = srcReg4; + srcReg4 = srcReg5; + srcReg5 = srcReg6; + srcReg6 = srcReg7; + srcReg7 = srcReg8; + + // save only 8 bytes convolve result + _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); + + output_ptr+=out_pitch; + } +} + +filter8_1dfunction vpx_filter_block1d16_v8_ssse3; +filter8_1dfunction vpx_filter_block1d16_h8_ssse3; +filter8_1dfunction vpx_filter_block1d8_v8_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_ssse3; +filter8_1dfunction vpx_filter_block1d4_v8_ssse3; +filter8_1dfunction vpx_filter_block1d4_h8_ssse3; +filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3; +filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3; +filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; +filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3; +filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3; + +filter8_1dfunction vpx_filter_block1d16_v2_ssse3; +filter8_1dfunction vpx_filter_block1d16_h2_ssse3; +filter8_1dfunction vpx_filter_block1d8_v2_ssse3; +filter8_1dfunction vpx_filter_block1d8_h2_ssse3; +filter8_1dfunction vpx_filter_block1d4_v2_ssse3; +filter8_1dfunction vpx_filter_block1d4_h2_ssse3; +filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3; +filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3; +filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3; +filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3; +filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3; +filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; + +// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); +FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); +FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); +FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, + ssse3); + +#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) { \ + const __m128i tr0_0 = _mm_unpacklo_epi8(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi8(in2, in3); \ + const __m128i tr0_2 = _mm_unpacklo_epi8(in4, in5); \ + const __m128i tr0_3 = _mm_unpacklo_epi8(in6, in7); \ + \ + const __m128i tr1_0 = _mm_unpacklo_epi16(tr0_0, tr0_1); \ + const __m128i tr1_1 = _mm_unpackhi_epi16(tr0_0, tr0_1); \ + const __m128i tr1_2 = _mm_unpacklo_epi16(tr0_2, tr0_3); \ + const __m128i tr1_3 = _mm_unpackhi_epi16(tr0_2, tr0_3); \ + \ + const __m128i tr2_0 = _mm_unpacklo_epi32(tr1_0, tr1_2); \ + const __m128i tr2_1 = _mm_unpackhi_epi32(tr1_0, tr1_2); \ + const __m128i tr2_2 = _mm_unpacklo_epi32(tr1_1, tr1_3); \ + const __m128i tr2_3 = _mm_unpackhi_epi32(tr1_1, tr1_3); \ + \ + out0 = _mm_unpacklo_epi64(tr2_0, tr2_0); \ + out1 = _mm_unpackhi_epi64(tr2_0, tr2_0); \ + out2 = _mm_unpacklo_epi64(tr2_1, tr2_1); \ + out3 = _mm_unpackhi_epi64(tr2_1, tr2_1); \ + out4 = _mm_unpacklo_epi64(tr2_2, tr2_2); \ + out5 = _mm_unpackhi_epi64(tr2_2, tr2_2); \ + out6 = _mm_unpacklo_epi64(tr2_3, tr2_3); \ + out7 = _mm_unpackhi_epi64(tr2_3, tr2_3); \ +} + +static void filter_horiz_w8_ssse3(const uint8_t *src_x, ptrdiff_t src_pitch, + uint8_t *dst, const int16_t *x_filter) { + const __m128i k_256 = _mm_set1_epi16(1 << 8); + const __m128i f_values = _mm_load_si128((const __m128i *)x_filter); + // pack and duplicate the filter values + const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); + const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); + const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); + const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); + const __m128i A = _mm_loadl_epi64((const __m128i *)src_x); + const __m128i B = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch)); + const __m128i C = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 2)); + const __m128i D = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 3)); + const __m128i E = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 4)); + const __m128i F = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 5)); + const __m128i G = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 6)); + const __m128i H = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 7)); + // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 + const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); + // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 + const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); + // 40 41 50 51 42 43 52 53 44 45 54 55 46 47 56 57 + const __m128i tr0_2 = _mm_unpacklo_epi16(E, F); + // 60 61 70 71 62 63 72 73 64 65 74 75 66 67 76 77 + const __m128i tr0_3 = _mm_unpacklo_epi16(G, H); + // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 + const __m128i tr1_1 = _mm_unpackhi_epi32(tr0_0, tr0_1); + // 40 41 50 51 60 61 70 71 42 43 52 53 62 63 72 73 + const __m128i tr1_2 = _mm_unpacklo_epi32(tr0_2, tr0_3); + // 44 45 54 55 64 65 74 75 46 47 56 57 66 67 76 77 + const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); + // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 + const __m128i s1s0 = _mm_unpacklo_epi64(tr1_0, tr1_2); + const __m128i s3s2 = _mm_unpackhi_epi64(tr1_0, tr1_2); + const __m128i s5s4 = _mm_unpacklo_epi64(tr1_1, tr1_3); + const __m128i s7s6 = _mm_unpackhi_epi64(tr1_1, tr1_3); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); + const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); + const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); + const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); + // add and saturate the results together + const __m128i min_x2x1 = _mm_min_epi16(x2, x1); + const __m128i max_x2x1 = _mm_max_epi16(x2, x1); + __m128i temp = _mm_adds_epi16(x0, x3); + temp = _mm_adds_epi16(temp, min_x2x1); + temp = _mm_adds_epi16(temp, max_x2x1); + // round and shift by 7 bit each 16 bit + temp = _mm_mulhrs_epi16(temp, k_256); + // shrink to 8 bit each 16 bits + temp = _mm_packus_epi16(temp, temp); + // save only 8 bytes convolve result + _mm_storel_epi64((__m128i*)dst, temp); +} + +static void transpose8x8_to_dst(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride) { + __m128i A, B, C, D, E, F, G, H; + + A = _mm_loadl_epi64((const __m128i *)src); + B = _mm_loadl_epi64((const __m128i *)(src + src_stride)); + C = _mm_loadl_epi64((const __m128i *)(src + src_stride * 2)); + D = _mm_loadl_epi64((const __m128i *)(src + src_stride * 3)); + E = _mm_loadl_epi64((const __m128i *)(src + src_stride * 4)); + F = _mm_loadl_epi64((const __m128i *)(src + src_stride * 5)); + G = _mm_loadl_epi64((const __m128i *)(src + src_stride * 6)); + H = _mm_loadl_epi64((const __m128i *)(src + src_stride * 7)); + + TRANSPOSE_8X8(A, B, C, D, E, F, G, H, + A, B, C, D, E, F, G, H); + + _mm_storel_epi64((__m128i*)dst, A); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 1), B); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 2), C); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 3), D); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 4), E); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 5), F); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 6), G); + _mm_storel_epi64((__m128i*)(dst + dst_stride * 7), H); +} + +static void scaledconvolve_horiz_w8(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, int w, int h) { + DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); + int x, y, z; + src -= SUBPEL_TAPS / 2 - 1; + + // This function processes 8x8 areas. The intermediate height is not always + // a multiple of 8, so force it to be a multiple of 8 here. + y = h + (8 - (h & 0x7)); + + do { + int x_q4 = x0_q4; + for (x = 0; x < w; x += 8) { + // process 8 src_x steps + for (z = 0; z < 8; ++z) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + if (x_q4 & SUBPEL_MASK) { + filter_horiz_w8_ssse3(src_x, src_stride, temp + (z * 8), x_filter); + } else { + int i; + for (i = 0; i < 8; ++i) { + temp[z * 8 + i] = src_x[i * src_stride + 3]; + } + } + x_q4 += x_step_q4; + } + + // transpose the 8x8 filters values back to dst + transpose8x8_to_dst(temp, 8, dst + x, dst_stride); + } + + src += src_stride * 8; + dst += dst_stride * 8; + } while (y -= 8); +} + +static void filter_horiz_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, + uint8_t *dst, const int16_t *filter) { + const __m128i k_256 = _mm_set1_epi16(1 << 8); + const __m128i f_values = _mm_load_si128((const __m128i *)filter); + // pack and duplicate the filter values + const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); + const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); + const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); + const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); + const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); + const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); + const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); + const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); + // TRANSPOSE... + // 00 01 02 03 04 05 06 07 + // 10 11 12 13 14 15 16 17 + // 20 21 22 23 24 25 26 27 + // 30 31 32 33 34 35 36 37 + // + // TO + // + // 00 10 20 30 + // 01 11 21 31 + // 02 12 22 32 + // 03 13 23 33 + // 04 14 24 34 + // 05 15 25 35 + // 06 16 26 36 + // 07 17 27 37 + // + // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 + const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); + // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 + const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); + // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 + const __m128i s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 + const __m128i s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1); + // 02 03 12 13 22 23 32 33 + const __m128i s3s2 = _mm_srli_si128(s1s0, 8); + // 06 07 16 17 26 27 36 37 + const __m128i s7s6 = _mm_srli_si128(s5s4, 8); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); + const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); + const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); + const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); + // add and saturate the results together + const __m128i min_x2x1 = _mm_min_epi16(x2, x1); + const __m128i max_x2x1 = _mm_max_epi16(x2, x1); + __m128i temp = _mm_adds_epi16(x0, x3); + temp = _mm_adds_epi16(temp, min_x2x1); + temp = _mm_adds_epi16(temp, max_x2x1); + // round and shift by 7 bit each 16 bit + temp = _mm_mulhrs_epi16(temp, k_256); + // shrink to 8 bit each 16 bits + temp = _mm_packus_epi16(temp, temp); + // save only 4 bytes + *(int *)dst = _mm_cvtsi128_si32(temp); +} + +static void transpose4x4_to_dst(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride) { + __m128i A = _mm_cvtsi32_si128(*(const int *)src); + __m128i B = _mm_cvtsi32_si128(*(const int *)(src + src_stride)); + __m128i C = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 2)); + __m128i D = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 3)); + // 00 10 01 11 02 12 03 13 + const __m128i tr0_0 = _mm_unpacklo_epi8(A, B); + // 20 30 21 31 22 32 23 33 + const __m128i tr0_1 = _mm_unpacklo_epi8(C, D); + // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + A = _mm_unpacklo_epi16(tr0_0, tr0_1); + B = _mm_srli_si128(A, 4); + C = _mm_srli_si128(A, 8); + D = _mm_srli_si128(A, 12); + + *(int *)(dst) = _mm_cvtsi128_si32(A); + *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(B); + *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(C); + *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(D); +} + +static void scaledconvolve_horiz_w4(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, int w, int h) { + DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); + int x, y, z; + src -= SUBPEL_TAPS / 2 - 1; + + for (y = 0; y < h; y += 4) { + int x_q4 = x0_q4; + for (x = 0; x < w; x += 4) { + // process 4 src_x steps + for (z = 0; z < 4; ++z) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + if (x_q4 & SUBPEL_MASK) { + filter_horiz_w4_ssse3(src_x, src_stride, temp + (z * 4), x_filter); + } else { + int i; + for (i = 0; i < 4; ++i) { + temp[z * 4 + i] = src_x[i * src_stride + 3]; + } + } + x_q4 += x_step_q4; + } + + // transpose the 4x4 filters values back to dst + transpose4x4_to_dst(temp, 4, dst + x, dst_stride); + } + + src += src_stride * 4; + dst += dst_stride * 4; + } +} + +static void filter_vert_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, + uint8_t *dst, const int16_t *filter) { + const __m128i k_256 = _mm_set1_epi16(1 << 8); + const __m128i f_values = _mm_load_si128((const __m128i *)filter); + // pack and duplicate the filter values + const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); + const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); + const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); + const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); + const __m128i A = _mm_cvtsi32_si128(*(const int *)src_ptr); + const __m128i B = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch)); + const __m128i C = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 2)); + const __m128i D = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 3)); + const __m128i E = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 4)); + const __m128i F = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 5)); + const __m128i G = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 6)); + const __m128i H = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 7)); + const __m128i s1s0 = _mm_unpacklo_epi8(A, B); + const __m128i s3s2 = _mm_unpacklo_epi8(C, D); + const __m128i s5s4 = _mm_unpacklo_epi8(E, F); + const __m128i s7s6 = _mm_unpacklo_epi8(G, H); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); + const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); + const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); + const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); + // add and saturate the results together + const __m128i min_x2x1 = _mm_min_epi16(x2, x1); + const __m128i max_x2x1 = _mm_max_epi16(x2, x1); + __m128i temp = _mm_adds_epi16(x0, x3); + temp = _mm_adds_epi16(temp, min_x2x1); + temp = _mm_adds_epi16(temp, max_x2x1); + // round and shift by 7 bit each 16 bit + temp = _mm_mulhrs_epi16(temp, k_256); + // shrink to 8 bit each 16 bits + temp = _mm_packus_epi16(temp, temp); + // save only 4 bytes + *(int *)dst = _mm_cvtsi128_si32(temp); +} + +static void scaledconvolve_vert_w4(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h) { + int y; + int y_q4 = y0_q4; + + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + for (y = 0; y < h; ++y) { + const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + + if (y_q4 & SUBPEL_MASK) { + filter_vert_w4_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); + } else { + memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); + } + + y_q4 += y_step_q4; + } +} + +static void filter_vert_w8_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, + uint8_t *dst, const int16_t *filter) { + const __m128i k_256 = _mm_set1_epi16(1 << 8); + const __m128i f_values = _mm_load_si128((const __m128i *)filter); + // pack and duplicate the filter values + const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); + const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); + const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); + const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); + const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); + const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); + const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); + const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); + const __m128i E = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); + const __m128i F = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); + const __m128i G = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); + const __m128i H = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); + const __m128i s1s0 = _mm_unpacklo_epi8(A, B); + const __m128i s3s2 = _mm_unpacklo_epi8(C, D); + const __m128i s5s4 = _mm_unpacklo_epi8(E, F); + const __m128i s7s6 = _mm_unpacklo_epi8(G, H); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); + const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); + const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); + const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); + // add and saturate the results together + const __m128i min_x2x1 = _mm_min_epi16(x2, x1); + const __m128i max_x2x1 = _mm_max_epi16(x2, x1); + __m128i temp = _mm_adds_epi16(x0, x3); + temp = _mm_adds_epi16(temp, min_x2x1); + temp = _mm_adds_epi16(temp, max_x2x1); + // round and shift by 7 bit each 16 bit + temp = _mm_mulhrs_epi16(temp, k_256); + // shrink to 8 bit each 16 bits + temp = _mm_packus_epi16(temp, temp); + // save only 8 bytes convolve result + _mm_storel_epi64((__m128i*)dst, temp); +} + +static void scaledconvolve_vert_w8(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h) { + int y; + int y_q4 = y0_q4; + + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + for (y = 0; y < h; ++y) { + const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + if (y_q4 & SUBPEL_MASK) { + filter_vert_w8_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); + } else { + memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); + } + y_q4 += y_step_q4; + } +} + +static void filter_vert_w16_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, + uint8_t *dst, const int16_t *filter, int w) { + const __m128i k_256 = _mm_set1_epi16(1 << 8); + const __m128i f_values = _mm_load_si128((const __m128i *)filter); + // pack and duplicate the filter values + const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); + const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); + const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); + const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); + int i; + + for (i = 0; i < w; i += 16) { + const __m128i A = _mm_loadu_si128((const __m128i *)src_ptr); + const __m128i B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); + const __m128i C = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); + const __m128i D = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); + const __m128i E = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); + const __m128i F = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); + const __m128i G = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); + const __m128i H = + _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); + // merge the result together + const __m128i s1s0_lo = _mm_unpacklo_epi8(A, B); + const __m128i s7s6_lo = _mm_unpacklo_epi8(G, H); + const __m128i s1s0_hi = _mm_unpackhi_epi8(A, B); + const __m128i s7s6_hi = _mm_unpackhi_epi8(G, H); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x0_lo = _mm_maddubs_epi16(s1s0_lo, f1f0); + const __m128i x3_lo = _mm_maddubs_epi16(s7s6_lo, f7f6); + const __m128i x0_hi = _mm_maddubs_epi16(s1s0_hi, f1f0); + const __m128i x3_hi = _mm_maddubs_epi16(s7s6_hi, f7f6); + // add and saturate the results together + const __m128i x3x0_lo = _mm_adds_epi16(x0_lo, x3_lo); + const __m128i x3x0_hi = _mm_adds_epi16(x0_hi, x3_hi); + // merge the result together + const __m128i s3s2_lo = _mm_unpacklo_epi8(C, D); + const __m128i s3s2_hi = _mm_unpackhi_epi8(C, D); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x1_lo = _mm_maddubs_epi16(s3s2_lo, f3f2); + const __m128i x1_hi = _mm_maddubs_epi16(s3s2_hi, f3f2); + // merge the result together + const __m128i s5s4_lo = _mm_unpacklo_epi8(E, F); + const __m128i s5s4_hi = _mm_unpackhi_epi8(E, F); + // multiply 2 adjacent elements with the filter and add the result + const __m128i x2_lo = _mm_maddubs_epi16(s5s4_lo, f5f4); + const __m128i x2_hi = _mm_maddubs_epi16(s5s4_hi, f5f4); + // add and saturate the results together + __m128i temp_lo = _mm_adds_epi16(x3x0_lo, _mm_min_epi16(x1_lo, x2_lo)); + __m128i temp_hi = _mm_adds_epi16(x3x0_hi, _mm_min_epi16(x1_hi, x2_hi)); + + // add and saturate the results together + temp_lo = _mm_adds_epi16(temp_lo, _mm_max_epi16(x1_lo, x2_lo)); + temp_hi = _mm_adds_epi16(temp_hi, _mm_max_epi16(x1_hi, x2_hi)); + // round and shift by 7 bit each 16 bit + temp_lo = _mm_mulhrs_epi16(temp_lo, k_256); + temp_hi = _mm_mulhrs_epi16(temp_hi, k_256); + // shrink to 8 bit each 16 bits, the first lane contain the first + // convolve result and the second lane contain the second convolve + // result + temp_hi = _mm_packus_epi16(temp_lo, temp_hi); + src_ptr += 16; + // save 16 bytes convolve result + _mm_store_si128((__m128i*)&dst[i], temp_hi); + } +} + +static void scaledconvolve_vert_w16(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, int h) { + int y; + int y_q4 = y0_q4; + + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + for (y = 0; y < h; ++y) { + const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + if (y_q4 & SUBPEL_MASK) { + filter_vert_w16_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter, + w); + } else { + memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); + } + y_q4 += y_step_q4; + } +} + +static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *const x_filters, + int x0_q4, int x_step_q4, + const InterpKernel *const y_filters, + int y0_q4, int y_step_q4, + int w, int h) { + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. + // --Require an additional 8 rows for the horiz_w8 transpose tail. + DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); + const int intermediate_height = + (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; + + assert(w <= 64); + assert(h <= 64); + assert(y_step_q4 <= 32); + assert(x_step_q4 <= 32); + + if (w >= 8) { + scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), + src_stride, temp, 64, x_filters, x0_q4, x_step_q4, + w, intermediate_height); + } else { + scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), + src_stride, temp, 64, x_filters, x0_q4, x_step_q4, + w, intermediate_height); + } + + if (w >= 16) { + scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, + dst_stride, y_filters, y0_q4, y_step_q4, w, h); + } else if (w == 8) { + scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, + dst_stride, y_filters, y0_q4, y_step_q4, w, h); + } else { + scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, + dst_stride, y_filters, y0_q4, y_step_q4, w, h); + } +} + +static const InterpKernel *get_filter_base(const int16_t *filter) { + // NOTE: This assumes that the filter table is 256-byte aligned. + // TODO(agrange) Modify to make independent of table alignment. + return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); +} + +static int get_filter_offset(const int16_t *f, const InterpKernel *base) { + return (int)((const InterpKernel *)(intptr_t)f - base); +} + +void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const int16_t *filter_x, int x_step_q4, + const int16_t *filter_y, int y_step_q4, + int w, int h) { + const InterpKernel *const filters_x = get_filter_base(filter_x); + const int x0_q4 = get_filter_offset(filter_x, filters_x); + + const InterpKernel *const filters_y = get_filter_base(filter_y); + const int y0_q4 = get_filter_offset(filter_y, filters_y); + + scaledconvolve2d(src, src_stride, dst, dst_stride, + filters_x, x0_q4, x_step_q4, + filters_y, y0_q4, y_step_q4, w, h); +} + +// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const int16_t *filter_x, int x_step_q4, +// const int16_t *filter_y, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, ssse3); +FUN_CONV_2D(avg_ , ssse3); diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm new file mode 100644 index 00000000000..08f3d6a6cf3 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm @@ -0,0 +1,987 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;Note: tap3 and tap4 have to be applied and added after other taps to avoid +;overflow. + +%macro GET_FILTERS_4 0 + mov rdx, arg(5) ;filter ptr + mov rcx, 0x0400040 + + movdqa xmm7, [rdx] ;load filters + pshuflw xmm0, xmm7, 0b ;k0 + pshuflw xmm1, xmm7, 01010101b ;k1 + pshuflw xmm2, xmm7, 10101010b ;k2 + pshuflw xmm3, xmm7, 11111111b ;k3 + psrldq xmm7, 8 + pshuflw xmm4, xmm7, 0b ;k4 + pshuflw xmm5, xmm7, 01010101b ;k5 + pshuflw xmm6, xmm7, 10101010b ;k6 + pshuflw xmm7, xmm7, 11111111b ;k7 + + punpcklqdq xmm0, xmm1 + punpcklqdq xmm2, xmm3 + punpcklqdq xmm5, xmm4 + punpcklqdq xmm6, xmm7 + + movdqa k0k1, xmm0 + movdqa k2k3, xmm2 + movdqa k5k4, xmm5 + movdqa k6k7, xmm6 + + movq xmm6, rcx + pshufd xmm6, xmm6, 0 + movdqa krd, xmm6 + + pxor xmm7, xmm7 + movdqa zero, xmm7 +%endm + +%macro APPLY_FILTER_4 1 + punpckldq xmm0, xmm1 ;two row in one register + punpckldq xmm6, xmm7 + punpckldq xmm2, xmm3 + punpckldq xmm5, xmm4 + + punpcklbw xmm0, zero ;unpack to word + punpcklbw xmm6, zero + punpcklbw xmm2, zero + punpcklbw xmm5, zero + + pmullw xmm0, k0k1 ;multiply the filter factors + pmullw xmm6, k6k7 + pmullw xmm2, k2k3 + pmullw xmm5, k5k4 + + paddsw xmm0, xmm6 ;sum + movdqa xmm1, xmm0 + psrldq xmm1, 8 + paddsw xmm0, xmm1 + paddsw xmm0, xmm2 + psrldq xmm2, 8 + paddsw xmm0, xmm5 + psrldq xmm5, 8 + paddsw xmm0, xmm2 + paddsw xmm0, xmm5 + + paddsw xmm0, krd ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack to byte + +%if %1 + movd xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movd [rdi], xmm0 +%endm + +%macro GET_FILTERS 0 + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm7, [rdx] ;load filters + pshuflw xmm0, xmm7, 0b ;k0 + pshuflw xmm1, xmm7, 01010101b ;k1 + pshuflw xmm2, xmm7, 10101010b ;k2 + pshuflw xmm3, xmm7, 11111111b ;k3 + pshufhw xmm4, xmm7, 0b ;k4 + pshufhw xmm5, xmm7, 01010101b ;k5 + pshufhw xmm6, xmm7, 10101010b ;k6 + pshufhw xmm7, xmm7, 11111111b ;k7 + + punpcklwd xmm0, xmm0 + punpcklwd xmm1, xmm1 + punpcklwd xmm2, xmm2 + punpcklwd xmm3, xmm3 + punpckhwd xmm4, xmm4 + punpckhwd xmm5, xmm5 + punpckhwd xmm6, xmm6 + punpckhwd xmm7, xmm7 + + movdqa k0, xmm0 ;store filter factors on stack + movdqa k1, xmm1 + movdqa k2, xmm2 + movdqa k3, xmm3 + movdqa k4, xmm4 + movdqa k5, xmm5 + movdqa k6, xmm6 + movdqa k7, xmm7 + + movq xmm6, rcx + pshufd xmm6, xmm6, 0 + movdqa krd, xmm6 ;rounding + + pxor xmm7, xmm7 + movdqa zero, xmm7 +%endm + +%macro LOAD_VERT_8 1 + movq xmm0, [rsi + %1] ;0 + movq xmm1, [rsi + rax + %1] ;1 + movq xmm6, [rsi + rdx * 2 + %1] ;6 + lea rsi, [rsi + rax] + movq xmm7, [rsi + rdx * 2 + %1] ;7 + movq xmm2, [rsi + rax + %1] ;2 + movq xmm3, [rsi + rax * 2 + %1] ;3 + movq xmm4, [rsi + rdx + %1] ;4 + movq xmm5, [rsi + rax * 4 + %1] ;5 +%endm + +%macro APPLY_FILTER_8 2 + punpcklbw xmm0, zero + punpcklbw xmm1, zero + punpcklbw xmm6, zero + punpcklbw xmm7, zero + punpcklbw xmm2, zero + punpcklbw xmm5, zero + punpcklbw xmm3, zero + punpcklbw xmm4, zero + + pmullw xmm0, k0 + pmullw xmm1, k1 + pmullw xmm6, k6 + pmullw xmm7, k7 + pmullw xmm2, k2 + pmullw xmm5, k5 + pmullw xmm3, k3 + pmullw xmm4, k4 + + paddsw xmm0, xmm1 + paddsw xmm0, xmm6 + paddsw xmm0, xmm7 + paddsw xmm0, xmm2 + paddsw xmm0, xmm5 + paddsw xmm0, xmm3 + paddsw xmm0, xmm4 + + paddsw xmm0, krd ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack back to byte +%if %1 + movq xmm1, [rdi + %2] + pavgb xmm0, xmm1 +%endif + movq [rdi + %2], xmm0 +%endm + +;void vpx_filter_block1d4_v8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d4_v8_sse2) PRIVATE +sym(vpx_filter_block1d4_v8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 6 + %define k0k1 [rsp + 16 * 0] + %define k2k3 [rsp + 16 * 1] + %define k5k4 [rsp + 16 * 2] + %define k6k7 [rsp + 16 * 3] + %define krd [rsp + 16 * 4] + %define zero [rsp + 16 * 5] + + GET_FILTERS_4 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movd xmm0, [rsi] ;load src: row 0 + movd xmm1, [rsi + rax] ;1 + movd xmm6, [rsi + rdx * 2] ;6 + lea rsi, [rsi + rax] + movd xmm7, [rsi + rdx * 2] ;7 + movd xmm2, [rsi + rax] ;2 + movd xmm3, [rsi + rax * 2] ;3 + movd xmm4, [rsi + rdx] ;4 + movd xmm5, [rsi + rax * 4] ;5 + + APPLY_FILTER_4 0 + + lea rdi, [rdi + rbx] + dec rcx + jnz .loop + + add rsp, 16 * 6 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vpx_filter_block1d8_v8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d8_v8_sse2) PRIVATE +sym(vpx_filter_block1d8_v8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + LOAD_VERT_8 0 + APPLY_FILTER_8 0, 0 + + lea rdi, [rdi + rbx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vpx_filter_block1d16_v8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d16_v8_sse2) PRIVATE +sym(vpx_filter_block1d16_v8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + LOAD_VERT_8 0 + APPLY_FILTER_8 0, 0 + sub rsi, rax + + LOAD_VERT_8 8 + APPLY_FILTER_8 0, 8 + add rdi, rbx + + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE +sym(vpx_filter_block1d4_v8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 6 + %define k0k1 [rsp + 16 * 0] + %define k2k3 [rsp + 16 * 1] + %define k5k4 [rsp + 16 * 2] + %define k6k7 [rsp + 16 * 3] + %define krd [rsp + 16 * 4] + %define zero [rsp + 16 * 5] + + GET_FILTERS_4 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movd xmm0, [rsi] ;load src: row 0 + movd xmm1, [rsi + rax] ;1 + movd xmm6, [rsi + rdx * 2] ;6 + lea rsi, [rsi + rax] + movd xmm7, [rsi + rdx * 2] ;7 + movd xmm2, [rsi + rax] ;2 + movd xmm3, [rsi + rax * 2] ;3 + movd xmm4, [rsi + rdx] ;4 + movd xmm5, [rsi + rax * 4] ;5 + + APPLY_FILTER_4 1 + + lea rdi, [rdi + rbx] + dec rcx + jnz .loop + + add rsp, 16 * 6 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE +sym(vpx_filter_block1d8_v8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height +.loop: + LOAD_VERT_8 0 + APPLY_FILTER_8 1, 0 + + lea rdi, [rdi + rbx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE +sym(vpx_filter_block1d16_v8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rbx, DWORD PTR arg(3) ;out_pitch + lea rdx, [rax + rax * 2] + movsxd rcx, DWORD PTR arg(4) ;output_height +.loop: + LOAD_VERT_8 0 + APPLY_FILTER_8 1, 0 + sub rsi, rax + + LOAD_VERT_8 8 + APPLY_FILTER_8 1, 8 + add rdi, rbx + + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + pop rbx + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vpx_filter_block1d4_h8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d4_h8_sse2) PRIVATE +sym(vpx_filter_block1d4_h8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 6 + %define k0k1 [rsp + 16 * 0] + %define k2k3 [rsp + 16 * 1] + %define k5k4 [rsp + 16 * 2] + %define k6k7 [rsp + 16 * 3] + %define krd [rsp + 16 * 4] + %define zero [rsp + 16 * 5] + + GET_FILTERS_4 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm3, xmm0 + movdqa xmm5, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm3, 3 + psrldq xmm5, 5 + psrldq xmm4, 4 + + APPLY_FILTER_4 0 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 6 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vpx_filter_block1d8_h8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d8_h8_sse2) PRIVATE +sym(vpx_filter_block1d8_h8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 0, 0 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vpx_filter_block1d16_h8_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; short *filter +;) +global sym(vpx_filter_block1d16_h8_sse2) PRIVATE +sym(vpx_filter_block1d16_h8_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 0, 0 + + movdqu xmm0, [rsi + 5] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 0, 8 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE +sym(vpx_filter_block1d4_h8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 6 + %define k0k1 [rsp + 16 * 0] + %define k2k3 [rsp + 16 * 1] + %define k5k4 [rsp + 16 * 2] + %define k6k7 [rsp + 16 * 3] + %define krd [rsp + 16 * 4] + %define zero [rsp + 16 * 5] + + GET_FILTERS_4 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm3, xmm0 + movdqa xmm5, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm3, 3 + psrldq xmm5, 5 + psrldq xmm4, 4 + + APPLY_FILTER_4 1 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 6 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE +sym(vpx_filter_block1d8_h8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 1, 0 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE +sym(vpx_filter_block1d16_h8_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 16 * 10 + %define k0 [rsp + 16 * 0] + %define k1 [rsp + 16 * 1] + %define k2 [rsp + 16 * 2] + %define k3 [rsp + 16 * 3] + %define k4 [rsp + 16 * 4] + %define k5 [rsp + 16 * 5] + %define k6 [rsp + 16 * 6] + %define k7 [rsp + 16 * 7] + %define krd [rsp + 16 * 8] + %define zero [rsp + 16 * 9] + + GET_FILTERS + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height + +.loop: + movdqu xmm0, [rsi - 3] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 1, 0 + + movdqu xmm0, [rsi + 5] ;load src + + movdqa xmm1, xmm0 + movdqa xmm6, xmm0 + movdqa xmm7, xmm0 + movdqa xmm2, xmm0 + movdqa xmm5, xmm0 + movdqa xmm3, xmm0 + movdqa xmm4, xmm0 + + psrldq xmm1, 1 + psrldq xmm6, 6 + psrldq xmm7, 7 + psrldq xmm2, 2 + psrldq xmm5, 5 + psrldq xmm3, 3 + psrldq xmm4, 4 + + APPLY_FILTER_8 1, 8 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx + jnz .loop + + add rsp, 16 * 10 + pop rsp + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm new file mode 100644 index 00000000000..d2cb8ea2927 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -0,0 +1,629 @@ +; +; Copyright (c) 2015 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION_RODATA +pw_64: times 8 dw 64 + +; %define USE_PMULHRSW +; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss +; when using this instruction. +; +; The add order below (based on ffvp9) must be followed to prevent outranges. +; x = k0k1 + k4k5 +; y = k2k3 + k6k7 +; z = signed SAT(x + y) + +SECTION .text +%if ARCH_X86_64 + %define LOCAL_VARS_SIZE 16*4 +%else + %define LOCAL_VARS_SIZE 16*6 +%endif + +%macro SETUP_LOCAL_VARS 0 + ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + + ; pmaddubsw has a higher latency on some platforms, this might be eased by + ; interleaving the instructions. + %define k0k1 [rsp + 16*0] + %define k2k3 [rsp + 16*1] + %define k4k5 [rsp + 16*2] + %define k6k7 [rsp + 16*3] + packsswb m4, m4 + ; TODO(slavarnway): multiple pshufb instructions had a higher latency on + ; some platforms. + pshuflw m0, m4, 0b ;k0_k1 + pshuflw m1, m4, 01010101b ;k2_k3 + pshuflw m2, m4, 10101010b ;k4_k5 + pshuflw m3, m4, 11111111b ;k6_k7 + punpcklqdq m0, m0 + punpcklqdq m1, m1 + punpcklqdq m2, m2 + punpcklqdq m3, m3 + mova k0k1, m0 + mova k2k3, m1 + mova k4k5, m2 + mova k6k7, m3 +%if ARCH_X86_64 + %define krd m12 + %define tmp m13 + mova krd, [GLOBAL(pw_64)] +%else + %define tmp [rsp + 16*4] + %define krd [rsp + 16*5] +%if CONFIG_PIC=0 + mova m6, [GLOBAL(pw_64)] +%else + ; build constants without accessing global memory + pcmpeqb m6, m6 ;all ones + psrlw m6, 15 + psllw m6, 6 ;aka pw_64 +%endif + mova krd, m6 +%endif +%endm + +%macro HORIZx4_ROW 2 + mova %2, %1 + punpcklbw %1, %1 + punpckhbw %2, %2 + + mova m3, %2 + palignr %2, %1, 1 + palignr m3, %1, 5 + + pmaddubsw %2, k0k1k4k5 + pmaddubsw m3, k2k3k6k7 + mova m4, %2 ;k0k1 + mova m5, m3 ;k2k3 + psrldq %2, 8 ;k4k5 + psrldq m3, 8 ;k6k7 + paddsw %2, m4 + paddsw m5, m3 + paddsw %2, m5 + paddsw %2, krd + psraw %2, 7 + packuswb %2, %2 +%endm + +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER4 1 +cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + packsswb m4, m4 +%if ARCH_X86_64 + %define k0k1k4k5 m8 + %define k2k3k6k7 m9 + %define krd m10 + %define orig_height r7d + mova krd, [GLOBAL(pw_64)] + pshuflw k0k1k4k5, m4, 0b ;k0_k1 + pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 + pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 + pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 +%else + %define k0k1k4k5 [rsp + 16*0] + %define k2k3k6k7 [rsp + 16*1] + %define krd [rsp + 16*2] + %define orig_height [rsp + 16*3] + pshuflw m6, m4, 0b ;k0_k1 + pshufhw m6, m6, 10101010b ;k0_k1_k4_k5 + pshuflw m7, m4, 01010101b ;k2_k3 + pshufhw m7, m7, 11111111b ;k2_k3_k6_k7 +%if CONFIG_PIC=0 + mova m1, [GLOBAL(pw_64)] +%else + ; build constants without accessing global memory + pcmpeqb m1, m1 ;all ones + psrlw m1, 15 + psllw m1, 6 ;aka pw_64 +%endif + mova k0k1k4k5, m6 + mova k2k3k6k7, m7 + mova krd, m1 +%endif + mov orig_height, heightd + shr heightd, 1 +.loop: + ;Do two rows at once + movh m0, [srcq - 3] + movh m1, [srcq + 5] + punpcklqdq m0, m1 + mova m1, m0 + movh m2, [srcq + sstrideq - 3] + movh m3, [srcq + sstrideq + 5] + punpcklqdq m2, m3 + mova m3, m2 + punpcklbw m0, m0 + punpckhbw m1, m1 + punpcklbw m2, m2 + punpckhbw m3, m3 + mova m4, m1 + palignr m4, m0, 1 + pmaddubsw m4, k0k1k4k5 + palignr m1, m0, 5 + pmaddubsw m1, k2k3k6k7 + mova m7, m3 + palignr m7, m2, 1 + pmaddubsw m7, k0k1k4k5 + palignr m3, m2, 5 + pmaddubsw m3, k2k3k6k7 + mova m0, m4 ;k0k1 + mova m5, m1 ;k2k3 + mova m2, m7 ;k0k1 upper + psrldq m4, 8 ;k4k5 + psrldq m1, 8 ;k6k7 + paddsw m4, m0 + paddsw m5, m1 + mova m1, m3 ;k2k3 upper + psrldq m7, 8 ;k4k5 upper + psrldq m3, 8 ;k6k7 upper + paddsw m7, m2 + paddsw m4, m5 + paddsw m1, m3 + paddsw m7, m1 + paddsw m4, krd + psraw m4, 7 + packuswb m4, m4 + paddsw m7, krd + psraw m7, 7 + packuswb m7, m7 + +%ifidn %1, h8_avg + movd m0, [dstq] + pavgb m4, m0 + movd m2, [dstq + dstrideq] + pavgb m7, m2 +%endif + movd [dstq], m4 + movd [dstq + dstrideq], m7 + + lea srcq, [srcq + sstrideq ] + prefetcht0 [srcq + 4 * sstrideq - 3] + lea srcq, [srcq + sstrideq ] + lea dstq, [dstq + 2 * dstrideq ] + prefetcht0 [srcq + 2 * sstrideq - 3] + + dec heightd + jnz .loop + + ; Do last row if output_height is odd + mov heightd, orig_height + and heightd, 1 + je .done + + movh m0, [srcq - 3] ; load src + movh m1, [srcq + 5] + punpcklqdq m0, m1 + + HORIZx4_ROW m0, m1 +%ifidn %1, h8_avg + movd m0, [dstq] + pavgb m1, m0 +%endif + movd [dstq], m1 +.done + RET +%endm + +%macro HORIZx8_ROW 5 + mova %2, %1 + punpcklbw %1, %1 + punpckhbw %2, %2 + + mova %3, %2 + mova %4, %2 + mova %5, %2 + + palignr %2, %1, 1 + palignr %3, %1, 5 + palignr %4, %1, 9 + palignr %5, %1, 13 + + pmaddubsw %2, k0k1 + pmaddubsw %3, k2k3 + pmaddubsw %4, k4k5 + pmaddubsw %5, k6k7 + paddsw %2, %4 + paddsw %5, %3 + paddsw %2, %5 + paddsw %2, krd + psraw %2, 7 + packuswb %2, %2 + SWAP %1, %2 +%endm + +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER8 1 +cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define orig_height r7d +%else + %define orig_height heightmp +%endif + mov orig_height, heightd + shr heightd, 1 + +.loop: + movh m0, [srcq - 3] + movh m3, [srcq + 5] + movh m4, [srcq + sstrideq - 3] + movh m7, [srcq + sstrideq + 5] + punpcklqdq m0, m3 + mova m1, m0 + punpcklbw m0, m0 + punpckhbw m1, m1 + mova m5, m1 + palignr m5, m0, 13 + pmaddubsw m5, k6k7 + mova m2, m1 + mova m3, m1 + palignr m1, m0, 1 + pmaddubsw m1, k0k1 + punpcklqdq m4, m7 + mova m6, m4 + punpcklbw m4, m4 + palignr m2, m0, 5 + punpckhbw m6, m6 + palignr m3, m0, 9 + mova m7, m6 + pmaddubsw m2, k2k3 + pmaddubsw m3, k4k5 + + palignr m7, m4, 13 + mova m0, m6 + palignr m0, m4, 5 + pmaddubsw m7, k6k7 + paddsw m1, m3 + paddsw m2, m5 + paddsw m1, m2 + mova m5, m6 + palignr m6, m4, 1 + pmaddubsw m0, k2k3 + pmaddubsw m6, k0k1 + palignr m5, m4, 9 + paddsw m1, krd + pmaddubsw m5, k4k5 + psraw m1, 7 + paddsw m0, m7 +%ifidn %1, h8_avg + movh m7, [dstq] + movh m2, [dstq + dstrideq] +%endif + packuswb m1, m1 + paddsw m6, m5 + paddsw m6, m0 + paddsw m6, krd + psraw m6, 7 + packuswb m6, m6 +%ifidn %1, h8_avg + pavgb m1, m7 + pavgb m6, m2 +%endif + movh [dstq], m1 + movh [dstq + dstrideq], m6 + + lea srcq, [srcq + sstrideq ] + prefetcht0 [srcq + 4 * sstrideq - 3] + lea srcq, [srcq + sstrideq ] + lea dstq, [dstq + 2 * dstrideq ] + prefetcht0 [srcq + 2 * sstrideq - 3] + dec heightd + jnz .loop + + ;Do last row if output_height is odd + mov heightd, orig_height + and heightd, 1 + je .done + + movh m0, [srcq - 3] + movh m3, [srcq + 5] + punpcklqdq m0, m3 + + HORIZx8_ROW m0, m1, m2, m3, m4 + +%ifidn %1, h8_avg + movh m1, [dstq] + pavgb m0, m1 +%endif + movh [dstq], m0 +.done: + RET +%endm + +;------------------------------------------------------------------------------- +%macro SUBPIX_HFILTER16 1 +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +.loop: + prefetcht0 [srcq + 2 * sstrideq -3] + + movh m0, [srcq - 3] + movh m4, [srcq + 5] + movh m6, [srcq + 13] + punpcklqdq m0, m4 + mova m7, m0 + punpckhbw m0, m0 + mova m1, m0 + punpcklqdq m4, m6 + mova m3, m0 + punpcklbw m7, m7 + + palignr m3, m7, 13 + mova m2, m0 + pmaddubsw m3, k6k7 + palignr m0, m7, 1 + pmaddubsw m0, k0k1 + palignr m1, m7, 5 + pmaddubsw m1, k2k3 + palignr m2, m7, 9 + pmaddubsw m2, k4k5 + paddsw m1, m3 + mova m3, m4 + punpckhbw m4, m4 + mova m5, m4 + punpcklbw m3, m3 + mova m7, m4 + palignr m5, m3, 5 + mova m6, m4 + palignr m4, m3, 1 + pmaddubsw m4, k0k1 + pmaddubsw m5, k2k3 + palignr m6, m3, 9 + pmaddubsw m6, k4k5 + palignr m7, m3, 13 + pmaddubsw m7, k6k7 + paddsw m0, m2 + paddsw m0, m1 +%ifidn %1, h8_avg + mova m1, [dstq] +%endif + paddsw m4, m6 + paddsw m5, m7 + paddsw m4, m5 + paddsw m0, krd + paddsw m4, krd + psraw m0, 7 + psraw m4, 7 + packuswb m0, m4 +%ifidn %1, h8_avg + pavgb m0, m1 +%endif + lea srcq, [srcq + sstrideq] + mova [dstq], m0 + lea dstq, [dstq + dstrideq] + dec heightd + jnz .loop + RET +%endm + +INIT_XMM ssse3 +SUBPIX_HFILTER16 h8 +SUBPIX_HFILTER16 h8_avg +SUBPIX_HFILTER8 h8 +SUBPIX_HFILTER8 h8_avg +SUBPIX_HFILTER4 h8 +SUBPIX_HFILTER4 h8_avg + +;------------------------------------------------------------------------------- +%macro SUBPIX_VFILTER 2 +cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq +%else + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp +%endif + mov src1q, srcq + add src1q, sstrideq + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 + +%ifidn %2, 8 + %define movx movh +%else + %define movx movd +%endif +.loop: + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + punpcklbw m0, m1 ;A B + movx m2, [srcq + sstrideq * 2 ] ;C + pmaddubsw m0, k0k1 + mova m6, m2 + movx m3, [src1q + sstrideq * 2] ;D + punpcklbw m2, m3 ;C D + pmaddubsw m2, k2k3 + movx m4, [srcq + sstrideq * 4 ] ;E + mova m7, m4 + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m4, k4k5 + punpcklbw m1, m6 ;A B next iter + movx m6, [srcq + sstride6q ] ;G + punpcklbw m5, m6 ;E F next iter + punpcklbw m3, m7 ;C D next iter + pmaddubsw m5, k4k5 + movx m7, [src1q + sstride6q ] ;H + punpcklbw m6, m7 ;G H + pmaddubsw m6, k6k7 + pmaddubsw m3, k2k3 + pmaddubsw m1, k0k1 + paddsw m0, m4 + paddsw m2, m6 + movx m6, [srcq + sstrideq * 8 ] ;H next iter + punpcklbw m7, m6 + pmaddubsw m7, k6k7 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + paddsw m1, m5 + packuswb m0, m0 + + paddsw m3, m7 + paddsw m1, m3 + paddsw m1, krd + psraw m1, 7 + lea srcq, [srcq + sstrideq * 2 ] + lea src1q, [src1q + sstrideq * 2] + packuswb m1, m1 + +%ifidn %1, v8_avg + movx m2, [dstq] + pavgb m0, m2 +%endif + movx [dstq], m0 + add dstq, dst_stride +%ifidn %1, v8_avg + movx m3, [dstq] + pavgb m1, m3 +%endif + movx [dstq], m1 + add dstq, dst_stride + sub heightd, 2 + cmp heightd, 1 + jg .loop + + cmp heightd, 0 + je .done + + movx m0, [srcq ] ;A + movx m1, [srcq + sstrideq ] ;B + movx m6, [srcq + sstride6q ] ;G + punpcklbw m0, m1 ;A B + movx m7, [src1q + sstride6q ] ;H + pmaddubsw m0, k0k1 + movx m2, [srcq + sstrideq * 2 ] ;C + punpcklbw m6, m7 ;G H + movx m3, [src1q + sstrideq * 2] ;D + pmaddubsw m6, k6k7 + movx m4, [srcq + sstrideq * 4 ] ;E + punpcklbw m2, m3 ;C D + movx m5, [src1q + sstrideq * 4] ;F + punpcklbw m4, m5 ;E F + pmaddubsw m2, k2k3 + pmaddubsw m4, k4k5 + paddsw m2, m6 + paddsw m0, m4 + paddsw m0, m2 + paddsw m0, krd + psraw m0, 7 + packuswb m0, m0 +%ifidn %1, v8_avg + movx m1, [dstq] + pavgb m0, m1 +%endif + movx [dstq], m0 +.done: + RET +%endm + +;------------------------------------------------------------------------------- +%macro SUBPIX_VFILTER16 1 +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ + src, sstride, dst, dstride, height, filter + mova m4, [filterq] + SETUP_LOCAL_VARS +%if ARCH_X86_64 + %define src1q r7 + %define sstride6q r8 + %define dst_stride dstrideq +%else + %define src1q filterq + %define sstride6q dstrideq + %define dst_stride dstridemp +%endif + mov src1q, srcq + add src1q, sstrideq + lea sstride6q, [sstrideq + sstrideq * 4] + add sstride6q, sstrideq ;pitch * 6 + +.loop: + movh m0, [srcq ] ;A + movh m1, [srcq + sstrideq ] ;B + movh m2, [srcq + sstrideq * 2 ] ;C + movh m3, [src1q + sstrideq * 2] ;D + movh m4, [srcq + sstrideq * 4 ] ;E + movh m5, [src1q + sstrideq * 4] ;F + + punpcklbw m0, m1 ;A B + movh m6, [srcq + sstride6q] ;G + punpcklbw m2, m3 ;C D + movh m7, [src1q + sstride6q] ;H + punpcklbw m4, m5 ;E F + pmaddubsw m0, k0k1 + movh m3, [srcq + 8] ;A + pmaddubsw m2, k2k3 + punpcklbw m6, m7 ;G H + movh m5, [srcq + sstrideq + 8] ;B + pmaddubsw m4, k4k5 + punpcklbw m3, m5 ;A B + movh m7, [srcq + sstrideq * 2 + 8] ;C + pmaddubsw m6, k6k7 + movh m5, [src1q + sstrideq * 2 + 8] ;D + punpcklbw m7, m5 ;C D + paddsw m2, m6 + pmaddubsw m3, k0k1 + movh m1, [srcq + sstrideq * 4 + 8] ;E + paddsw m0, m4 + pmaddubsw m7, k2k3 + movh m6, [src1q + sstrideq * 4 + 8] ;F + punpcklbw m1, m6 ;E F + paddsw m0, m2 + paddsw m0, krd + movh m2, [srcq + sstride6q + 8] ;G + pmaddubsw m1, k4k5 + movh m5, [src1q + sstride6q + 8] ;H + psraw m0, 7 + punpcklbw m2, m5 ;G H + pmaddubsw m2, k6k7 +%ifidn %1, v8_avg + mova m4, [dstq] +%endif + movh [dstq], m0 + paddsw m7, m2 + paddsw m3, m1 + paddsw m3, m7 + paddsw m3, krd + psraw m3, 7 + packuswb m0, m3 + + add srcq, sstrideq + add src1q, sstrideq +%ifidn %1, v8_avg + pavgb m0, m4 +%endif + mova [dstq], m0 + add dstq, dst_stride + dec heightd + jnz .loop + RET +%endm + +INIT_XMM ssse3 +SUBPIX_VFILTER16 v8 +SUBPIX_VFILTER16 v8_avg +SUBPIX_VFILTER v8, 8 +SUBPIX_VFILTER v8_avg, 8 +SUBPIX_VFILTER v8, 4 +SUBPIX_VFILTER v8_avg, 4 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm new file mode 100644 index 00000000000..a378dd04028 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm @@ -0,0 +1,448 @@ +; +; Copyright (c) 2014 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "vpx_ports/x86_abi_support.asm" + +%macro GET_PARAM_4 0 + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm3, [rdx] ;load filters + pshuflw xmm4, xmm3, 11111111b ;k3 + psrldq xmm3, 8 + pshuflw xmm3, xmm3, 0b ;k4 + punpcklqdq xmm4, xmm3 ;k3k4 + + movq xmm3, rcx ;rounding + pshufd xmm3, xmm3, 0 + + pxor xmm2, xmm2 + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height +%endm + +%macro APPLY_FILTER_4 1 + + punpckldq xmm0, xmm1 ;two row in one register + punpcklbw xmm0, xmm2 ;unpack to word + pmullw xmm0, xmm4 ;multiply the filter factors + + movdqa xmm1, xmm0 + psrldq xmm1, 8 + paddsw xmm0, xmm1 + + paddsw xmm0, xmm3 ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack to byte + +%if %1 + movd xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + + movd [rdi], xmm0 + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +%macro GET_PARAM 0 + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm7, [rdx] ;load filters + + pshuflw xmm6, xmm7, 11111111b ;k3 + pshufhw xmm7, xmm7, 0b ;k4 + punpcklwd xmm6, xmm6 + punpckhwd xmm7, xmm7 + + movq xmm4, rcx ;rounding + pshufd xmm4, xmm4, 0 + + pxor xmm5, xmm5 + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height +%endm + +%macro APPLY_FILTER_8 1 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + + pmullw xmm0, xmm6 + pmullw xmm1, xmm7 + paddsw xmm0, xmm1 + paddsw xmm0, xmm4 ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack back to byte +%if %1 + movq xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movq [rdi], xmm0 ;store the result + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +%macro APPLY_FILTER_16 1 + punpcklbw xmm0, xmm5 + punpcklbw xmm1, xmm5 + punpckhbw xmm2, xmm5 + punpckhbw xmm3, xmm5 + + pmullw xmm0, xmm6 + pmullw xmm1, xmm7 + pmullw xmm2, xmm6 + pmullw xmm3, xmm7 + + paddsw xmm0, xmm1 + paddsw xmm2, xmm3 + + paddsw xmm0, xmm4 ;rounding + paddsw xmm2, xmm4 + psraw xmm0, 7 ;shift + psraw xmm2, 7 + packuswb xmm0, xmm2 ;pack back to byte +%if %1 + movdqu xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movdqu [rdi], xmm0 ;store the result + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +global sym(vpx_filter_block1d4_v2_sse2) PRIVATE +sym(vpx_filter_block1d4_v2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movd xmm0, [rsi] ;load src + movd xmm1, [rsi + rax] + + APPLY_FILTER_4 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_v2_sse2) PRIVATE +sym(vpx_filter_block1d8_v2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movq xmm0, [rsi] ;0 + movq xmm1, [rsi + rax] ;1 + + APPLY_FILTER_8 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_v2_sse2) PRIVATE +sym(vpx_filter_block1d16_v2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;0 + movdqu xmm1, [rsi + rax] ;1 + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + APPLY_FILTER_16 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE +sym(vpx_filter_block1d4_v2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movd xmm0, [rsi] ;load src + movd xmm1, [rsi + rax] + + APPLY_FILTER_4 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE +sym(vpx_filter_block1d8_v2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movq xmm0, [rsi] ;0 + movq xmm1, [rsi + rax] ;1 + + APPLY_FILTER_8 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE +sym(vpx_filter_block1d16_v2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;0 + movdqu xmm1, [rsi + rax] ;1 + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + APPLY_FILTER_16 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_h2_sse2) PRIVATE +sym(vpx_filter_block1d4_h2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_4 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_h2_sse2) PRIVATE +sym(vpx_filter_block1d8_h2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_8 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_h2_sse2) PRIVATE +sym(vpx_filter_block1d16_h2_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqu xmm1, [rsi + 1] + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + APPLY_FILTER_16 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_h2_avg_sse2) PRIVATE +sym(vpx_filter_block1d4_h2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_4 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE +sym(vpx_filter_block1d8_h2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_8 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE +sym(vpx_filter_block1d16_h2_avg_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqu xmm1, [rsi + 1] + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + APPLY_FILTER_16 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm new file mode 100644 index 00000000000..3c8cfd22537 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm @@ -0,0 +1,422 @@ +; +; Copyright (c) 2014 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "vpx_ports/x86_abi_support.asm" + +%macro GET_PARAM_4 0 + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm3, [rdx] ;load filters + psrldq xmm3, 6 + packsswb xmm3, xmm3 + pshuflw xmm3, xmm3, 0b ;k3_k4 + + movq xmm2, rcx ;rounding + pshufd xmm2, xmm2, 0 + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height +%endm + +%macro APPLY_FILTER_4 1 + punpcklbw xmm0, xmm1 + pmaddubsw xmm0, xmm3 + + paddsw xmm0, xmm2 ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack to byte + +%if %1 + movd xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movd [rdi], xmm0 + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +%macro GET_PARAM 0 + mov rdx, arg(5) ;filter ptr + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + mov rcx, 0x0400040 + + movdqa xmm7, [rdx] ;load filters + psrldq xmm7, 6 + packsswb xmm7, xmm7 + pshuflw xmm7, xmm7, 0b ;k3_k4 + punpcklwd xmm7, xmm7 + + movq xmm6, rcx ;rounding + pshufd xmm6, xmm6, 0 + + movsxd rax, DWORD PTR arg(1) ;pixels_per_line + movsxd rdx, DWORD PTR arg(3) ;out_pitch + movsxd rcx, DWORD PTR arg(4) ;output_height +%endm + +%macro APPLY_FILTER_8 1 + punpcklbw xmm0, xmm1 + pmaddubsw xmm0, xmm7 + + paddsw xmm0, xmm6 ;rounding + psraw xmm0, 7 ;shift + packuswb xmm0, xmm0 ;pack back to byte + +%if %1 + movq xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movq [rdi], xmm0 ;store the result + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +%macro APPLY_FILTER_16 1 + punpcklbw xmm0, xmm1 + punpckhbw xmm2, xmm1 + pmaddubsw xmm0, xmm7 + pmaddubsw xmm2, xmm7 + + paddsw xmm0, xmm6 ;rounding + paddsw xmm2, xmm6 + psraw xmm0, 7 ;shift + psraw xmm2, 7 + packuswb xmm0, xmm2 ;pack back to byte + +%if %1 + movdqu xmm1, [rdi] + pavgb xmm0, xmm1 +%endif + movdqu [rdi], xmm0 ;store the result + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + dec rcx +%endm + +global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE +sym(vpx_filter_block1d4_v2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movd xmm0, [rsi] ;load src + movd xmm1, [rsi + rax] + + APPLY_FILTER_4 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE +sym(vpx_filter_block1d8_v2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movq xmm0, [rsi] ;0 + movq xmm1, [rsi + rax] ;1 + + APPLY_FILTER_8 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE +sym(vpx_filter_block1d16_v2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;0 + movdqu xmm1, [rsi + rax] ;1 + movdqa xmm2, xmm0 + + APPLY_FILTER_16 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d4_v2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movd xmm0, [rsi] ;load src + movd xmm1, [rsi + rax] + + APPLY_FILTER_4 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d8_v2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movq xmm0, [rsi] ;0 + movq xmm1, [rsi + rax] ;1 + + APPLY_FILTER_8 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d16_v2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;0 + movdqu xmm1, [rsi + rax] ;1 + movdqa xmm2, xmm0 + + APPLY_FILTER_16 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE +sym(vpx_filter_block1d4_h2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_4 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE +sym(vpx_filter_block1d8_h2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_8 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE +sym(vpx_filter_block1d16_h2_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqu xmm1, [rsi + 1] + movdqa xmm2, xmm0 + + APPLY_FILTER_16 0 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d4_h2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + push rsi + push rdi + ; end prolog + + GET_PARAM_4 +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_4 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d8_h2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqa xmm1, xmm0 + psrldq xmm1, 1 + + APPLY_FILTER_8 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE +sym(vpx_filter_block1d16_h2_avg_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM 7 + push rsi + push rdi + ; end prolog + + GET_PARAM +.loop: + movdqu xmm0, [rsi] ;load src + movdqu xmm1, [rsi + 1] + movdqa xmm2, xmm0 + + APPLY_FILTER_16 1 + jnz .loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret diff --git a/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h b/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h new file mode 100644 index 00000000000..c4dd78550f3 --- /dev/null +++ b/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#include "./vpx_config.h" + +#define ADDRESS_STORAGE_SIZE sizeof(size_t) + +#ifndef DEFAULT_ALIGNMENT +# if defined(VXWORKS) +# define DEFAULT_ALIGNMENT 32 /*default addr alignment to use in +calls to vpx_* functions other +than vpx_memalign*/ +# else +# define DEFAULT_ALIGNMENT (2 * sizeof(void*)) /* NOLINT */ +# endif +#endif + +/*returns an addr aligned to the byte boundary specified by align*/ +#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) + +#endif // VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.c b/thirdparty/libvpx/vpx_mem/vpx_mem.c new file mode 100644 index 00000000000..b261fc0da11 --- /dev/null +++ b/thirdparty/libvpx/vpx_mem/vpx_mem.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_mem.h" +#include +#include +#include +#include "include/vpx_mem_intrnl.h" +#include "vpx/vpx_integer.h" + +void *vpx_memalign(size_t align, size_t size) { + void *addr, + * x = NULL; + + addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE); + + if (addr) { + x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); + /* save the actual malloc address */ + ((size_t *)x)[-1] = (size_t)addr; + } + + return x; +} + +void *vpx_malloc(size_t size) { + return vpx_memalign(DEFAULT_ALIGNMENT, size); +} + +void *vpx_calloc(size_t num, size_t size) { + void *x; + + x = vpx_memalign(DEFAULT_ALIGNMENT, num * size); + + if (x) + memset(x, 0, num * size); + + return x; +} + +void *vpx_realloc(void *memblk, size_t size) { + void *addr, + * new_addr = NULL; + int align = DEFAULT_ALIGNMENT; + + /* + The realloc() function changes the size of the object pointed to by + ptr to the size specified by size, and returns a pointer to the + possibly moved block. The contents are unchanged up to the lesser + of the new and old sizes. If ptr is null, realloc() behaves like + malloc() for the specified size. If size is zero (0) and ptr is + not a null pointer, the object pointed to is freed. + */ + if (!memblk) + new_addr = vpx_malloc(size); + else if (!size) + vpx_free(memblk); + else { + addr = (void *)(((size_t *)memblk)[-1]); + memblk = NULL; + + new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE); + + if (new_addr) { + addr = new_addr; + new_addr = (void *)(((size_t) + ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) & + (size_t) - align); + /* save the actual malloc address */ + ((size_t *)new_addr)[-1] = (size_t)addr; + } + } + + return new_addr; +} + +void vpx_free(void *memblk) { + if (memblk) { + void *addr = (void *)(((size_t *)memblk)[-1]); + free(addr); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void *vpx_memset16(void *dest, int val, size_t length) { + size_t i; + uint16_t *dest16 = (uint16_t *)dest; + for (i = 0; i < length; i++) + *dest16++ = val; + return dest; +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.h b/thirdparty/libvpx/vpx_mem/vpx_mem.h new file mode 100644 index 00000000000..a006e0f00b3 --- /dev/null +++ b/thirdparty/libvpx/vpx_mem/vpx_mem.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_MEM_VPX_MEM_H_ +#define VPX_MEM_VPX_MEM_H_ + +#include "vpx_config.h" +#if defined(__uClinux__) +# include +#endif + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + + void *vpx_memalign(size_t align, size_t size); + void *vpx_malloc(size_t size); + void *vpx_calloc(size_t num, size_t size); + void *vpx_realloc(void *memblk, size_t size); + void vpx_free(void *memblk); + +#if CONFIG_VP9_HIGHBITDEPTH + void *vpx_memset16(void *dest, int val, size_t length); +#endif + +#include + +#ifdef VPX_MEM_PLTFRM +# include VPX_MEM_PLTFRM +#endif + +#if defined(__cplusplus) +} +#endif + +#endif // VPX_MEM_VPX_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/arm.h b/thirdparty/libvpx/vpx_ports/arm.h new file mode 100644 index 00000000000..42c98f5a838 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/arm.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_PORTS_ARM_H_ +#define VPX_PORTS_ARM_H_ +#include +#include "vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/*ARMv5TE "Enhanced DSP" instructions.*/ +#define HAS_EDSP 0x01 +/*ARMv6 "Parallel" or "Media" instructions.*/ +#define HAS_MEDIA 0x02 +/*ARMv7 optional NEON instructions.*/ +#define HAS_NEON 0x04 + +int arm_cpu_caps(void); + +// Earlier gcc compilers have issues with some neon intrinsics +#if !defined(__clang__) && defined(__GNUC__) && \ + __GNUC__ == 4 && __GNUC_MINOR__ <= 6 +#define VPX_INCOMPATIBLE_GCC +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_PORTS_ARM_H_ + diff --git a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c new file mode 100644 index 00000000000..8a4b8af964f --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include "vpx_ports/arm.h" +#include "./vpx_config.h" + +#ifdef WINAPI_FAMILY +#include +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#define getenv(x) NULL +#endif +#endif + +static int arm_cpu_env_flags(int *flags) { + char *env; + env = getenv("VPX_SIMD_CAPS"); + if (env && *env) { + *flags = (int)strtol(env, NULL, 0); + return 0; + } + *flags = 0; + return -1; +} + +static int arm_cpu_env_mask(void) { + char *env; + env = getenv("VPX_SIMD_CAPS_MASK"); + return env && *env ? (int)strtol(env, NULL, 0) : ~0; +} + +#if !CONFIG_RUNTIME_CPU_DETECT + +int arm_cpu_caps(void) { + /* This function should actually be a no-op. There is no way to adjust any of + * these because the RTCD tables do not exist: the functions are called + * statically */ + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON || HAVE_NEON_ASM + flags |= HAS_NEON; +#endif /* HAVE_NEON || HAVE_NEON_ASM */ + return flags & mask; +} + +#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ +/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ +#define WIN32_LEAN_AND_MEAN +#define WIN32_EXTRA_LEAN +#include + +int arm_cpu_caps(void) { + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + /* MSVC has no inline __asm support for ARM, but it does let you __emit + * instructions via their assembled hex code. + * All of these instructions should be essentially nops. + */ +#if HAVE_MEDIA + if (mask & HAS_MEDIA) + __try { + /*SHADD8 r3,r3,r3*/ + __emit(0xE6333F93); + flags |= HAS_MEDIA; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ + } +} +#endif /* HAVE_MEDIA */ +#if HAVE_NEON || HAVE_NEON_ASM +if (mask &HAS_NEON) { + __try { + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags |= HAS_NEON; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ + } +} +#endif /* HAVE_NEON || HAVE_NEON_ASM */ +return flags & mask; +} + +#elif defined(__ANDROID__) /* end _MSC_VER */ +#include + +int arm_cpu_caps(void) { + int flags; + int mask; + uint64_t features; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + features = android_getCpuFeatures(); + +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON || HAVE_NEON_ASM + if (features & ANDROID_CPU_ARM_FEATURE_NEON) + flags |= HAS_NEON; +#endif /* HAVE_NEON || HAVE_NEON_ASM */ + return flags & mask; +} + +#elif defined(__linux__) /* end __ANDROID__ */ + +#include + +int arm_cpu_caps(void) { + FILE *fin; + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + /* Reading /proc/self/auxv would be easier, but that doesn't work reliably + * on Android. + * This also means that detection will fail in Scratchbox. + */ + fin = fopen("/proc/cpuinfo", "r"); + if (fin != NULL) { + /* 512 should be enough for anybody (it's even enough for all the flags + * that x86 has accumulated... so far). + */ + char buf[512]; + while (fgets(buf, 511, fin) != NULL) { +#if HAVE_NEON || HAVE_NEON_ASM + if (memcmp(buf, "Features", 8) == 0) { + char *p; + p = strstr(buf, " neon"); + if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { + flags |= HAS_NEON; + } + } +#endif /* HAVE_NEON || HAVE_NEON_ASM */ +#if HAVE_MEDIA + if (memcmp(buf, "CPU architecture:", 17) == 0) { + int version; + version = atoi(buf + 17); + if (version >= 6) { + flags |= HAS_MEDIA; + } + } +#endif /* HAVE_MEDIA */ + } + fclose(fin); + } + return flags & mask; +} +#else /* end __linux__ */ +#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ +"available for your platform. Reconfigure with --disable-runtime-cpu-detect." +#endif diff --git a/thirdparty/libvpx/vpx_ports/bitops.h b/thirdparty/libvpx/vpx_ports/bitops.h new file mode 100644 index 00000000000..84ff3659fee --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/bitops.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_BITOPS_H_ +#define VPX_PORTS_BITOPS_H_ + +#include + +#include "vpx_ports/msvc.h" + +#ifdef _MSC_VER +# include // the ceil() definition must precede intrin.h +# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) +# include +# define USE_MSC_INTRINSICS +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// These versions of get_msb() are only valid when n != 0 because all +// of the optimized versions are undefined when n == 0: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + +// use GNU builtins where available. +#if defined(__GNUC__) && \ + ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +static INLINE int get_msb(unsigned int n) { + assert(n != 0); + return 31 ^ __builtin_clz(n); +} +#elif defined(USE_MSC_INTRINSICS) +#pragma intrinsic(_BitScanReverse) + +static INLINE int get_msb(unsigned int n) { + unsigned long first_set_bit; + assert(n != 0); + _BitScanReverse(&first_set_bit, n); + return first_set_bit; +} +#undef USE_MSC_INTRINSICS +#else +// Returns (int)floor(log2(n)). n must be > 0. +static INLINE int get_msb(unsigned int n) { + int log = 0; + unsigned int value = n; + int i; + + assert(n != 0); + + for (i = 4; i >= 0; --i) { + const int shift = (1 << i); + const unsigned int x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + return log; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_PORTS_BITOPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/config.h b/thirdparty/libvpx/vpx_ports/config.h new file mode 100644 index 00000000000..3c1ab99f4aa --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/config.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_CONFIG_H_ +#define VPX_PORTS_CONFIG_H_ + +#include "vpx_config.h" + +#endif // VPX_PORTS_CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_ports/emmintrin_compat.h b/thirdparty/libvpx/vpx_ports/emmintrin_compat.h new file mode 100644 index 00000000000..16176383d29 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/emmintrin_compat.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H_ +#define VPX_PORTS_EMMINTRIN_COMPAT_H_ + +#if defined(__GNUC__) && __GNUC__ < 4 +/* From emmintrin.h (gcc 4.5.3) */ +/* Casts between various SP, DP, INT vector types. Note that these do no + conversion of values, they just change the type. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ps(__m128d __A) +{ + return (__m128) __A; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_si128(__m128d __A) +{ + return (__m128i) __A; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_pd(__m128 __A) +{ + return (__m128d) __A; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_si128(__m128 __A) +{ + return (__m128i) __A; +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ps(__m128i __A) +{ + return (__m128) __A; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_pd(__m128i __A) +{ + return (__m128d) __A; +} +#endif + +#endif // VPX_PORTS_EMMINTRIN_COMPAT_H_ diff --git a/thirdparty/libvpx/vpx_ports/emms.asm b/thirdparty/libvpx/vpx_ports/emms.asm new file mode 100644 index 00000000000..db8da287375 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/emms.asm @@ -0,0 +1,38 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +section .text +global sym(vpx_reset_mmx_state) PRIVATE +sym(vpx_reset_mmx_state): + emms + ret + + +%if LIBVPX_YASM_WIN64 +global sym(vpx_winx64_fldcw) PRIVATE +sym(vpx_winx64_fldcw): + sub rsp, 8 + mov [rsp], rcx ; win x64 specific + fldcw [rsp] + add rsp, 8 + ret + + +global sym(vpx_winx64_fstcw) PRIVATE +sym(vpx_winx64_fstcw): + sub rsp, 8 + fstcw [rsp] + mov rax, [rsp] + add rsp, 8 + ret +%endif diff --git a/thirdparty/libvpx/vpx_ports/mem.h b/thirdparty/libvpx/vpx_ports/mem.h new file mode 100644 index 00000000000..7502f906325 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/mem.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_PORTS_MEM_H_ +#define VPX_PORTS_MEM_H_ + +#include "vpx_config.h" +#include "vpx/vpx_integer.h" + +#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) +#define DECLARE_ALIGNED(n,typ,val) typ val __attribute__ ((aligned (n))) +#elif defined(_MSC_VER) +#define DECLARE_ALIGNED(n,typ,val) __declspec(align(n)) typ val +#else +#warning No alignment directives known for this compiler. +#define DECLARE_ALIGNED(n,typ,val) typ val +#endif + +/* Indicates that the usage of the specified variable has been audited to assure + * that it's safe to use uninitialized. Silences 'may be used uninitialized' + * warnings on gcc. + */ +#if defined(__GNUC__) && __GNUC__ +#define UNINITIALIZED_IS_SAFE(x) x=x +#else +#define UNINITIALIZED_IS_SAFE(x) x +#endif + +#if HAVE_NEON && defined(_MSC_VER) +#define __builtin_prefetch(x) +#endif + +/* Shift down with rounding */ +#define ROUND_POWER_OF_TWO(value, n) \ + (((value) + (1 << ((n) - 1))) >> (n)) + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#if CONFIG_VP9_HIGHBITDEPTH +#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) +#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VPX_PORTS_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops.h b/thirdparty/libvpx/vpx_ports/mem_ops.h new file mode 100644 index 00000000000..620df31b223 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/mem_ops.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_MEM_OPS_H_ +#define VPX_PORTS_MEM_OPS_H_ + +/* \file + * \brief Provides portable memory access primitives + * + * This function provides portable primitives for getting and setting of + * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations + * can be performed on unaligned data regardless of hardware support for + * unaligned accesses. + * + * The type used to pass the integral values may be changed by defining + * MEM_VALUE_T with the appropriate type. The type given must be an integral + * numeric type. + * + * The actual functions instantiated have the MEM_VALUE_T type name pasted + * on to the symbol name. This allows the developer to instantiate these + * operations for multiple types within the same translation unit. This is + * of somewhat questionable utility, but the capability exists nonetheless. + * Users not making use of this functionality should call the functions + * without the type name appended, and the preprocessor will take care of + * it. + * + * NOTE: This code is not supported on platforms where char > 1 octet ATM. + */ + +#ifndef MAU_T +/* Minimum Access Unit for this target */ +#define MAU_T unsigned char +#endif + +#ifndef MEM_VALUE_T +#define MEM_VALUE_T int +#endif + +#undef MEM_VALUE_T_SZ_BITS +#define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3) + +#undef mem_ops_wrap_symbol +#define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T) +#undef mem_ops_wrap_symbol2 +#define mem_ops_wrap_symbol2(fn,typ) mem_ops_wrap_symbol3(fn,typ) +#undef mem_ops_wrap_symbol3 +#define mem_ops_wrap_symbol3(fn,typ) fn##_as_##typ + +/* + * Include aligned access routines + */ +#define INCLUDED_BY_MEM_OPS_H +#include "mem_ops_aligned.h" +#undef INCLUDED_BY_MEM_OPS_H + +#undef mem_get_be16 +#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) +static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[0] << 8; + val |= mem[1]; + return val; +} + +#undef mem_get_be24 +#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) +static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[0] << 16; + val |= mem[1] << 8; + val |= mem[2]; + return val; +} + +#undef mem_get_be32 +#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) +static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = ((unsigned MEM_VALUE_T)mem[0]) << 24; + val |= mem[1] << 16; + val |= mem[2] << 8; + val |= mem[3]; + return val; +} + +#undef mem_get_le16 +#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16) +static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[1] << 8; + val |= mem[0]; + return val; +} + +#undef mem_get_le24 +#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) +static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[2] << 16; + val |= mem[1] << 8; + val |= mem[0]; + return val; +} + +#undef mem_get_le32 +#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) +static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = ((unsigned MEM_VALUE_T)mem[3]) << 24; + val |= mem[2] << 16; + val |= mem[1] << 8; + val |= mem[0]; + return val; +} + +#define mem_get_s_generic(end,sz) \ + static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ + const MAU_T *mem = (const MAU_T*)vmem;\ + signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ + return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ + } + +#undef mem_get_sbe16 +#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) +mem_get_s_generic(be, 16) + +#undef mem_get_sbe24 +#define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) +mem_get_s_generic(be, 24) + +#undef mem_get_sbe32 +#define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) +mem_get_s_generic(be, 32) + +#undef mem_get_sle16 +#define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) +mem_get_s_generic(le, 16) + +#undef mem_get_sle24 +#define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) +mem_get_s_generic(le, 24) + +#undef mem_get_sle32 +#define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) +mem_get_s_generic(le, 32) + +#undef mem_put_be16 +#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) +static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 8) & 0xff); + mem[1] = (MAU_T)((val >> 0) & 0xff); +} + +#undef mem_put_be24 +#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) +static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 16) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 0) & 0xff); +} + +#undef mem_put_be32 +#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) +static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 24) & 0xff); + mem[1] = (MAU_T)((val >> 16) & 0xff); + mem[2] = (MAU_T)((val >> 8) & 0xff); + mem[3] = (MAU_T)((val >> 0) & 0xff); +} + +#undef mem_put_le16 +#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) +static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); +} + +#undef mem_put_le24 +#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) +static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 16) & 0xff); +} + +#undef mem_put_le32 +#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) +static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (MAU_T)((val >> 0) & 0xff); + mem[1] = (MAU_T)((val >> 8) & 0xff); + mem[2] = (MAU_T)((val >> 16) & 0xff); + mem[3] = (MAU_T)((val >> 24) & 0xff); +} + +#endif // VPX_PORTS_MEM_OPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h b/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h new file mode 100644 index 00000000000..46f61738ba4 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_MEM_OPS_ALIGNED_H_ +#define VPX_PORTS_MEM_OPS_ALIGNED_H_ + +#include "vpx/vpx_integer.h" + +/* \file + * \brief Provides portable memory access primitives for operating on aligned + * data + * + * This file is split from mem_ops.h for easier maintenance. See mem_ops.h + * for a more detailed description of these primitives. + */ +#ifndef INCLUDED_BY_MEM_OPS_H +#error Include mem_ops.h, not mem_ops_aligned.h directly. +#endif + +/* Architectures that provide instructions for doing this byte swapping + * could redefine these macros. + */ +#define swap_endian_16(val,raw) do {\ + val = (uint16_t)(((raw>>8) & 0x00ff) \ + | ((raw<<8) & 0xff00));\ + } while(0) +#define swap_endian_32(val,raw) do {\ + val = ((raw>>24) & 0x000000ff) \ + | ((raw>>8) & 0x0000ff00) \ + | ((raw<<8) & 0x00ff0000) \ + | ((raw<<24) & 0xff000000); \ + } while(0) +#define swap_endian_16_se(val,raw) do {\ + swap_endian_16(val,raw);\ + val = ((val << 16) >> 16);\ + } while(0) +#define swap_endian_32_se(val,raw) swap_endian_32(val,raw) + +#define mem_get_ne_aligned_generic(end,sz) \ + static VPX_INLINE unsigned MEM_VALUE_T \ + mem_get_##end##sz##_aligned(const void *vmem) {\ + const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ + return *mem;\ + } + +#define mem_get_sne_aligned_generic(end,sz) \ + static VPX_INLINE signed MEM_VALUE_T \ + mem_get_s##end##sz##_aligned(const void *vmem) {\ + const int##sz##_t *mem = (const int##sz##_t *)vmem;\ + return *mem;\ + } + +#define mem_get_se_aligned_generic(end,sz) \ + static VPX_INLINE unsigned MEM_VALUE_T \ + mem_get_##end##sz##_aligned(const void *vmem) {\ + const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ + unsigned MEM_VALUE_T val, raw = *mem;\ + swap_endian_##sz(val,raw);\ + return val;\ + } + +#define mem_get_sse_aligned_generic(end,sz) \ + static VPX_INLINE signed MEM_VALUE_T \ + mem_get_s##end##sz##_aligned(const void *vmem) {\ + const int##sz##_t *mem = (const int##sz##_t *)vmem;\ + unsigned MEM_VALUE_T val, raw = *mem;\ + swap_endian_##sz##_se(val,raw);\ + return val;\ + } + +#define mem_put_ne_aligned_generic(end,sz) \ + static VPX_INLINE void \ + mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + uint##sz##_t *mem = (uint##sz##_t *)vmem;\ + *mem = (uint##sz##_t)val;\ + } + +#define mem_put_se_aligned_generic(end,sz) \ + static VPX_INLINE void \ + mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ + swap_endian_##sz(raw,val);\ + *mem = (uint##sz##_t)raw;\ + } + +#include "vpx_config.h" +#if CONFIG_BIG_ENDIAN +#define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be,sz) +#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be,sz) +#define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le,sz) +#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le,sz) +#define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be,sz) +#define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le,sz) +#else +#define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be,sz) +#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be,sz) +#define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le,sz) +#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le,sz) +#define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be,sz) +#define mem_put_le_aligned_generic(sz) mem_put_ne_aligned_generic(le,sz) +#endif + +#undef mem_get_be16_aligned +#define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) +mem_get_be_aligned_generic(16) + +#undef mem_get_be32_aligned +#define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) +mem_get_be_aligned_generic(32) + +#undef mem_get_le16_aligned +#define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) +mem_get_le_aligned_generic(16) + +#undef mem_get_le32_aligned +#define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) +mem_get_le_aligned_generic(32) + +#undef mem_get_sbe16_aligned +#define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) +mem_get_sbe_aligned_generic(16) + +#undef mem_get_sbe32_aligned +#define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) +mem_get_sbe_aligned_generic(32) + +#undef mem_get_sle16_aligned +#define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) +mem_get_sle_aligned_generic(16) + +#undef mem_get_sle32_aligned +#define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) +mem_get_sle_aligned_generic(32) + +#undef mem_put_be16_aligned +#define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) +mem_put_be_aligned_generic(16) + +#undef mem_put_be32_aligned +#define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) +mem_put_be_aligned_generic(32) + +#undef mem_put_le16_aligned +#define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) +mem_put_le_aligned_generic(16) + +#undef mem_put_le32_aligned +#define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) +mem_put_le_aligned_generic(32) + +#undef mem_get_ne_aligned_generic +#undef mem_get_se_aligned_generic +#undef mem_get_sne_aligned_generic +#undef mem_get_sse_aligned_generic +#undef mem_put_ne_aligned_generic +#undef mem_put_se_aligned_generic +#undef swap_endian_16 +#undef swap_endian_32 +#undef swap_endian_16_se +#undef swap_endian_32_se + +#endif // VPX_PORTS_MEM_OPS_ALIGNED_H_ diff --git a/thirdparty/libvpx/vpx_ports/msvc.h b/thirdparty/libvpx/vpx_ports/msvc.h new file mode 100644 index 00000000000..cab77405f47 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/msvc.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_MSVC_H_ +#define VPX_PORTS_MSVC_H_ +#ifdef _MSC_VER + +#include "./vpx_config.h" + +# if _MSC_VER < 1900 // VS2015 provides snprintf +# define snprintf _snprintf +# endif // _MSC_VER < 1900 + +#if _MSC_VER < 1800 // VS2013 provides round +#include +static INLINE double round(double x) { + if (x < 0) + return ceil(x - 0.5); + else + return floor(x + 0.5); +} +#endif // _MSC_VER < 1800 + +#endif // _MSC_VER +#endif // VPX_PORTS_MSVC_H_ diff --git a/thirdparty/libvpx/vpx_ports/system_state.h b/thirdparty/libvpx/vpx_ports/system_state.h new file mode 100644 index 00000000000..086c64681f5 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/system_state.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_SYSTEM_STATE_H_ +#define VPX_PORTS_SYSTEM_STATE_H_ + +#include "./vpx_config.h" + +#if ARCH_X86 || ARCH_X86_64 +void vpx_reset_mmx_state(void); +#define vpx_clear_system_state() vpx_reset_mmx_state() +#else +#define vpx_clear_system_state() +#endif // ARCH_X86 || ARCH_X86_64 +#endif // VPX_PORTS_SYSTEM_STATE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_once.h b/thirdparty/libvpx/vpx_ports/vpx_once.h new file mode 100644 index 00000000000..da04db45907 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/vpx_once.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_VPX_ONCE_H_ +#define VPX_PORTS_VPX_ONCE_H_ + +#include "vpx_config.h" + +/* Implement a function wrapper to guarantee initialization + * thread-safety for library singletons. + * + * NOTE: These functions use static locks, and can only be + * used with one common argument per compilation unit. So + * + * file1.c: + * vpx_once(foo); + * ... + * vpx_once(foo); + * + * file2.c: + * vpx_once(bar); + * + * will ensure foo() and bar() are each called only once, but in + * + * file1.c: + * vpx_once(foo); + * vpx_once(bar): + * + * bar() will never be called because the lock is used up + * by the call to foo(). + */ + +#if CONFIG_MULTITHREAD && defined(_WIN32) +#include +#include +/* Declare a per-compilation-unit state variable to track the progress + * of calling func() only once. This must be at global scope because + * local initializers are not thread-safe in MSVC prior to Visual + * Studio 2015. + * + * As a static, once_state will be zero-initialized as program start. + */ +static LONG once_state; +static void once(void (*func)(void)) +{ + /* Try to advance once_state from its initial value of 0 to 1. + * Only one thread can succeed in doing so. + */ + if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { + /* We're the winning thread, having set once_state to 1. + * Call our function. */ + func(); + /* Now advance once_state to 2, unblocking any other threads. */ + InterlockedIncrement(&once_state); + return; + } + + /* We weren't the winning thread, but we want to block on + * the state variable so we don't return before func() + * has finished executing elsewhere. + * + * Try to advance once_state from 2 to 2, which is only possible + * after the winning thead advances it from 1 to 2. + */ + while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { + /* State isn't yet 2. Try again. + * + * We are used for singleton initialization functions, + * which should complete quickly. Contention will likewise + * be rare, so it's worthwhile to use a simple but cpu- + * intensive busy-wait instead of successive backoff, + * waiting on a kernel object, or another heavier-weight scheme. + * + * We can at least yield our timeslice. + */ + Sleep(0); + } + + /* We've seen once_state advance to 2, so we know func() + * has been called. And we've left once_state as we found it, + * so other threads will have the same experience. + * + * It's safe to return now. + */ + return; +} + + +#elif CONFIG_MULTITHREAD && defined(__OS2__) +#define INCL_DOS +#include +static void once(void (*func)(void)) +{ + static int done; + + /* If the initialization is complete, return early. */ + if(done) + return; + + /* Causes all other threads in the process to block themselves + * and give up their time slice. + */ + DosEnterCritSec(); + + if (!done) + { + func(); + done = 1; + } + + /* Restores normal thread dispatching for the current process. */ + DosExitCritSec(); +} + + +#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H +#include +static void once(void (*func)(void)) +{ + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} + + +#else +/* No-op version that performs no synchronization. *_rtcd() is idempotent, + * so as long as your platform provides atomic loads/stores of pointers + * no synchronization is strictly necessary. + */ + +static void once(void (*func)(void)) +{ + static int done; + + if(!done) + { + func(); + done = 1; + } +} +#endif + +#endif // VPX_PORTS_VPX_ONCE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_timer.h b/thirdparty/libvpx/vpx_ports/vpx_timer.h new file mode 100644 index 00000000000..dd98e291c2f --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/vpx_timer.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_PORTS_VPX_TIMER_H_ +#define VPX_PORTS_VPX_TIMER_H_ + +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +#if CONFIG_OS_SUPPORT + +#if defined(_WIN32) +/* + * Win32 specific includes + */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#else +/* + * POSIX specific includes + */ +#include + +/* timersub is not provided by msys at this time. */ +#ifndef timersub +#define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif +#endif + + +struct vpx_usec_timer { +#if defined(_WIN32) + LARGE_INTEGER begin, end; +#else + struct timeval begin, end; +#endif +}; + + +static INLINE void +vpx_usec_timer_start(struct vpx_usec_timer *t) { +#if defined(_WIN32) + QueryPerformanceCounter(&t->begin); +#else + gettimeofday(&t->begin, NULL); +#endif +} + + +static INLINE void +vpx_usec_timer_mark(struct vpx_usec_timer *t) { +#if defined(_WIN32) + QueryPerformanceCounter(&t->end); +#else + gettimeofday(&t->end, NULL); +#endif +} + + +static INLINE int64_t +vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { +#if defined(_WIN32) + LARGE_INTEGER freq, diff; + + diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; + + QueryPerformanceFrequency(&freq); + return diff.QuadPart * 1000000 / freq.QuadPart; +#else + struct timeval diff; + + timersub(&t->end, &t->begin, &diff); + return diff.tv_sec * 1000000 + diff.tv_usec; +#endif +} + +#else /* CONFIG_OS_SUPPORT = 0*/ + +/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ +#ifndef timersub +#define timersub(a, b, result) +#endif + +struct vpx_usec_timer { + void *dummy; +}; + +static INLINE void +vpx_usec_timer_start(struct vpx_usec_timer *t) { } + +static INLINE void +vpx_usec_timer_mark(struct vpx_usec_timer *t) { } + +static INLINE int +vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { + return 0; +} + +#endif /* CONFIG_OS_SUPPORT */ + +#endif // VPX_PORTS_VPX_TIMER_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86.h b/thirdparty/libvpx/vpx_ports/x86.h new file mode 100644 index 00000000000..bae25ac3454 --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/x86.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_PORTS_X86_H_ +#define VPX_PORTS_X86_H_ +#include + +#if defined(_MSC_VER) +#include /* For __cpuidex, __rdtsc */ +#endif + +#include "vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + VPX_CPU_UNKNOWN = -1, + VPX_CPU_AMD, + VPX_CPU_AMD_OLD, + VPX_CPU_CENTAUR, + VPX_CPU_CYRIX, + VPX_CPU_INTEL, + VPX_CPU_NEXGEN, + VPX_CPU_NSC, + VPX_CPU_RISE, + VPX_CPU_SIS, + VPX_CPU_TRANSMETA, + VPX_CPU_TRANSMETA_OLD, + VPX_CPU_UMC, + VPX_CPU_VIA, + + VPX_CPU_LAST +} vpx_cpu_t; + +#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) +#if ARCH_X86_64 +#define cpuid(func, func2, ax, bx, cx, dx)\ + __asm__ __volatile__ (\ + "cpuid \n\t" \ + : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func), "c" (func2)); +#else +#define cpuid(func, func2, ax, bx, cx, dx)\ + __asm__ __volatile__ (\ + "mov %%ebx, %%edi \n\t" \ + "cpuid \n\t" \ + "xchg %%edi, %%ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func), "c" (func2)); +#endif +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ +#if ARCH_X86_64 +#define cpuid(func, func2, ax, bx, cx, dx)\ + asm volatile (\ + "xchg %rsi, %rbx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "xchg %rsi, %rbx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func), "c" (func2)); +#else +#define cpuid(func, func2, ax, bx, cx, dx)\ + asm volatile (\ + "pushl %ebx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "popl %ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func), "c" (func2)); +#endif +#else /* end __SUNPRO__ */ +#if ARCH_X86_64 +#if defined(_MSC_VER) && _MSC_VER > 1500 +#define cpuid(func, func2, a, b, c, d) do {\ + int regs[4];\ + __cpuidex(regs, func, func2); \ + a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ + } while(0) +#else +#define cpuid(func, func2, a, b, c, d) do {\ + int regs[4];\ + __cpuid(regs, func); \ + a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ + } while (0) +#endif +#else +#define cpuid(func, func2, a, b, c, d)\ + __asm mov eax, func\ + __asm mov ecx, func2\ + __asm cpuid\ + __asm mov a, eax\ + __asm mov b, ebx\ + __asm mov c, ecx\ + __asm mov d, edx +#endif +#endif /* end others */ + +// NaCl has no support for xgetbv or the raw opcode. +#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) +static INLINE uint64_t xgetbv(void) { + const uint32_t ecx = 0; + uint32_t eax, edx; + // Use the raw opcode for xgetbv for compatibility with older toolchains. + __asm__ volatile ( + ".byte 0x0f, 0x01, 0xd0\n" + : "=a"(eax), "=d"(edx) : "c" (ecx)); + return ((uint64_t)edx << 32) | eax; +} +#elif (defined(_M_X64) || defined(_M_IX86)) && \ + defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 +#include +#define xgetbv() _xgetbv(0) +#elif defined(_MSC_VER) && defined(_M_IX86) +static INLINE uint64_t xgetbv(void) { + uint32_t eax_, edx_; + __asm { + xor ecx, ecx // ecx = 0 + // Use the raw opcode for xgetbv for compatibility with older toolchains. + __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 + mov eax_, eax + mov edx_, edx + } + return ((uint64_t)edx_ << 32) | eax_; +} +#else +#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1700 +#include +#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) +#define getenv(x) NULL +#endif +#endif + +#define HAS_MMX 0x01 +#define HAS_SSE 0x02 +#define HAS_SSE2 0x04 +#define HAS_SSE3 0x08 +#define HAS_SSSE3 0x10 +#define HAS_SSE4_1 0x20 +#define HAS_AVX 0x40 +#define HAS_AVX2 0x80 +#ifndef BIT +#define BIT(n) (1<= 7) { + /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ + cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + + if (reg_ebx & BIT(5)) flags |= HAS_AVX2; + } + } + } + + return flags & mask; +} + +// Note: +// 32-bit CPU cycle counter is light-weighted for most function performance +// measurement. For large function (CPU time > a couple of seconds), 64-bit +// counter should be used. +// 32-bit CPU cycle counter +static INLINE unsigned int +x86_readtsc(void) { +#if defined(__GNUC__) && __GNUC__ + unsigned int tsc; + __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); + return tsc; +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + unsigned int tsc; + asm volatile("rdtsc\n\t":"=a"(tsc):); + return tsc; +#else +#if ARCH_X86_64 + return (unsigned int)__rdtsc(); +#else + __asm rdtsc; +#endif +#endif +} +// 64-bit CPU cycle counter +static INLINE uint64_t +x86_readtsc64(void) { +#if defined(__GNUC__) && __GNUC__ + uint32_t hi, lo; + __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + uint_t hi, lo; + asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +#else +#if ARCH_X86_64 + return (uint64_t)__rdtsc(); +#else + __asm rdtsc; +#endif +#endif +} + +#if defined(__GNUC__) && __GNUC__ +#define x86_pause_hint()\ + __asm__ __volatile__ ("pause \n\t") +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#define x86_pause_hint()\ + asm volatile ("pause \n\t") +#else +#if ARCH_X86_64 +#define x86_pause_hint()\ + _mm_pause(); +#else +#define x86_pause_hint()\ + __asm pause +#endif +#endif + +#if defined(__GNUC__) && __GNUC__ +static void +x87_set_control_word(unsigned short mode) { + __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); +} +static unsigned short +x87_get_control_word(void) { + unsigned short mode; + __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); + return mode; +} +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +static void +x87_set_control_word(unsigned short mode) { + asm volatile("fldcw %0" : : "m"(*&mode)); +} +static unsigned short +x87_get_control_word(void) { + unsigned short mode; + asm volatile("fstcw %0\n\t":"=m"(*&mode):); + return mode; +} +#elif ARCH_X86_64 +/* No fldcw intrinsics on Windows x64, punt to external asm */ +extern void vpx_winx64_fldcw(unsigned short mode); +extern unsigned short vpx_winx64_fstcw(void); +#define x87_set_control_word vpx_winx64_fldcw +#define x87_get_control_word vpx_winx64_fstcw +#else +static void +x87_set_control_word(unsigned short mode) { + __asm { fldcw mode } +} +static unsigned short +x87_get_control_word(void) { + unsigned short mode; + __asm { fstcw mode } + return mode; +} +#endif + +static INLINE unsigned int +x87_set_double_precision(void) { + unsigned int mode = x87_get_control_word(); + x87_set_control_word((mode&~0x300) | 0x200); + return mode; +} + + +extern void vpx_reset_mmx_state(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_PORTS_X86_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86_abi_support.asm b/thirdparty/libvpx/vpx_ports/x86_abi_support.asm new file mode 100644 index 00000000000..708fa101c5d --- /dev/null +++ b/thirdparty/libvpx/vpx_ports/x86_abi_support.asm @@ -0,0 +1,404 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_config.asm" + +; 32/64 bit compatibility macros +; +; In general, we make the source use 64 bit syntax, then twiddle with it using +; the preprocessor to get the 32 bit syntax on 32 bit platforms. +; +%ifidn __OUTPUT_FORMAT__,elf32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,macho32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,win32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,aout +%define ABI_IS_32BIT 1 +%else +%define ABI_IS_32BIT 0 +%endif + +%if ABI_IS_32BIT +%define rax eax +%define rbx ebx +%define rcx ecx +%define rdx edx +%define rsi esi +%define rdi edi +%define rsp esp +%define rbp ebp +%define movsxd mov +%macro movq 2 + %ifidn %1,eax + movd %1,%2 + %elifidn %2,eax + movd %1,%2 + %elifidn %1,ebx + movd %1,%2 + %elifidn %2,ebx + movd %1,%2 + %elifidn %1,ecx + movd %1,%2 + %elifidn %2,ecx + movd %1,%2 + %elifidn %1,edx + movd %1,%2 + %elifidn %2,edx + movd %1,%2 + %elifidn %1,esi + movd %1,%2 + %elifidn %2,esi + movd %1,%2 + %elifidn %1,edi + movd %1,%2 + %elifidn %2,edi + movd %1,%2 + %elifidn %1,esp + movd %1,%2 + %elifidn %2,esp + movd %1,%2 + %elifidn %1,ebp + movd %1,%2 + %elifidn %2,ebp + movd %1,%2 + %else + movq %1,%2 + %endif +%endmacro +%endif + + +; LIBVPX_YASM_WIN64 +; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 +; or win64 is defined on the Yasm command line. +%ifidn __OUTPUT_FORMAT__,win64 +%define LIBVPX_YASM_WIN64 1 +%elifidn __OUTPUT_FORMAT__,x64 +%define LIBVPX_YASM_WIN64 1 +%else +%define LIBVPX_YASM_WIN64 0 +%endif + +; sym() +; Return the proper symbol name for the target ABI. +; +; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols +; with C linkage be prefixed with an underscore. +; +%ifidn __OUTPUT_FORMAT__,elf32 +%define sym(x) x +%elifidn __OUTPUT_FORMAT__,elf64 +%define sym(x) x +%elifidn __OUTPUT_FORMAT__,elfx32 +%define sym(x) x +%elif LIBVPX_YASM_WIN64 +%define sym(x) x +%else +%define sym(x) _ %+ x +%endif + +; PRIVATE +; Macro for the attribute to hide a global symbol for the target ABI. +; This is only active if CHROMIUM is defined. +; +; Chromium doesn't like exported global symbols due to symbol clashing with +; plugins among other things. +; +; Requires Chromium's patched copy of yasm: +; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 +; http://www.tortall.net/projects/yasm/ticket/236 +; +%ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,elf32 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elf64 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define PRIVATE :hidden + %elif LIBVPX_YASM_WIN64 + %define PRIVATE + %else + %define PRIVATE :private_extern + %endif +%else + %define PRIVATE +%endif + +; arg() +; Return the address specification of the given argument +; +%if ABI_IS_32BIT + %define arg(x) [ebp+8+4*x] +%else + ; 64 bit ABI passes arguments in registers. This is a workaround to get up + ; and running quickly. Relies on SHADOW_ARGS_TO_STACK + %if LIBVPX_YASM_WIN64 + %define arg(x) [rbp+16+8*x] + %else + %define arg(x) [rbp-8-8*x] + %endif +%endif + +; REG_SZ_BYTES, REG_SZ_BITS +; Size of a register +%if ABI_IS_32BIT +%define REG_SZ_BYTES 4 +%define REG_SZ_BITS 32 +%else +%define REG_SZ_BYTES 8 +%define REG_SZ_BITS 64 +%endif + + +; ALIGN_STACK +; This macro aligns the stack to the given alignment (in bytes). The stack +; is left such that the previous value of the stack pointer is the first +; argument on the stack (ie, the inverse of this macro is 'pop rsp.') +; This macro uses one temporary register, which is not preserved, and thus +; must be specified as an argument. +%macro ALIGN_STACK 2 + mov %2, rsp + and rsp, -%1 + lea rsp, [rsp - (%1 - REG_SZ_BYTES)] + push %2 +%endmacro + + +; +; The Microsoft assembler tries to impose a certain amount of type safety in +; its register usage. YASM doesn't recognize these directives, so we just +; %define them away to maintain as much compatibility as possible with the +; original inline assembler we're porting from. +; +%idefine PTR +%idefine XMMWORD +%idefine MMWORD + +; PIC macros +; +%if ABI_IS_32BIT + %if CONFIG_PIC=1 + %ifidn __OUTPUT_FORMAT__,elf32 + %define WRT_PLT wrt ..plt + %macro GET_GOT 1 + extern _GLOBAL_OFFSET_TABLE_ + push %1 + call %%get_got + %%sub_offset: + jmp %%exitGG + %%get_got: + mov %1, [esp] + add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc + ret + %%exitGG: + %undef GLOBAL + %define GLOBAL(x) x + %1 wrt ..gotoff + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %elifidn __OUTPUT_FORMAT__,macho32 + %macro GET_GOT 1 + push %1 + call %%get_got + %%get_got: + pop %1 + %undef GLOBAL + %define GLOBAL(x) x + %1 - %%get_got + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %endif + %endif + + %ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,macho32 + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif + %else + %define HIDDEN_DATA(x) x + %endif +%else + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) rel x + %ifidn __OUTPUT_FORMAT__,elf64 + %define WRT_PLT wrt ..plt + %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define WRT_PLT wrt ..plt + %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,macho64 + %ifdef CHROMIUM + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif + %else + %define HIDDEN_DATA(x) x + %endif +%endif +%ifnmacro GET_GOT + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) x +%endif +%ifndef RESTORE_GOT +%define RESTORE_GOT +%endif +%ifndef WRT_PLT +%define WRT_PLT +%endif + +%if ABI_IS_32BIT + %macro SHADOW_ARGS_TO_STACK 1 + %endm + %define UNSHADOW_ARGS +%else +%if LIBVPX_YASM_WIN64 + %macro SHADOW_ARGS_TO_STACK 1 ; argc + %if %1 > 0 + mov arg(0),rcx + %endif + %if %1 > 1 + mov arg(1),rdx + %endif + %if %1 > 2 + mov arg(2),r8 + %endif + %if %1 > 3 + mov arg(3),r9 + %endif + %endm +%else + %macro SHADOW_ARGS_TO_STACK 1 ; argc + %if %1 > 0 + push rdi + %endif + %if %1 > 1 + push rsi + %endif + %if %1 > 2 + push rdx + %endif + %if %1 > 3 + push rcx + %endif + %if %1 > 4 + push r8 + %endif + %if %1 > 5 + push r9 + %endif + %if %1 > 6 + %assign i %1-6 + %assign off 16 + %rep i + mov rax,[rbp+off] + push rax + %assign off off+8 + %endrep + %endif + %endm +%endif + %define UNSHADOW_ARGS mov rsp, rbp +%endif + +; Win64 ABI requires that XMM6:XMM15 are callee saved +; SAVE_XMM n, [u] +; store registers 6-n on the stack +; if u is specified, use unaligned movs. +; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return +; value. Typically we follow this up with 'push rbp' - re-aligning the stack - +; but in some cases this is not done and unaligned movs must be used. +%if LIBVPX_YASM_WIN64 +%macro SAVE_XMM 1-2 a + %if %1 < 6 + %error Only xmm registers 6-15 must be preserved + %else + %assign last_xmm %1 + %define movxmm movdq %+ %2 + %assign xmm_stack_space ((last_xmm - 5) * 16) + sub rsp, xmm_stack_space + %assign i 6 + %rep (last_xmm - 5) + movxmm [rsp + ((i - 6) * 16)], xmm %+ i + %assign i i+1 + %endrep + %endif +%endmacro +%macro RESTORE_XMM 0 + %ifndef last_xmm + %error RESTORE_XMM must be paired with SAVE_XMM n + %else + %assign i last_xmm + %rep (last_xmm - 5) + movxmm xmm %+ i, [rsp +((i - 6) * 16)] + %assign i i-1 + %endrep + add rsp, xmm_stack_space + ; there are a couple functions which return from multiple places. + ; otherwise, we could uncomment these: + ; %undef last_xmm + ; %undef xmm_stack_space + ; %undef movxmm + %endif +%endmacro +%else +%macro SAVE_XMM 1-2 +%endmacro +%macro RESTORE_XMM 0 +%endmacro +%endif + +; Name of the rodata section +; +; .rodata seems to be an elf-ism, as it doesn't work on OSX. +; +%ifidn __OUTPUT_FORMAT__,macho64 +%define SECTION_RODATA section .text +%elifidn __OUTPUT_FORMAT__,macho32 +%macro SECTION_RODATA 0 +section .text +%endmacro +%elifidn __OUTPUT_FORMAT__,aout +%define SECTION_RODATA section .data +%else +%define SECTION_RODATA section .rodata +%endif + + +; Tell GNU ld that we don't require an executable stack. +%ifidn __OUTPUT_FORMAT__,elf32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%elifidn __OUTPUT_FORMAT__,elf64 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%elifidn __OUTPUT_FORMAT__,elfx32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%endif + +; On Android platforms use lrand48 when building postproc routines. Prior to L +; rand() was not available. +%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1 +%ifdef __ANDROID__ +extern sym(lrand48) +%define LIBVPX_RAND lrand48 +%else +extern sym(rand) +%define LIBVPX_RAND rand +%endif +%endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12config.c b/thirdparty/libvpx/vpx_scale/generic/yv12config.c new file mode 100644 index 00000000000..6bbb6d8d482 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale/generic/yv12config.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "vpx_scale/yv12config.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +/**************************************************************************** +* Exports +****************************************************************************/ + +/**************************************************************************** + * + ****************************************************************************/ +#define yv12_align_addr(addr, align) \ + (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) + +int +vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { + if (ybf) { + // If libvpx is using frame buffer callbacks then buffer_alloc_sz must + // not be set. + if (ybf->buffer_alloc_sz > 0) { + vpx_free(ybf->buffer_alloc); + } + + /* buffer_alloc isn't accessed by most functions. Rather y_buffer, + u_buffer and v_buffer point to buffer_alloc and are used. Clear out + all of this so that a freed pointer isn't inadvertently used */ + memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + } else { + return -1; + } + + return 0; +} + +int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int border) { + if (ybf) { + int aligned_width = (width + 15) & ~15; + int aligned_height = (height + 15) & ~15; + int y_stride = ((aligned_width + 2 * border) + 31) & ~31; + int yplane_size = (aligned_height + 2 * border) * y_stride; + int uv_width = aligned_width >> 1; + int uv_height = aligned_height >> 1; + /** There is currently a bunch of code which assumes + * uv_stride == y_stride/2, so enforce this here. */ + int uv_stride = y_stride >> 1; + int uvplane_size = (uv_height + border) * uv_stride; + const int frame_size = yplane_size + 2 * uvplane_size; + + if (!ybf->buffer_alloc) { + ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size); + ybf->buffer_alloc_sz = frame_size; + } + + if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) + return -1; + + /* Only support allocating buffers that have a border that's a multiple + * of 32. The border restriction is required to get 16-byte alignment of + * the start of the chroma rows without introducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). */ + if (border & 0x1f) + return -3; + + ybf->y_crop_width = width; + ybf->y_crop_height = height; + ybf->y_width = aligned_width; + ybf->y_height = aligned_height; + ybf->y_stride = y_stride; + + ybf->uv_crop_width = (width + 1) / 2; + ybf->uv_crop_height = (height + 1) / 2; + ybf->uv_width = uv_width; + ybf->uv_height = uv_height; + ybf->uv_stride = uv_stride; + + ybf->alpha_width = 0; + ybf->alpha_height = 0; + ybf->alpha_stride = 0; + + ybf->border = border; + ybf->frame_size = frame_size; + + ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; + ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; + ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; + ybf->alpha_buffer = NULL; + + ybf->corrupted = 0; /* assume not currupted by errors */ + return 0; + } + return -2; +} + +int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int border) { + if (ybf) { + vp8_yv12_de_alloc_frame_buffer(ybf); + return vp8_yv12_realloc_frame_buffer(ybf, width, height, border); + } + return -2; +} + +#if CONFIG_VP9 +// TODO(jkoleszar): Maybe replace this with struct vpx_image + +int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { + if (ybf) { + if (ybf->buffer_alloc_sz > 0) { + vpx_free(ybf->buffer_alloc); + } + + /* buffer_alloc isn't accessed by most functions. Rather y_buffer, + u_buffer and v_buffer point to buffer_alloc and are used. Clear out + all of this so that a freed pointer isn't inadvertently used */ + memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + } else { + return -1; + } + + return 0; +} + +int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, + int ss_x, int ss_y, +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth, +#endif + int border, + int byte_alignment, + vpx_codec_frame_buffer_t *fb, + vpx_get_frame_buffer_cb_fn_t cb, + void *cb_priv) { + if (ybf) { + const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; + const int aligned_width = (width + 7) & ~7; + const int aligned_height = (height + 7) & ~7; + const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; + const uint64_t yplane_size = (aligned_height + 2 * border) * + (uint64_t)y_stride + byte_alignment; + const int uv_width = aligned_width >> ss_x; + const int uv_height = aligned_height >> ss_y; + const int uv_stride = y_stride >> ss_x; + const int uv_border_w = border >> ss_x; + const int uv_border_h = border >> ss_y; + const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * + (uint64_t)uv_stride + byte_alignment; + +#if CONFIG_VP9_HIGHBITDEPTH + const uint64_t frame_size = + (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); +#else + const uint64_t frame_size = yplane_size + 2 * uvplane_size; +#endif // CONFIG_VP9_HIGHBITDEPTH + + uint8_t *buf = NULL; + + if (cb != NULL) { + const int align_addr_extra_size = 31; + const uint64_t external_frame_size = frame_size + align_addr_extra_size; + + assert(fb != NULL); + + if (external_frame_size != (size_t)external_frame_size) + return -1; + + // Allocation to hold larger frame, or first allocation. + if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) + return -1; + + if (fb->data == NULL || fb->size < external_frame_size) + return -1; + + ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + // This memset is needed for fixing the issue of using uninitialized + // value in msan test. It will cause a perf loss, so only do this for + // msan test. + memset(ybf->buffer_alloc, 0, (int)frame_size); +#endif +#endif + } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { + // Allocation to hold larger frame, or first allocation. + vpx_free(ybf->buffer_alloc); + ybf->buffer_alloc = NULL; + + if (frame_size != (size_t)frame_size) + return -1; + + ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size); + if (!ybf->buffer_alloc) + return -1; + + ybf->buffer_alloc_sz = (int)frame_size; + + // This memset is needed for fixing valgrind error from C loop filter + // due to access uninitialized memory in frame border. It could be + // removed if border is totally removed. + memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); + } + + /* Only support allocating buffers that have a border that's a multiple + * of 32. The border restriction is required to get 16-byte alignment of + * the start of the chroma rows without introducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). */ + if (border & 0x1f) + return -3; + + ybf->y_crop_width = width; + ybf->y_crop_height = height; + ybf->y_width = aligned_width; + ybf->y_height = aligned_height; + ybf->y_stride = y_stride; + + ybf->uv_crop_width = (width + ss_x) >> ss_x; + ybf->uv_crop_height = (height + ss_y) >> ss_y; + ybf->uv_width = uv_width; + ybf->uv_height = uv_height; + ybf->uv_stride = uv_stride; + + ybf->border = border; + ybf->frame_size = (int)frame_size; + ybf->subsampling_x = ss_x; + ybf->subsampling_y = ss_y; + + buf = ybf->buffer_alloc; +#if CONFIG_VP9_HIGHBITDEPTH + if (use_highbitdepth) { + // Store uint16 addresses when using 16bit framebuffers + buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc); + ybf->flags = YV12_FLAG_HIGHBITDEPTH; + } else { + ybf->flags = 0; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + ybf->y_buffer = (uint8_t *)yv12_align_addr( + buf + (border * y_stride) + border, vp9_byte_align); + ybf->u_buffer = (uint8_t *)yv12_align_addr( + buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w, + vp9_byte_align); + ybf->v_buffer = (uint8_t *)yv12_align_addr( + buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + + uv_border_w, vp9_byte_align); + + ybf->corrupted = 0; /* assume not corrupted by errors */ + return 0; + } + return -2; +} + +int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, + int ss_x, int ss_y, +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth, +#endif + int border, + int byte_alignment) { + if (ybf) { + vpx_free_frame_buffer(ybf); + return vpx_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, +#endif + border, byte_alignment, NULL, NULL, NULL); + } + return -2; +} +#endif diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12extend.c b/thirdparty/libvpx/vpx_scale/generic/yv12extend.c new file mode 100644 index 00000000000..52f0aff1f25 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale/generic/yv12extend.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" +#if CONFIG_VP9_HIGHBITDEPTH +#include "vp9/common/vp9_common.h" +#endif + +static void extend_plane(uint8_t *const src, int src_stride, + int width, int height, + int extend_top, int extend_left, + int extend_bottom, int extend_right) { + int i; + const int linesize = extend_left + extend_right + width; + + /* copy the left and right most columns out */ + uint8_t *src_ptr1 = src; + uint8_t *src_ptr2 = src + width - 1; + uint8_t *dst_ptr1 = src - extend_left; + uint8_t *dst_ptr2 = src + width; + + for (i = 0; i < height; ++i) { + memset(dst_ptr1, src_ptr1[0], extend_left); + memset(dst_ptr2, src_ptr2[0], extend_right); + src_ptr1 += src_stride; + src_ptr2 += src_stride; + dst_ptr1 += src_stride; + dst_ptr2 += src_stride; + } + + /* Now copy the top and bottom lines into each line of the respective + * borders + */ + src_ptr1 = src - extend_left; + src_ptr2 = src + src_stride * (height - 1) - extend_left; + dst_ptr1 = src + src_stride * -extend_top - extend_left; + dst_ptr2 = src + src_stride * height - extend_left; + + for (i = 0; i < extend_top; ++i) { + memcpy(dst_ptr1, src_ptr1, linesize); + dst_ptr1 += src_stride; + } + + for (i = 0; i < extend_bottom; ++i) { + memcpy(dst_ptr2, src_ptr2, linesize); + dst_ptr2 += src_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void extend_plane_high(uint8_t *const src8, int src_stride, + int width, int height, + int extend_top, int extend_left, + int extend_bottom, int extend_right) { + int i; + const int linesize = extend_left + extend_right + width; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + + /* copy the left and right most columns out */ + uint16_t *src_ptr1 = src; + uint16_t *src_ptr2 = src + width - 1; + uint16_t *dst_ptr1 = src - extend_left; + uint16_t *dst_ptr2 = src + width; + + for (i = 0; i < height; ++i) { + vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); + vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); + src_ptr1 += src_stride; + src_ptr2 += src_stride; + dst_ptr1 += src_stride; + dst_ptr2 += src_stride; + } + + /* Now copy the top and bottom lines into each line of the respective + * borders + */ + src_ptr1 = src - extend_left; + src_ptr2 = src + src_stride * (height - 1) - extend_left; + dst_ptr1 = src + src_stride * -extend_top - extend_left; + dst_ptr2 = src + src_stride * height - extend_left; + + for (i = 0; i < extend_top; ++i) { + memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); + dst_ptr1 += src_stride; + } + + for (i = 0; i < extend_bottom; ++i) { + memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); + dst_ptr2 += src_stride; + } +} +#endif + +void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { + const int uv_border = ybf->border / 2; + + assert(ybf->border % 2 == 0); + assert(ybf->y_height - ybf->y_crop_height < 16); + assert(ybf->y_width - ybf->y_crop_width < 16); + assert(ybf->y_height - ybf->y_crop_height >= 0); + assert(ybf->y_width - ybf->y_crop_width >= 0); + +#if CONFIG_VP9_HIGHBITDEPTH + if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { + extend_plane_high( + ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ybf->border, ybf->border, + ybf->border + ybf->y_height - ybf->y_crop_height, + ybf->border + ybf->y_width - ybf->y_crop_width); + + extend_plane_high( + ybf->u_buffer, ybf->uv_stride, + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); + + extend_plane_high( + ybf->v_buffer, ybf->uv_stride, + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); + return; + } +#endif + extend_plane(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ybf->border, ybf->border, + ybf->border + ybf->y_height - ybf->y_crop_height, + ybf->border + ybf->y_width - ybf->y_crop_width); + + extend_plane(ybf->u_buffer, ybf->uv_stride, + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); + + extend_plane(ybf->v_buffer, ybf->uv_stride, + ybf->uv_crop_width, ybf->uv_crop_height, + uv_border, uv_border, + uv_border + ybf->uv_height - ybf->uv_crop_height, + uv_border + ybf->uv_width - ybf->uv_crop_width); +} + +#if CONFIG_VP9 +static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { + const int c_w = ybf->uv_crop_width; + const int c_h = ybf->uv_crop_height; + const int ss_x = ybf->uv_width < ybf->y_width; + const int ss_y = ybf->uv_height < ybf->y_height; + const int c_et = ext_size >> ss_y; + const int c_el = ext_size >> ss_x; + const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height; + const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width; + + assert(ybf->y_height - ybf->y_crop_height < 16); + assert(ybf->y_width - ybf->y_crop_width < 16); + assert(ybf->y_height - ybf->y_crop_height >= 0); + assert(ybf->y_width - ybf->y_crop_width >= 0); + +#if CONFIG_VP9_HIGHBITDEPTH + if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { + extend_plane_high(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ext_size, ext_size, + ext_size + ybf->y_height - ybf->y_crop_height, + ext_size + ybf->y_width - ybf->y_crop_width); + extend_plane_high(ybf->u_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); + extend_plane_high(ybf->v_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); + return; + } +#endif + extend_plane(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ext_size, ext_size, + ext_size + ybf->y_height - ybf->y_crop_height, + ext_size + ybf->y_width - ybf->y_crop_width); + + extend_plane(ybf->u_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); + + extend_plane(ybf->v_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); +} + +void vpx_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { + extend_frame(ybf, ybf->border); +} + +void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { + const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? + VP9INNERBORDERINPIXELS : ybf->border; + extend_frame(ybf, inner_bw); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + memcpy(dst, src, num * sizeof(uint16_t)); +} +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_VP9 + +// Copies the source image into the destination image and updates the +// destination's UMV borders. +// Note: The frames are assumed to be identical in size. +void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, + YV12_BUFFER_CONFIG *dst_ybc) { + int row; + const uint8_t *src = src_ybc->y_buffer; + uint8_t *dst = dst_ybc->y_buffer; + +#if 0 + /* These assertions are valid in the codec, but the libvpx-tester uses + * this code slightly differently. + */ + assert(src_ybc->y_width == dst_ybc->y_width); + assert(src_ybc->y_height == dst_ybc->y_height); +#endif + +#if CONFIG_VP9_HIGHBITDEPTH + if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { + assert(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH); + for (row = 0; row < src_ybc->y_height; ++row) { + memcpy_short_addr(dst, src, src_ybc->y_width); + src += src_ybc->y_stride; + dst += dst_ybc->y_stride; + } + + src = src_ybc->u_buffer; + dst = dst_ybc->u_buffer; + + for (row = 0; row < src_ybc->uv_height; ++row) { + memcpy_short_addr(dst, src, src_ybc->uv_width); + src += src_ybc->uv_stride; + dst += dst_ybc->uv_stride; + } + + src = src_ybc->v_buffer; + dst = dst_ybc->v_buffer; + + for (row = 0; row < src_ybc->uv_height; ++row) { + memcpy_short_addr(dst, src, src_ybc->uv_width); + src += src_ybc->uv_stride; + dst += dst_ybc->uv_stride; + } + + vp8_yv12_extend_frame_borders_c(dst_ybc); + return; + } else { + assert(!(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH)); + } +#endif + + for (row = 0; row < src_ybc->y_height; ++row) { + memcpy(dst, src, src_ybc->y_width); + src += src_ybc->y_stride; + dst += dst_ybc->y_stride; + } + + src = src_ybc->u_buffer; + dst = dst_ybc->u_buffer; + + for (row = 0; row < src_ybc->uv_height; ++row) { + memcpy(dst, src, src_ybc->uv_width); + src += src_ybc->uv_stride; + dst += dst_ybc->uv_stride; + } + + src = src_ybc->v_buffer; + dst = dst_ybc->v_buffer; + + for (row = 0; row < src_ybc->uv_height; ++row) { + memcpy(dst, src, src_ybc->uv_width); + src += src_ybc->uv_stride; + dst += dst_ybc->uv_stride; + } + + vp8_yv12_extend_frame_borders_c(dst_ybc); +} + +void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, + YV12_BUFFER_CONFIG *dst_ybc) { + int row; + const uint8_t *src = src_ybc->y_buffer; + uint8_t *dst = dst_ybc->y_buffer; + +#if CONFIG_VP9_HIGHBITDEPTH + if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { + const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); + uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); + for (row = 0; row < src_ybc->y_height; ++row) { + memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); + src16 += src_ybc->y_stride; + dst16 += dst_ybc->y_stride; + } + return; + } +#endif + + for (row = 0; row < src_ybc->y_height; ++row) { + memcpy(dst, src, src_ybc->y_width); + src += src_ybc->y_stride; + dst += dst_ybc->y_stride; + } +} diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale.h b/thirdparty/libvpx/vpx_scale/vpx_scale.h new file mode 100644 index 00000000000..43fcf9d66e1 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale/vpx_scale.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_SCALE_VPX_SCALE_H_ +#define VPX_SCALE_VPX_SCALE_H_ + +#include "vpx_scale/yv12config.h" + +extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + unsigned char *temp_area, + unsigned char temp_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced); + +#endif // VPX_SCALE_VPX_SCALE_H_ diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c b/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c new file mode 100644 index 00000000000..bea603fd104 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#define RTCD_C +#include "./vpx_scale_rtcd.h" +#include "vpx_ports/vpx_once.h" + +void vpx_scale_rtcd() +{ + once(setup_rtcd_internal); +} diff --git a/thirdparty/libvpx/vpx_scale/yv12config.h b/thirdparty/libvpx/vpx_scale/yv12config.h new file mode 100644 index 00000000000..37b255d4d39 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale/yv12config.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_SCALE_YV12CONFIG_H_ +#define VPX_SCALE_YV12CONFIG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./vpx_config.h" +#include "vpx/vpx_codec.h" +#include "vpx/vpx_frame_buffer.h" +#include "vpx/vpx_integer.h" + +#define VP8BORDERINPIXELS 32 +#define VP9INNERBORDERINPIXELS 96 +#define VP9_INTERP_EXTEND 4 +#define VP9_ENC_BORDER_IN_PIXELS 160 +#define VP9_DEC_BORDER_IN_PIXELS 32 + +typedef struct yv12_buffer_config { + int y_width; + int y_height; + int y_crop_width; + int y_crop_height; + int y_stride; + + int uv_width; + int uv_height; + int uv_crop_width; + int uv_crop_height; + int uv_stride; + + int alpha_width; + int alpha_height; + int alpha_stride; + + uint8_t *y_buffer; + uint8_t *u_buffer; + uint8_t *v_buffer; + uint8_t *alpha_buffer; + + uint8_t *buffer_alloc; + int buffer_alloc_sz; + int border; + int frame_size; + int subsampling_x; + int subsampling_y; + unsigned int bit_depth; + vpx_color_space_t color_space; + vpx_color_range_t color_range; + int render_width; + int render_height; + + int corrupted; + int flags; +} YV12_BUFFER_CONFIG; + +#define YV12_FLAG_HIGHBITDEPTH 8 + +int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int border); +int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int border); +int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); + +int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int ss_x, int ss_y, +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth, +#endif + int border, int byte_alignment); + +// Updates the yv12 buffer config with the frame buffer. |byte_alignment| must +// be a power of 2, from 32 to 1024. 0 sets legacy alignment. If cb is not +// NULL, then libvpx is using the frame buffer callbacks to handle memory. +// If cb is not NULL, libvpx will call cb with minimum size in bytes needed +// to decode the current frame. If cb is NULL, libvpx will allocate memory +// internally to decode the current frame. Returns 0 on success. Returns < 0 +// on failure. +int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int ss_x, int ss_y, +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth, +#endif + int border, + int byte_alignment, + vpx_codec_frame_buffer_t *fb, + vpx_get_frame_buffer_cb_fn_t cb, + void *cb_priv); +int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf); + +#ifdef __cplusplus +} +#endif + +#endif // VPX_SCALE_YV12CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_util/endian_inl.h b/thirdparty/libvpx/vpx_util/endian_inl.h new file mode 100644 index 00000000000..37bdce1ccd9 --- /dev/null +++ b/thirdparty/libvpx/vpx_util/endian_inl.h @@ -0,0 +1,120 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Endian related functions. + +#ifndef VPX_UTIL_ENDIAN_INL_H_ +#define VPX_UTIL_ENDIAN_INL_H_ + +#include +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#if defined(__GNUC__) +# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) +# define LOCAL_GCC_PREREQ(maj, min) \ + (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_GCC_VERSION 0 +# define LOCAL_GCC_PREREQ(maj, min) 0 +#endif + +// handle clang compatibility +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +#if defined(WORDS_BIGENDIAN) +#define HToLE32 BSwap32 +#define HToLE16 BSwap16 +#define HToBE64(x) (x) +#define HToBE32(x) (x) +#else +#define HToLE32(x) (x) +#define HToLE16(x) (x) +#define HToBE64(X) BSwap64(X) +#define HToBE32(X) BSwap32(X) +#endif + +#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) +#define HAVE_BUILTIN_BSWAP16 +#endif + +#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) +#define HAVE_BUILTIN_BSWAP32 +#endif + +#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) +#define HAVE_BUILTIN_BSWAP64 +#endif + +#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \ + defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6) +#define VPX_USE_MIPS32_R2 +#endif + +static INLINE uint16_t BSwap16(uint16_t x) { +#if defined(HAVE_BUILTIN_BSWAP16) + return __builtin_bswap16(x); +#elif defined(_MSC_VER) + return _byteswap_ushort(x); +#else + // gcc will recognize a 'rorw $8, ...' here: + return (x >> 8) | ((x & 0xff) << 8); +#endif // HAVE_BUILTIN_BSWAP16 +} + +static INLINE uint32_t BSwap32(uint32_t x) { +#if defined(VPX_USE_MIPS32_R2) + uint32_t ret; + __asm__ volatile ( + "wsbh %[ret], %[x] \n\t" + "rotr %[ret], %[ret], 16 \n\t" + : [ret]"=r"(ret) + : [x]"r"(x) + ); + return ret; +#elif defined(HAVE_BUILTIN_BSWAP32) + return __builtin_bswap32(x); +#elif defined(__i386__) || defined(__x86_64__) + uint32_t swapped_bytes; + __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint32_t)_byteswap_ulong(x); +#else + return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); +#endif // HAVE_BUILTIN_BSWAP32 +} + +static INLINE uint64_t BSwap64(uint64_t x) { +#if defined(HAVE_BUILTIN_BSWAP64) + return __builtin_bswap64(x); +#elif defined(__x86_64__) + uint64_t swapped_bytes; + __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint64_t)_byteswap_uint64(x); +#else // generic code for swapping 64-bit values (suggested by bdb@) + x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); + x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); + x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); + return x; +#endif // HAVE_BUILTIN_BSWAP64 +} + +#endif // VPX_UTIL_ENDIAN_INL_H_ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.c b/thirdparty/libvpx/vpx_util/vpx_thread.c new file mode 100644 index 00000000000..0bb0125bd49 --- /dev/null +++ b/thirdparty/libvpx/vpx_util/vpx_thread.c @@ -0,0 +1,184 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 264210ba2807e4da47eb5d18c04cf869d89b9784 src/utils/thread.c + +#include +#include // for memset() +#include "./vpx_thread.h" +#include "vpx_mem/vpx_mem.h" + +#if CONFIG_MULTITHREAD + +struct VPxWorkerImpl { + pthread_mutex_t mutex_; + pthread_cond_t condition_; + pthread_t thread_; +}; + +//------------------------------------------------------------------------------ + +static void execute(VPxWorker *const worker); // Forward declaration. + +static THREADFN thread_loop(void *ptr) { + VPxWorker *const worker = (VPxWorker*)ptr; + int done = 0; + while (!done) { + pthread_mutex_lock(&worker->impl_->mutex_); + while (worker->status_ == OK) { // wait in idling mode + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); + } + if (worker->status_ == WORK) { + execute(worker); + worker->status_ = OK; + } else if (worker->status_ == NOT_OK) { // finish the worker + done = 1; + } + // signal to the main thread that we're done (for sync()) + pthread_cond_signal(&worker->impl_->condition_); + pthread_mutex_unlock(&worker->impl_->mutex_); + } + return THREAD_RETURN(NULL); // Thread is finished +} + +// main thread state control +static void change_state(VPxWorker *const worker, + VPxWorkerStatus new_status) { + // No-op when attempting to change state on a thread that didn't come up. + // Checking status_ without acquiring the lock first would result in a data + // race. + if (worker->impl_ == NULL) return; + + pthread_mutex_lock(&worker->impl_->mutex_); + if (worker->status_ >= OK) { + // wait for the worker to finish + while (worker->status_ != OK) { + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); + } + // assign new status and release the working thread if needed + if (new_status != OK) { + worker->status_ = new_status; + pthread_cond_signal(&worker->impl_->condition_); + } + } + pthread_mutex_unlock(&worker->impl_->mutex_); +} + +#endif // CONFIG_MULTITHREAD + +//------------------------------------------------------------------------------ + +static void init(VPxWorker *const worker) { + memset(worker, 0, sizeof(*worker)); + worker->status_ = NOT_OK; +} + +static int sync(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, OK); +#endif + assert(worker->status_ <= OK); + return !worker->had_error; +} + +static int reset(VPxWorker *const worker) { + int ok = 1; + worker->had_error = 0; + if (worker->status_ < OK) { +#if CONFIG_MULTITHREAD + worker->impl_ = (VPxWorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); + if (worker->impl_ == NULL) { + return 0; + } + if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { + goto Error; + } + if (pthread_cond_init(&worker->impl_->condition_, NULL)) { + pthread_mutex_destroy(&worker->impl_->mutex_); + goto Error; + } + pthread_mutex_lock(&worker->impl_->mutex_); + ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); + if (ok) worker->status_ = OK; + pthread_mutex_unlock(&worker->impl_->mutex_); + if (!ok) { + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); + Error: + vpx_free(worker->impl_); + worker->impl_ = NULL; + return 0; + } +#else + worker->status_ = OK; +#endif + } else if (worker->status_ > OK) { + ok = sync(worker); + } + assert(!ok || (worker->status_ == OK)); + return ok; +} + +static void execute(VPxWorker *const worker) { + if (worker->hook != NULL) { + worker->had_error |= !worker->hook(worker->data1, worker->data2); + } +} + +static void launch(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, WORK); +#else + execute(worker); +#endif +} + +static void end(VPxWorker *const worker) { +#if CONFIG_MULTITHREAD + if (worker->impl_ != NULL) { + change_state(worker, NOT_OK); + pthread_join(worker->impl_->thread_, NULL); + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); + vpx_free(worker->impl_); + worker->impl_ = NULL; + } +#else + worker->status_ = NOT_OK; + assert(worker->impl_ == NULL); +#endif + assert(worker->status_ == NOT_OK); +} + +//------------------------------------------------------------------------------ + +static VPxWorkerInterface g_worker_interface = { + init, reset, sync, launch, execute, end +}; + +int vpx_set_worker_interface(const VPxWorkerInterface* const winterface) { + if (winterface == NULL || + winterface->init == NULL || winterface->reset == NULL || + winterface->sync == NULL || winterface->launch == NULL || + winterface->execute == NULL || winterface->end == NULL) { + return 0; + } + g_worker_interface = *winterface; + return 1; +} + +const VPxWorkerInterface *vpx_get_worker_interface(void) { + return &g_worker_interface; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.h b/thirdparty/libvpx/vpx_util/vpx_thread.h new file mode 100644 index 00000000000..2062abd75f9 --- /dev/null +++ b/thirdparty/libvpx/vpx_util/vpx_thread.h @@ -0,0 +1,369 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h + +#ifndef VPX_THREAD_H_ +#define VPX_THREAD_H_ + +#include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Set maximum decode threads to be 8 due to the limit of frame buffers +// and not enough semaphores in the emulation layer on windows. +#define MAX_DECODE_THREADS 8 + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) && !HAVE_PTHREAD_H +#include // NOLINT +#include // NOLINT +#include // NOLINT +typedef HANDLE pthread_t; +typedef CRITICAL_SECTION pthread_mutex_t; +typedef struct { + HANDLE waiting_sem_; + HANDLE received_sem_; + HANDLE signal_event_; +} pthread_cond_t; + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +// _beginthreadex requires __stdcall +#define THREADFN unsigned int __stdcall +#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) + +static INLINE int pthread_create(pthread_t* const thread, const void* attr, + unsigned int (__stdcall *start)(void*), + void* arg) { + (void)attr; + *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ + 0, /* unsigned stack_size */ + start, + arg, + 0, /* unsigned initflag */ + NULL); /* unsigned *thrdaddr */ + if (*thread == NULL) return 1; + SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); + return 0; +} + +static INLINE int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || + CloseHandle(thread) == 0); +} + +// Mutex +static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, + void* mutexattr) { + (void)mutexattr; + InitializeCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { + return TryEnterCriticalSection(mutex) ? 0 : EBUSY; +} + +static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { + EnterCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { + LeaveCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { + DeleteCriticalSection(mutex); + return 0; +} + +// Condition +static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { + int ok = 1; + ok &= (CloseHandle(condition->waiting_sem_) != 0); + ok &= (CloseHandle(condition->received_sem_) != 0); + ok &= (CloseHandle(condition->signal_event_) != 0); + return !ok; +} + +static INLINE int pthread_cond_init(pthread_cond_t *const condition, + void* cond_attr) { + (void)cond_attr; + condition->waiting_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); + condition->received_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); + condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); + if (condition->waiting_sem_ == NULL || + condition->received_sem_ == NULL || + condition->signal_event_ == NULL) { + pthread_cond_destroy(condition); + return 1; + } + return 0; +} + +static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { + int ok = 1; + if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { + // a thread is waiting in pthread_cond_wait: allow it to be notified + ok = SetEvent(condition->signal_event_); + // wait until the event is consumed so the signaler cannot consume + // the event via its own pthread_cond_wait. + ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != + WAIT_OBJECT_0); + } + return !ok; +} + +static INLINE int pthread_cond_wait(pthread_cond_t *const condition, + pthread_mutex_t *const mutex) { + int ok; + // note that there is a consumer available so the signal isn't dropped in + // pthread_cond_signal + if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) + return 1; + // now unlock the mutex so pthread_cond_signal may be issued + pthread_mutex_unlock(mutex); + ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == + WAIT_OBJECT_0); + ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); + pthread_mutex_lock(mutex); + return !ok; +} +#elif defined(__OS2__) +#define INCL_DOS +#include // NOLINT + +#include // NOLINT +#include // NOLINT +#include // NOLINT + +#define pthread_t TID +#define pthread_mutex_t HMTX + +typedef struct { + HEV event_sem_; + HEV ack_sem_; + volatile unsigned wait_count_; +} pthread_cond_t; + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +#define THREADFN void * +#define THREAD_RETURN(val) (val) + +typedef struct { + void* (*start_)(void*); + void* arg_; +} thread_arg; + +static void thread_start(void* arg) { + thread_arg targ = *(thread_arg *)arg; + free(arg); + + targ.start_(targ.arg_); +} + +static INLINE int pthread_create(pthread_t* const thread, const void* attr, + void* (*start)(void*), + void* arg) { + int tid; + thread_arg *targ = (thread_arg *)malloc(sizeof(*targ)); + if (targ == NULL) return 1; + + (void)attr; + + targ->start_ = start; + targ->arg_ = arg; + tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ); + if (tid == -1) { + free(targ); + return 1; + } + + *thread = tid; + return 0; +} + +static INLINE int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return DosWaitThread(&thread, DCWW_WAIT) != 0; +} + +// Mutex +static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, + void* mutexattr) { + (void)mutexattr; + return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0; +} + +static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { + return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY; +} + +static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { + return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0; +} + +static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { + return DosReleaseMutexSem(*mutex) != 0; +} + +static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { + return DosCloseMutexSem(*mutex) != 0; +} + +// Condition +static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { + int ok = 1; + ok &= DosCloseEventSem(condition->event_sem_) == 0; + ok &= DosCloseEventSem(condition->ack_sem_) == 0; + return !ok; +} + +static INLINE int pthread_cond_init(pthread_cond_t *const condition, + void* cond_attr) { + int ok = 1; + (void)cond_attr; + + ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) + == 0; + ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0; + if (!ok) { + pthread_cond_destroy(condition); + return 1; + } + condition->wait_count_ = 0; + return 0; +} + +static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { + int ok = 1; + + if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) { + ok &= DosPostEventSem(condition->event_sem_) == 0; + ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0; + } + + return !ok; +} + +static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { + int ok = 1; + + while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) + ok &= pthread_cond_signal(condition) == 0; + + return !ok; +} + +static INLINE int pthread_cond_wait(pthread_cond_t *const condition, + pthread_mutex_t *const mutex) { + int ok = 1; + + __atomic_increment(&condition->wait_count_); + + ok &= pthread_mutex_unlock(mutex) == 0; + + ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0; + + __atomic_decrement(&condition->wait_count_); + + ok &= DosPostEventSem(condition->ack_sem_) == 0; + + pthread_mutex_lock(mutex); + + return !ok; +} +#else // _WIN32 +#include // NOLINT +# define THREADFN void* +# define THREAD_RETURN(val) val +#endif + +#endif // CONFIG_MULTITHREAD + +// State of the worker thread object +typedef enum { + NOT_OK = 0, // object is unusable + OK, // ready to work + WORK // busy finishing the current task +} VPxWorkerStatus; + +// Function to be called by the worker thread. Takes two opaque pointers as +// arguments (data1 and data2), and should return false in case of error. +typedef int (*VPxWorkerHook)(void*, void*); + +// Platform-dependent implementation details for the worker. +typedef struct VPxWorkerImpl VPxWorkerImpl; + +// Synchronization object used to launch job in the worker thread +typedef struct { + VPxWorkerImpl *impl_; + VPxWorkerStatus status_; + VPxWorkerHook hook; // hook to call + void *data1; // first argument passed to 'hook' + void *data2; // second argument passed to 'hook' + int had_error; // return value of the last call to 'hook' +} VPxWorker; + +// The interface for all thread-worker related functions. All these functions +// must be implemented. +typedef struct { + // Must be called first, before any other method. + void (*init)(VPxWorker *const worker); + // Must be called to initialize the object and spawn the thread. Re-entrant. + // Will potentially launch the thread. Returns false in case of error. + int (*reset)(VPxWorker *const worker); + // Makes sure the previous work is finished. Returns true if worker->had_error + // was not set and no error condition was triggered by the working thread. + int (*sync)(VPxWorker *const worker); + // Triggers the thread to call hook() with data1 and data2 arguments. These + // hook/data1/data2 values can be changed at any time before calling this + // function, but not be changed afterward until the next call to Sync(). + void (*launch)(VPxWorker *const worker); + // This function is similar to launch() except that it calls the + // hook directly instead of using a thread. Convenient to bypass the thread + // mechanism while still using the VPxWorker structs. sync() must + // still be called afterward (for error reporting). + void (*execute)(VPxWorker *const worker); + // Kill the thread and terminate the object. To use the object again, one + // must call reset() again. + void (*end)(VPxWorker *const worker); +} VPxWorkerInterface; + +// Install a new set of threading functions, overriding the defaults. This +// should be done before any workers are started, i.e., before any encoding or +// decoding takes place. The contents of the interface struct are copied, it +// is safe to free the corresponding memory after this call. This function is +// not thread-safe. Return false in case of invalid pointer or methods. +int vpx_set_worker_interface(const VPxWorkerInterface *const winterface); + +// Retrieve the currently set thread worker interface. +const VPxWorkerInterface *vpx_get_worker_interface(void); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_THREAD_H_ diff --git a/thirdparty/libvpx/vpx_version.h b/thirdparty/libvpx/vpx_version.h new file mode 100644 index 00000000000..5cff3b429f4 --- /dev/null +++ b/thirdparty/libvpx/vpx_version.h @@ -0,0 +1,7 @@ +#define VERSION_MAJOR 1 +#define VERSION_MINOR 6 +#define VERSION_PATCH 0 +#define VERSION_EXTRA "" +#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) +#define VERSION_STRING_NOSP "v1.6.0" +#define VERSION_STRING " v1.6.0" From 1556d0d377ec376dd02db80a1bee7fc81eb684d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= Date: Wed, 14 Sep 2016 22:10:55 +0200 Subject: [PATCH 009/223] libvpx: Necessary modifications - convert ARM assembly (NEON) files, - add rtcd for run-time CPU features detection, - modify "system_state.h", - "arm_cpudetect.c" fixes. --- thirdparty/libvpx/rtcd/vp8_rtcd_arm.h | 240 +++++++ thirdparty/libvpx/rtcd/vp8_rtcd_c.h | 117 +++ thirdparty/libvpx/rtcd/vp8_rtcd_x86.h | 247 +++++++ thirdparty/libvpx/rtcd/vp9_rtcd_arm.h | 54 ++ thirdparty/libvpx/rtcd/vp9_rtcd_c.h | 41 ++ thirdparty/libvpx/rtcd/vp9_rtcd_x86.h | 55 ++ thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h | 678 ++++++++++++++++++ thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h | 355 +++++++++ thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h | 614 ++++++++++++++++ thirdparty/libvpx/vp8_rtcd.h | 9 + thirdparty/libvpx/vp9_rtcd.h | 9 + thirdparty/libvpx/vpx_config.asm | 65 ++ thirdparty/libvpx/vpx_config.h | 124 ++++ .../{ => armasm_ms}/intrapred_neon_asm.asm | 21 +- .../{ => armasm_ms}/loopfilter_mb_neon.asm | 10 +- .../arm/{ => armasm_ms}/save_reg_neon.asm | 11 +- .../vpx_dsp/arm/gas/intrapred_neon_asm.s | 658 +++++++++++++++++ .../vpx_dsp/arm/gas/loopfilter_mb_neon.s | 647 +++++++++++++++++ .../libvpx/vpx_dsp/arm/gas/save_reg_neon.s | 44 ++ .../arm/gas_apple/intrapred_neon_asm.s | 660 +++++++++++++++++ .../arm/gas_apple/loopfilter_mb_neon.s | 649 +++++++++++++++++ .../vpx_dsp/arm/gas_apple/save_reg_neon.s | 46 ++ thirdparty/libvpx/vpx_dsp_rtcd.h | 9 + thirdparty/libvpx/vpx_ports/arm_cpudetect.c | 61 +- thirdparty/libvpx/vpx_ports/system_state.h | 8 +- thirdparty/libvpx/vpx_scale_rtcd.h | 44 ++ 26 files changed, 5429 insertions(+), 47 deletions(-) create mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_arm.h create mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_c.h create mode 100644 thirdparty/libvpx/rtcd/vp8_rtcd_x86.h create mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_arm.h create mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_c.h create mode 100644 thirdparty/libvpx/rtcd/vp9_rtcd_x86.h create mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h create mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h create mode 100644 thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h create mode 100644 thirdparty/libvpx/vp8_rtcd.h create mode 100644 thirdparty/libvpx/vp9_rtcd.h create mode 100644 thirdparty/libvpx/vpx_config.asm create mode 100644 thirdparty/libvpx/vpx_config.h rename thirdparty/libvpx/vpx_dsp/arm/{ => armasm_ms}/intrapred_neon_asm.asm (98%) rename thirdparty/libvpx/vpx_dsp/arm/{ => armasm_ms}/loopfilter_mb_neon.asm (99%) rename thirdparty/libvpx/vpx_dsp/arm/{ => armasm_ms}/save_reg_neon.asm (83%) create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s create mode 100644 thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s create mode 100644 thirdparty/libvpx/vpx_dsp_rtcd.h create mode 100644 thirdparty/libvpx/vpx_scale_rtcd.h diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h new file mode 100644 index 00000000000..5c9b7aa392a --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h @@ -0,0 +1,240 @@ +#ifndef VP8_RTCD_H_ +#define VP8_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP8 + */ + +struct blockd; +struct loop_filter_info; + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c + +void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_clear_system_state_c(); +#define vp8_clear_system_state vp8_clear_system_state_c + +void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); + +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); +void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride); +RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); + +void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); + +void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); + +void vp8_dequantize_b_c(struct blockd*, short *dqc); +void vp8_dequantize_b_neon(struct blockd*, short *dqc); +RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); + +void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); + +void vp8_short_inv_walsh4x4_c(short *input, short *output); +void vp8_short_inv_walsh4x4_neon(short *input, short *output); +RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); + +void vp8_short_inv_walsh4x4_1_c(short *input, short *output); +#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c + +void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c + +void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; +#endif + vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; +#endif + vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon; +#endif + vp8_copy_mem16x16 = vp8_copy_mem16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon; +#endif + vp8_copy_mem8x4 = vp8_copy_mem8x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon; +#endif + vp8_copy_mem8x8 = vp8_copy_mem8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon; +#endif + vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon; +#endif + vp8_dequant_idct_add = vp8_dequant_idct_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon; +#endif + vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; +#endif + vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon; +#endif + vp8_dequantize_b = vp8_dequantize_b_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; +#endif + vp8_loop_filter_bh = vp8_loop_filter_bh_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; +#endif + vp8_loop_filter_bv = vp8_loop_filter_bv_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon; +#endif + vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon; +#endif + vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon; +#endif + vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon; +#endif + vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon; +#endif + vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; +#endif + vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; +#endif + vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon; +#endif + vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon; +#endif + vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon; +#endif + vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon; +#endif + vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; +#endif +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h b/thirdparty/libvpx/rtcd/vp8_rtcd_c.h new file mode 100644 index 00000000000..d1657ac09d5 --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp8_rtcd_c.h @@ -0,0 +1,117 @@ +#ifndef VP8_RTCD_H_ +#define VP8_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP8 + */ + +struct blockd; +struct loop_filter_info; + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c + +void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c + +void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c + +void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c + +void vp8_clear_system_state_c(); +#define vp8_clear_system_state vp8_clear_system_state_c + +void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem16x16 vp8_copy_mem16x16_c + +void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem8x4 vp8_copy_mem8x4_c + +void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +#define vp8_copy_mem8x8 vp8_copy_mem8x8_c + +void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c + +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); +#define vp8_dequant_idct_add vp8_dequant_idct_add_c + +void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c + +void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c + +void vp8_dequantize_b_c(struct blockd*, short *dqc); +#define vp8_dequantize_b vp8_dequantize_b_c + +void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_bh vp8_loop_filter_bh_c + +void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_bv vp8_loop_filter_bv_c + +void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c + +void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c + +void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c + +void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c + +void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c + +void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c + +void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c + +void vp8_short_inv_walsh4x4_c(short *input, short *output); +#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c + +void vp8_short_inv_walsh4x4_1_c(short *input, short *output); +#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c + +void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c + +void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c + +void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c + +void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c + +void vp8_rtcd(void); + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h new file mode 100644 index 00000000000..cbe1f47b2a7 --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h @@ -0,0 +1,247 @@ +#ifndef VP8_RTCD_H_ +#define VP8_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP8 + */ + +struct blockd; +struct loop_filter_info; + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_clear_system_state_c(); +void vpx_reset_mmx_state(); +RTCD_EXTERN void (*vp8_clear_system_state)(); + +void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +void vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); + +void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); +RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); + +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); +void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride); +RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); + +void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); +RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); + +void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs); +RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); + +void vp8_dequantize_b_c(struct blockd*, short *dqc); +void vp8_dequantize_b_mmx(struct blockd*, short *dqc); +RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); + +void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); +RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); + +void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); +void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); +RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); + +void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); +RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); + +void vp8_short_inv_walsh4x4_c(short *input, short *output); +void vp8_short_inv_walsh4x4_mmx(short *input, short *output); +void vp8_short_inv_walsh4x4_sse2(short *input, short *output); +RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); + +void vp8_short_inv_walsh4x4_1_c(short *input, short *output); +#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c + +void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); +RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); + +void vp8_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; + if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; + if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; + if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; + vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; + if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; + vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; + if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; + vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; + if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; + if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; + if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; + vp8_clear_system_state = vp8_clear_system_state_c; + if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; + vp8_copy_mem16x16 = vp8_copy_mem16x16_c; + if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; + if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; + vp8_copy_mem8x4 = vp8_copy_mem8x4_c; + if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; + vp8_copy_mem8x8 = vp8_copy_mem8x8_c; + if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; + vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; + if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; + vp8_dequant_idct_add = vp8_dequant_idct_add_c; + if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; + vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; + if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; + if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; + vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; + if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; + if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; + vp8_dequantize_b = vp8_dequantize_b_c; + if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; + vp8_loop_filter_bh = vp8_loop_filter_bh_c; + if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; + vp8_loop_filter_bv = vp8_loop_filter_bv_c; + if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; + vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; + if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; + vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; + if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; + vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; + if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; + vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; + if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; + vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; + if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; + vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; + if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; + if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; + vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; + if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; + vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; + if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; + if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; + vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; + if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; + if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; + if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; + vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; + if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; + if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; + vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; + if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; + if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; + if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; + vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; + if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; + if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; + if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h new file mode 100644 index 00000000000..afdc7e179e6 --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h @@ -0,0 +1,54 @@ +#ifndef VP9_RTCD_H_ +#define VP9_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP9 + */ + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); + +void vp9_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon; +#endif + vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon; +#endif +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h b/thirdparty/libvpx/rtcd/vp9_rtcd_c.h new file mode 100644 index 00000000000..329cb9d04c8 --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp9_rtcd_c.h @@ -0,0 +1,41 @@ +#ifndef VP9_RTCD_H_ +#define VP9_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP9 + */ + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c + +void vp9_rtcd(void); + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h new file mode 100644 index 00000000000..8ce8067674d --- /dev/null +++ b/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h @@ -0,0 +1,55 @@ +#ifndef VP9_RTCD_H_ +#define VP9_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * VP9 + */ + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); +RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); +RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); + +void vp9_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; + if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; + + vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; + if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; + + vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; + if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h new file mode 100644 index 00000000000..df42f3d24ae --- /dev/null +++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h @@ -0,0 +1,678 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c + +void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c + +void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c + +void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c + +void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c + +void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c + +void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c + +void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c + +void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c + +void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c + +void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c + +void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c + +void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c + +void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c + +void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c + +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + +void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c + +void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + +void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c + +void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c + +void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c + +void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c + +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + +void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + +void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c + +void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c + +void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_2d vpx_scaled_2d_c + +void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c + +void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c + +void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c + +void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_horiz vpx_scaled_horiz_c + +void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_vert vpx_scaled_vert_c + +void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + vpx_convolve8 = vpx_convolve8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8 = vpx_convolve8_neon; +#endif + vpx_convolve8_avg = vpx_convolve8_avg_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8_avg = vpx_convolve8_avg_neon; +#endif + vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon; +#endif + vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon; +#endif + vpx_convolve8_horiz = vpx_convolve8_horiz_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8_horiz = vpx_convolve8_horiz_neon; +#endif + vpx_convolve8_vert = vpx_convolve8_vert_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve8_vert = vpx_convolve8_vert_neon; +#endif + vpx_convolve_avg = vpx_convolve_avg_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve_avg = vpx_convolve_avg_neon; +#endif + vpx_convolve_copy = vpx_convolve_copy_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_convolve_copy = vpx_convolve_copy_neon; +#endif + vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_neon; +#endif + vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon; +#endif + vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon; +#endif + vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon; +#endif + vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon; +#endif + vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon; +#endif + vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon; +#endif + vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon; +#endif + vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon; +#endif + vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon; +#endif + vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon; +#endif + vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon; +#endif + vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon; +#endif + vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon; +#endif + vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon; +#endif + vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon; +#endif + vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon; +#endif + vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon; +#endif + vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon; +#endif + vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon; +#endif + vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon; +#endif + vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon; +#endif + vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon; +#endif + vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon; +#endif + vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon; +#endif + vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon; +#endif + vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon; +#endif + vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon; +#endif + vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon; +#endif + vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon; +#endif + vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon; +#endif + vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon; +#endif + vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon; +#endif + vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon; +#endif + vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon; +#endif + vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon; +#endif + vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon; +#endif + vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon; +#endif + vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon; +#endif + vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon; +#endif + vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon; +#endif + vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon; +#endif + vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon; +#endif + vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon; +#endif + vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon; +#endif + vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon; +#endif + vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon; +#endif + vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon; +#endif + vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon; +#endif + vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon; +#endif + vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon; +#endif + vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon; +#endif + vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon; +#endif + vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon; +#endif + vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon; +#endif + vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; +#if HAVE_NEON + if (flags & HAS_NEON) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon; +#endif +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h new file mode 100644 index 00000000000..9fcc2f00661 --- /dev/null +++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h @@ -0,0 +1,355 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8 vpx_convolve8_c + +void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8_avg vpx_convolve8_avg_c + +void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c + +void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c + +void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8_horiz vpx_convolve8_horiz_c + +void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve8_vert vpx_convolve8_vert_c + +void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve_avg vpx_convolve_avg_c + +void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_convolve_copy vpx_convolve_copy_c + +void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c + +void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c + +void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c + +void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c + +void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c + +void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c + +void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c + +void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c + +void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c + +void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c + +void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c + +void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c + +void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c + +void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c + +void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c + +void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c + +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + +void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c + +void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c + +void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c + +void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c + +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + +void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c + +void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c + +void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c + +void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c + +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + +void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c + +void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c + +void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c + +void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c + +void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c + +void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c + +void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c + +void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c + +void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c + +void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c + +void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c + +void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c + +void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c + +void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c + +void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c + +void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c + +void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c + +void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c + +void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c + +void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c + +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + +void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c + +void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c + +void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c + +void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c + +void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c + +void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c + +void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c + +void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c + +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c + +void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c + +void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c + +void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c + +void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c + +void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c + +void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c + +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c + +void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c + +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c + +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c + +void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c + +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c + +void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c + +void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c + +void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c + +void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c + +void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_2d vpx_scaled_2d_c + +void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c + +void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c + +void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c + +void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_horiz vpx_scaled_horiz_c + +void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_vert vpx_scaled_vert_c + +void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c + +void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c + +void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c + +void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c + +void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c + +void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c + +void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c + +void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h new file mode 100644 index 00000000000..82574e096ca --- /dev/null +++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h @@ -0,0 +1,614 @@ +#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c + +void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c + +void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c + +void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c + +void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c + +void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c + +void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c + +void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c + +void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c + +void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c + +void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c + +void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c + +void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c + +void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c + +void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c + +void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c + +void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c + +void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c + +void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c + +void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c + +void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c + +void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c + +void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); +RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); + +void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); +#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c + +void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); +RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); + +void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); +RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); + +void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c + +void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c + +void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c + +void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_horiz vpx_scaled_horiz_c + +void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); +#define vpx_scaled_vert vpx_scaled_vert_c + +void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); + +void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); +#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c + +void vpx_dsp_rtcd(void); + +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + vpx_convolve8 = vpx_convolve8_c; + if (flags & HAS_SSE2) vpx_convolve8 = vpx_convolve8_sse2; + if (flags & HAS_SSSE3) vpx_convolve8 = vpx_convolve8_ssse3; + if (flags & HAS_AVX2) vpx_convolve8 = vpx_convolve8_avx2; + vpx_convolve8_avg = vpx_convolve8_avg_c; + if (flags & HAS_SSE2) vpx_convolve8_avg = vpx_convolve8_avg_sse2; + if (flags & HAS_SSSE3) vpx_convolve8_avg = vpx_convolve8_avg_ssse3; + vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; + if (flags & HAS_SSE2) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_sse2; + if (flags & HAS_SSSE3) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_ssse3; + vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; + if (flags & HAS_SSE2) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_sse2; + if (flags & HAS_SSSE3) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_ssse3; + vpx_convolve8_horiz = vpx_convolve8_horiz_c; + if (flags & HAS_SSE2) vpx_convolve8_horiz = vpx_convolve8_horiz_sse2; + if (flags & HAS_SSSE3) vpx_convolve8_horiz = vpx_convolve8_horiz_ssse3; + if (flags & HAS_AVX2) vpx_convolve8_horiz = vpx_convolve8_horiz_avx2; + vpx_convolve8_vert = vpx_convolve8_vert_c; + if (flags & HAS_SSE2) vpx_convolve8_vert = vpx_convolve8_vert_sse2; + if (flags & HAS_SSSE3) vpx_convolve8_vert = vpx_convolve8_vert_ssse3; + if (flags & HAS_AVX2) vpx_convolve8_vert = vpx_convolve8_vert_avx2; + vpx_convolve_avg = vpx_convolve_avg_c; + if (flags & HAS_SSE2) vpx_convolve_avg = vpx_convolve_avg_sse2; + vpx_convolve_copy = vpx_convolve_copy_c; + if (flags & HAS_SSE2) vpx_convolve_copy = vpx_convolve_copy_sse2; + vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_c; + if (flags & HAS_SSSE3) vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_ssse3; + vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_c; + if (flags & HAS_SSSE3) vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_ssse3; + vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_c; + if (flags & HAS_SSSE3) vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_ssse3; + vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_c; + if (flags & HAS_SSSE3) vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_ssse3; + vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_c; + if (flags & HAS_SSSE3) vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_ssse3; + vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_c; + if (flags & HAS_SSSE3) vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_ssse3; + vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_sse2; + vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_c; + if (flags & HAS_SSSE3) vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_ssse3; + vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; + if (flags & HAS_SSSE3) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_ssse3; + vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_c; + if (flags & HAS_SSSE3) vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_ssse3; + vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2; + vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_sse2; + vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_c; + if (flags & HAS_SSSE3) vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_ssse3; + vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_c; + if (flags & HAS_SSSE3) vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_ssse3; + vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_c; + if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3; + vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c; + if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3; + vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_sse2; + vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_sse2; + vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_sse2; + vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_sse2; + vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_sse2; + vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_sse2; + vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_sse2; + vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_sse2; + vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_sse2; + vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_sse2; + vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_sse2; + vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_sse2; + vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_sse2; + vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_sse2; + vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_sse2; + vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2; + vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_sse2; + vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_sse2; + vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_sse2; + vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_sse2; + vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; + if (flags & HAS_SSE2) vpx_idct16x16_10_add = vpx_idct16x16_10_add_sse2; + vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; + if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2; + vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; + if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2; + vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; + if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2; + vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; + if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_sse2; + vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; + if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2; + vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; + if (flags & HAS_SSE2) vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2; + vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; + if (flags & HAS_SSE2) vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2; + vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; + if (flags & HAS_SSE2) vpx_idct4x4_1_add = vpx_idct4x4_1_add_sse2; + vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; + if (flags & HAS_SSE2) vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2; + vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; + if (flags & HAS_SSE2) vpx_idct8x8_1_add = vpx_idct8x8_1_add_sse2; + vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; + if (flags & HAS_SSE2) vpx_idct8x8_64_add = vpx_idct8x8_64_add_sse2; + vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_c; + if (flags & HAS_SSE2) vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_sse2; + vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_sse2; + vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_sse2; + vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_sse2; + vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_sse2; + vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_sse2; + if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_avx2; + vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; + if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_sse2; + if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_avx2; + vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_sse2; + vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_sse2; + vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_sse2; + vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_sse2; + vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_sse2; + vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; + if (flags & HAS_SSE2) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_sse2; + vpx_scaled_2d = vpx_scaled_2d_c; + if (flags & HAS_SSSE3) vpx_scaled_2d = vpx_scaled_2d_ssse3; + vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_sse2; + vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_sse2; + vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_sse2; + vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_sse2; + vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c; + if (flags & HAS_SSE2) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_sse2; + vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; + if (flags & HAS_SSE2) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_sse2; + vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; + if (flags & HAS_SSE2) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_sse2; + vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; + if (flags & HAS_SSE2) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_sse2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/thirdparty/libvpx/vp8_rtcd.h b/thirdparty/libvpx/vp8_rtcd.h new file mode 100644 index 00000000000..c5eeb5e579f --- /dev/null +++ b/thirdparty/libvpx/vp8_rtcd.h @@ -0,0 +1,9 @@ +#include "vpx_config.h" + +#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) + #include "rtcd/vp8_rtcd_x86.h" +#elif defined(WEBM_ARMASM) && ARCH_ARM + #include "rtcd/vp8_rtcd_arm.h" +#else + #include "rtcd/vp8_rtcd_c.h" +#endif diff --git a/thirdparty/libvpx/vp9_rtcd.h b/thirdparty/libvpx/vp9_rtcd.h new file mode 100644 index 00000000000..cf2b463d639 --- /dev/null +++ b/thirdparty/libvpx/vp9_rtcd.h @@ -0,0 +1,9 @@ +#include "vpx_config.h" + +#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) + #include "rtcd/vp9_rtcd_x86.h" +#elif defined(WEBM_ARMASM) && ARCH_ARM + #include "rtcd/vp9_rtcd_arm.h" +#else + #include "rtcd/vp9_rtcd_c.h" +#endif diff --git a/thirdparty/libvpx/vpx_config.asm b/thirdparty/libvpx/vpx_config.asm new file mode 100644 index 00000000000..8f2119e7bc2 --- /dev/null +++ b/thirdparty/libvpx/vpx_config.asm @@ -0,0 +1,65 @@ +%ifdef X86_32 + ARCH_X86 equ 1 + ARCH_X86_64 equ 0 +%elifdef X86_64 + ARCH_X86 equ 0 + ARCH_X86_64 equ 1 +%endif + +HAVE_VPX_PORTS equ 1 +CONFIG_DEPENDENCY_TRACKING equ 0 +CONFIG_EXTERNAL_BUILD equ 0 +CONFIG_INSTALL_DOCS equ 0 +CONFIG_INSTALL_BINS equ 0 +CONFIG_INSTALL_LIBS equ 0 +CONFIG_INSTALL_SRCS equ 0 +CONFIG_USE_X86INC equ 1 +CONFIG_DEBUG equ 0 +CONFIG_GPROF equ 0 +CONFIG_GCOV equ 0 +CONFIG_RVCT equ 0 +CONFIG_PIC equ 1 ;TODO: autodetect +CONFIG_CODEC_SRCS equ 0 +CONFIG_DEBUG_LIBS equ 0 +CONFIG_DEQUANT_TOKENS equ 0 +CONFIG_DC_RECON equ 0 +CONFIG_RUNTIME_CPU_DETECT equ 1 +CONFIG_POSTPROC equ 0 +CONFIG_VP9_POSTPROC equ 0 +CONFIG_MULTITHREAD equ 1 +CONFIG_INTERNAL_STATS equ 0 +CONFIG_VP8_ENCODER equ 0 +CONFIG_VP8_DECODER equ 1 +CONFIG_VP9_ENCODER equ 0 +CONFIG_VP9_DECODER equ 1 +CONFIG_VP8 equ 1 +CONFIG_VP9 equ 1 +CONFIG_ENCODERS equ 0 +CONFIG_DECODERS equ 1 +CONFIG_STATIC_MSVCRT equ 0 +CONFIG_SPATIAL_RESAMPLING equ 0 +CONFIG_REALTIME_ONLY equ 0 +CONFIG_ONTHEFLY_BITPACKING equ 0 +CONFIG_ERROR_CONCEALMENT equ 0 +CONFIG_SHARED equ 0 +CONFIG_STATIC equ 0 +CONFIG_SMALL equ 0 +CONFIG_POSTPROC_VISUALIZER equ 0 +CONFIG_OS_SUPPORT equ 1 +CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 0 +CONFIG_LIBYUV equ 0 +CONFIG_DECODE_PERF_TESTS equ 0 +CONFIG_ENCODE_PERF_TESTS equ 0 +CONFIG_MULTI_RES_ENCODING equ 0 +CONFIG_TEMPORAL_DENOISING equ 1 +CONFIG_VP9_TEMPORAL_DENOISING equ 0 +CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 +CONFIG_VP9_HIGHBITDEPTH equ 0 +CONFIG_BETTER_HW_COMPATIBILITY equ 0 +CONFIG_EXPERIMENTAL equ 0 +CONFIG_SIZE_LIMIT equ 0 +CONFIG_SPATIAL_SVC equ 0 +CONFIG_FP_MB_STATS equ 0 +CONFIG_EMULATE_HARDWARE equ 0 +CONFIG_MISC_FIXES equ 0 diff --git a/thirdparty/libvpx/vpx_config.h b/thirdparty/libvpx/vpx_config.h new file mode 100644 index 00000000000..7058f0327b9 --- /dev/null +++ b/thirdparty/libvpx/vpx_config.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ +/* */ +/* Use of this source code is governed by a BSD-style license */ +/* that can be found in the LICENSE file in the root of the source */ +/* tree. An additional intellectual property rights grant can be found */ +/* in the file PATENTS. All contributing project authors may */ +/* be found in the AUTHORS file in the root of the source tree. */ +/* This file automatically generated by configure. Do not edit! */ +#ifndef VPX_CONFIG_H +#define VPX_CONFIG_H +#define RESTRICT +#define INLINE inline + +#define HAVE_MIPS32 0 +#define HAVE_MEDIA 0 + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) + #define ARCH_X86 1 + #define ARCH_X86_64 0 + + #define ARCH_ARM 0 + #define HAVE_NEON 0 + #define HAVE_NEON_ASM 0 + + #define HAVE_MMX 1 + #define HAVE_SSE2 1 + #define HAVE_SSSE3 1 + #define HAVE_AVX2 1 +#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) + #define ARCH_X86 0 + #define ARCH_X86_64 1 + + #define ARCH_ARM 0 + #define HAVE_NEON 0 + #define HAVE_NEON_ASM 0 + + #define HAVE_MMX 1 + #define HAVE_SSE2 1 + #define HAVE_SSSE3 1 + #define HAVE_AVX2 1 +#elif defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) || defined(__aarch64__) + #define ARCH_X86 0 + #define ARCH_X86_64 0 + + #define ARCH_ARM 1 + #define HAVE_NEON 1 + #define HAVE_NEON_ASM 1 +#else + #define ARCH_X86 0 + #define ARCH_X86_64 0 + + #define ARCH_ARM 0 + #define HAVE_NEON 0 + #define HAVE_NEON_ASM 0 +#endif + +#define CONFIG_BIG_ENDIAN 0 //TODO: Autodetect + +#ifdef _WIN32 + #define HAVE_PTHREAD_H 0 + #define HAVE_UNISTD_H 0 +#else + #define HAVE_PTHREAD_H 1 + #define HAVE_UNISTD_H 1 +#endif + +/**/ + +#define HAVE_VPX_PORTS 1 +#define CONFIG_DEPENDENCY_TRACKING 0 +#define CONFIG_EXTERNAL_BUILD 0 +#define CONFIG_INSTALL_DOCS 0 +#define CONFIG_INSTALL_BINS 0 +#define CONFIG_INSTALL_LIBS 0 +#define CONFIG_INSTALL_SRCS 0 +#define CONFIG_DEBUG 0 +#define CONFIG_GPROF 0 +#define CONFIG_GCOV 0 +#define CONFIG_RVCT 0 +#define CONFIG_CODEC_SRCS 0 +#define CONFIG_DEBUG_LIBS 0 +#define CONFIG_DEQUANT_TOKENS 0 +#define CONFIG_DC_RECON 0 +#define CONFIG_RUNTIME_CPU_DETECT 1 +#define CONFIG_POSTPROC 0 +#define CONFIG_VP9_POSTPROC 0 +#define CONFIG_MULTITHREAD 1 +#define CONFIG_INTERNAL_STATS 0 +#define CONFIG_VP8_ENCODER 0 +#define CONFIG_VP8_DECODER 1 +#define CONFIG_VP9_ENCODER 0 +#define CONFIG_VP9_DECODER 1 +#define CONFIG_VP8 1 +#define CONFIG_VP9 1 +#define CONFIG_ENCODERS 0 +#define CONFIG_DECODERS 1 +#define CONFIG_STATIC_MSVCRT 0 +#define CONFIG_SPATIAL_RESAMPLING 0 +#define CONFIG_REALTIME_ONLY 0 +#define CONFIG_ONTHEFLY_BITPACKING 0 +#define CONFIG_ERROR_CONCEALMENT 0 +#define CONFIG_SHARED 0 +#define CONFIG_STATIC 0 +#define CONFIG_SMALL 0 +#define CONFIG_POSTPROC_VISUALIZER 0 +#define CONFIG_OS_SUPPORT 1 +#define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 0 +#define CONFIG_LIBYUV 0 +#define CONFIG_DECODE_PERF_TESTS 0 +#define CONFIG_ENCODE_PERF_TESTS 0 +#define CONFIG_MULTI_RES_ENCODING 0 +#define CONFIG_TEMPORAL_DENOISING 0 +#define CONFIG_VP9_TEMPORAL_DENOISING 0 +#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 +#define CONFIG_VP9_HIGHBITDEPTH 0 +#define CONFIG_BETTER_HW_COMPATIBILITY 0 +#define CONFIG_EXPERIMENTAL 0 +#define CONFIG_SIZE_LIMIT 0 +#define CONFIG_SPATIAL_SVC 0 +#define CONFIG_FP_MB_STATS 0 +#define CONFIG_EMULATE_HARDWARE 0 +#define CONFIG_MISC_FIXES 0 +#endif /* VPX_CONFIG_H */ diff --git a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm similarity index 98% rename from thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm rename to thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm index 115790d4801..b2846c410bb 100644 --- a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon_asm.asm +++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm @@ -1,3 +1,5 @@ +; This file was created from a .asm file +; using the ads2armasm_ms.pl script. ; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; @@ -20,11 +22,10 @@ EXPORT |vpx_tm_predictor_8x8_neon| EXPORT |vpx_tm_predictor_16x16_neon| EXPORT |vpx_tm_predictor_32x32_neon| - ARM - REQUIRE8 - PRESERVE8 + + - AREA ||.text||, CODE, READONLY, ALIGN=2 + AREA |.text|, CODE, READONLY, ALIGN=2 ;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -42,6 +43,7 @@ vst1.32 {d0[0]}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_4x4_neon| + ALIGN 4 ;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -63,6 +65,7 @@ vst1.8 {d0}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_8x8_neon| + ALIGN 4 ;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -92,6 +95,7 @@ vst1.8 {q0}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_16x16_neon| + ALIGN 4 ;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -125,6 +129,7 @@ loop_v bgt loop_v bx lr ENDP ; |vpx_v_predictor_32x32_neon| + ALIGN 4 ;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -146,6 +151,7 @@ loop_v vst1.32 {d0[0]}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_4x4_neon| + ALIGN 4 ;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -175,6 +181,7 @@ loop_v vst1.64 {d0}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_8x8_neon| + ALIGN 4 ;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -220,6 +227,7 @@ loop_v vst1.8 {q0}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_16x16_neon| + ALIGN 4 ;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -286,6 +294,7 @@ loop_h bgt loop_h bx lr ENDP ; |vpx_h_predictor_32x32_neon| + ALIGN 4 ;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -332,6 +341,7 @@ loop_h vst1.32 {d1[0]}, [r0], r1 bx lr ENDP ; |vpx_tm_predictor_4x4_neon| + ALIGN 4 ;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -404,6 +414,7 @@ loop_h bx lr ENDP ; |vpx_tm_predictor_8x8_neon| + ALIGN 4 ;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -497,6 +508,7 @@ loop_16x16_neon bx lr ENDP ; |vpx_tm_predictor_16x16_neon| + ALIGN 4 ;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, @@ -626,5 +638,6 @@ loop_32x32_neon bx lr ENDP ; |vpx_tm_predictor_32x32_neon| + ALIGN 4 END diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm similarity index 99% rename from thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm rename to thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm index d5da7a8409c..9c3736faf84 100644 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm +++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm @@ -1,3 +1,5 @@ +; This file was created from a .asm file +; using the ads2armasm_ms.pl script. ; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; @@ -11,9 +13,8 @@ EXPORT |vpx_lpf_horizontal_edge_8_neon| EXPORT |vpx_lpf_horizontal_edge_16_neon| EXPORT |vpx_lpf_vertical_16_neon| - ARM - AREA ||.text||, CODE, READONLY, ALIGN=2 + AREA |.text|, CODE, READONLY, ALIGN=2 ; void mb_lpf_horizontal_edge(uint8_t *s, int p, ; const uint8_t *blimit, @@ -117,6 +118,7 @@ h_next pop {r4-r8, pc} ENDP ; |mb_lpf_horizontal_edge| + ALIGN 4 ; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, ; const uint8_t *blimit, @@ -131,6 +133,7 @@ h_next mov r12, #1 b mb_lpf_horizontal_edge ENDP ; |vpx_lpf_horizontal_edge_8_neon| + ALIGN 4 ; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, ; const uint8_t *blimit, @@ -145,6 +148,7 @@ h_next mov r12, #2 b mb_lpf_horizontal_edge ENDP ; |vpx_lpf_horizontal_edge_16_neon| + ALIGN 4 ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, ; const uint8_t *blimit, @@ -309,6 +313,7 @@ v_end pop {r4-r8, pc} ENDP ; |vpx_lpf_vertical_16_neon| + ALIGN 4 ; void vpx_wide_mbfilter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the @@ -631,5 +636,6 @@ v_end bx lr ENDP ; |vpx_wide_mbfilter_neon| + ALIGN 4 END diff --git a/thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm similarity index 83% rename from thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm rename to thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm index c9ca10801df..4cf9988e659 100644 --- a/thirdparty/libvpx/vpx_dsp/arm/save_reg_neon.asm +++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm @@ -1,3 +1,5 @@ +; This file was created from a .asm file +; using the ads2armasm_ms.pl script. ; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; @@ -12,11 +14,10 @@ EXPORT |vpx_push_neon| EXPORT |vpx_pop_neon| - ARM - REQUIRE8 - PRESERVE8 + + - AREA ||.text||, CODE, READONLY, ALIGN=2 + AREA |.text|, CODE, READONLY, ALIGN=2 |vpx_push_neon| PROC vst1.i64 {d8, d9, d10, d11}, [r0]! @@ -24,6 +25,7 @@ bx lr ENDP + ALIGN 4 |vpx_pop_neon| PROC vld1.i64 {d8, d9, d10, d11}, [r0]! @@ -31,6 +33,7 @@ bx lr ENDP + ALIGN 4 END diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s new file mode 100644 index 00000000000..3932227fc5b --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s @@ -0,0 +1,658 @@ +@ This file was created from a .asm file +@ using the ads2gas.pl script. + .equ DO1STROUNDING, 0 +@ +@ Copyright (c) 2014 The WebM project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + + .global vpx_v_predictor_4x4_neon + .type vpx_v_predictor_4x4_neon, function + .global vpx_v_predictor_8x8_neon + .type vpx_v_predictor_8x8_neon, function + .global vpx_v_predictor_16x16_neon + .type vpx_v_predictor_16x16_neon, function + .global vpx_v_predictor_32x32_neon + .type vpx_v_predictor_32x32_neon, function + .global vpx_h_predictor_4x4_neon + .type vpx_h_predictor_4x4_neon, function + .global vpx_h_predictor_8x8_neon + .type vpx_h_predictor_8x8_neon, function + .global vpx_h_predictor_16x16_neon + .type vpx_h_predictor_16x16_neon, function + .global vpx_h_predictor_32x32_neon + .type vpx_h_predictor_32x32_neon, function + .global vpx_tm_predictor_4x4_neon + .type vpx_tm_predictor_4x4_neon, function + .global vpx_tm_predictor_8x8_neon + .type vpx_tm_predictor_8x8_neon, function + .global vpx_tm_predictor_16x16_neon + .type vpx_tm_predictor_16x16_neon, function + .global vpx_tm_predictor_32x32_neon + .type vpx_tm_predictor_32x32_neon, function + .arm + .eabi_attribute 24, 1 @Tag_ABI_align_needed + .eabi_attribute 25, 1 @Tag_ABI_align_preserved + +.text +.p2align 2 + +@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_v_predictor_4x4_neon: + vpx_v_predictor_4x4_neon: @ PROC + vld1.32 {d0[0]}, [r2] + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + bx lr + .size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon @ ENDP @ |vpx_v_predictor_4x4_neon| + +@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_v_predictor_8x8_neon: + vpx_v_predictor_8x8_neon: @ PROC + vld1.8 {d0}, [r2] + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + bx lr + .size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon @ ENDP @ |vpx_v_predictor_8x8_neon| + +@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_v_predictor_16x16_neon: + vpx_v_predictor_16x16_neon: @ PROC + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + bx lr + .size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon @ ENDP @ |vpx_v_predictor_16x16_neon| + +@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_v_predictor_32x32_neon: + vpx_v_predictor_32x32_neon: @ PROC + vld1.8 {q0, q1}, [r2] + mov r2, #2 +loop_v: + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + subs r2, r2, #1 + bgt loop_v + bx lr + .size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon @ ENDP @ |vpx_v_predictor_32x32_neon| + +@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_h_predictor_4x4_neon: + vpx_h_predictor_4x4_neon: @ PROC + vld1.32 {d1[0]}, [r3] + vdup.8 d0, d1[0] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[1] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[2] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[3] + vst1.32 {d0[0]}, [r0], r1 + bx lr + .size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon @ ENDP @ |vpx_h_predictor_4x4_neon| + +@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_h_predictor_8x8_neon: + vpx_h_predictor_8x8_neon: @ PROC + vld1.64 {d1}, [r3] + vdup.8 d0, d1[0] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[1] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[2] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[3] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[4] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[5] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[6] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[7] + vst1.64 {d0}, [r0], r1 + bx lr + .size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon @ ENDP @ |vpx_h_predictor_8x8_neon| + +@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_h_predictor_16x16_neon: + vpx_h_predictor_16x16_neon: @ PROC + vld1.8 {q1}, [r3] + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0], r1 + bx lr + .size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon @ ENDP @ |vpx_h_predictor_16x16_neon| + +@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_h_predictor_32x32_neon: + vpx_h_predictor_32x32_neon: @ PROC + sub r1, r1, #16 + mov r2, #2 +loop_h: + vld1.8 {q1}, [r3]! + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + subs r2, r2, #1 + bgt loop_h + bx lr + .size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon @ ENDP @ |vpx_h_predictor_32x32_neon| + +@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_tm_predictor_4x4_neon: + vpx_tm_predictor_4x4_neon: @ PROC + @ Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.u8 {d0[]}, [r12] + + @ Load above 4 pixels + vld1.32 {d2[0]}, [r2] + + @ Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + @ Load left row by row and compute left + (above - ytop_left) + @ 1st row and 2nd row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + @ 3rd row and 4th row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + bx lr + .size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon @ ENDP @ |vpx_tm_predictor_4x4_neon| + +@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_tm_predictor_8x8_neon: + vpx_tm_predictor_8x8_neon: @ PROC + @ Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ preload 8 left + vld1.8 {d30}, [r3] + + @ Load above 8 pixels + vld1.64 {d2}, [r2] + + vmovl.u8 q10, d30 + + @ Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + @ Load left row by row and compute left + (above - ytop_left) + @ 1st row and 2nd row + vdup.16 q0, d20[0] + vdup.16 q1, d20[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + @ 3rd row and 4th row + vdup.16 q8, d20[2] + vdup.16 q9, d20[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + @ 5th row and 6th row + vdup.16 q0, d21[0] + vdup.16 q1, d21[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + @ 7th row and 8th row + vdup.16 q8, d21[2] + vdup.16 q9, d21[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + bx lr + .size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon @ ENDP @ |vpx_tm_predictor_8x8_neon| + +@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_tm_predictor_16x16_neon: + vpx_tm_predictor_16x16_neon: @ PROC + @ Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ Load above 8 pixels + vld1.8 {q1}, [r2] + + @ preload 8 left into r12 + vld1.8 {d18}, [r3]! + + @ Compute above - ytop_left + vsubl.u8 q2, d2, d0 + vsubl.u8 q3, d3, d0 + + vmovl.u8 q10, d18 + + @ Load left row by row and compute left + (above - ytop_left) + @ Process 8 rows in each single loop and loop 2 times to process 16 rows. + mov r2, #2 + +loop_16x16_neon: + @ Process two rows. + vdup.16 q0, d20[0] + vdup.16 q8, d20[1] + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d20[2] @ proload next 2 rows data + vdup.16 q8, d20[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + @ Process two rows. + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[0] @ proload next 2 rows data + vdup.16 q8, d21[1] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[2] @ proload next 2 rows data + vdup.16 q8, d21[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vld1.8 {d18}, [r3]! @ preload 8 left into r12 + vmovl.u8 q10, d18 + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + subs r2, r2, #1 + bgt loop_16x16_neon + + bx lr + .size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon @ ENDP @ |vpx_tm_predictor_16x16_neon| + +@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, +@ const uint8_t *above, +@ const uint8_t *left) +@ r0 uint8_t *dst +@ r1 ptrdiff_t y_stride +@ r2 const uint8_t *above +@ r3 const uint8_t *left + +_vpx_tm_predictor_32x32_neon: + vpx_tm_predictor_32x32_neon: @ PROC + @ Load ytop_left = above[-1]; + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ Load above 32 pixels + vld1.8 {q1}, [r2]! + vld1.8 {q2}, [r2] + + @ preload 8 left pixels + vld1.8 {d26}, [r3]! + + @ Compute above - ytop_left + vsubl.u8 q8, d2, d0 + vsubl.u8 q9, d3, d0 + vsubl.u8 q10, d4, d0 + vsubl.u8 q11, d5, d0 + + vmovl.u8 q3, d26 + + @ Load left row by row and compute left + (above - ytop_left) + @ Process 8 rows in each single loop and loop 4 times to process 32 rows. + mov r2, #4 + +loop_32x32_neon: + @ Process two rows. + vdup.16 q0, d6[0] + vdup.16 q2, d6[1] + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q1, d6[2] + vdup.16 q2, d6[3] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q1, q8 + vadd.s16 q13, q1, q9 + vadd.s16 q14, q1, q10 + vadd.s16 q15, q1, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[0] + vdup.16 q2, d7[1] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[2] + vdup.16 q2, d7[3] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vld1.8 {d0}, [r3]! @ preload 8 left pixels + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vmovl.u8 q3, d0 + vst1.64 {d24-d27}, [r0], r1 + + subs r2, r2, #1 + bgt loop_32x32_neon + + bx lr + .size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon @ ENDP @ |vpx_tm_predictor_32x32_neon| + + .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s new file mode 100644 index 00000000000..f6b05406fb2 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s @@ -0,0 +1,647 @@ +@ This file was created from a .asm file +@ using the ads2gas.pl script. + .equ DO1STROUNDING, 0 +@ +@ Copyright (c) 2013 The WebM project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + + .global vpx_lpf_horizontal_edge_8_neon + .type vpx_lpf_horizontal_edge_8_neon, function + .global vpx_lpf_horizontal_edge_16_neon + .type vpx_lpf_horizontal_edge_16_neon, function + .global vpx_lpf_vertical_16_neon + .type vpx_lpf_vertical_16_neon, function + .arm + +.text +.p2align 2 + +@ void mb_lpf_horizontal_edge(uint8_t *s, int p, +@ const uint8_t *blimit, +@ const uint8_t *limit, +@ const uint8_t *thresh, +@ int count) +@ r0 uint8_t *s, +@ r1 int p, /* pitch */ +@ r2 const uint8_t *blimit, +@ r3 const uint8_t *limit, +@ sp const uint8_t *thresh, +@ r12 int count +_mb_lpf_horizontal_edge: + mb_lpf_horizontal_edge: @ PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] @ load thresh + +h_count: + vld1.8 {d16[]}, [r2] @ load *blimit + vld1.8 {d17[]}, [r3] @ load *limit + vld1.8 {d18[]}, [r4] @ load *thresh + + sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines + + vld1.u8 {d0}, [r8,:64], r1 @ p7 + vld1.u8 {d1}, [r8,:64], r1 @ p6 + vld1.u8 {d2}, [r8,:64], r1 @ p5 + vld1.u8 {d3}, [r8,:64], r1 @ p4 + vld1.u8 {d4}, [r8,:64], r1 @ p3 + vld1.u8 {d5}, [r8,:64], r1 @ p2 + vld1.u8 {d6}, [r8,:64], r1 @ p1 + vld1.u8 {d7}, [r8,:64], r1 @ p0 + vld1.u8 {d8}, [r8,:64], r1 @ q0 + vld1.u8 {d9}, [r8,:64], r1 @ q1 + vld1.u8 {d10}, [r8,:64], r1 @ q2 + vld1.u8 {d11}, [r8,:64], r1 @ q3 + vld1.u8 {d12}, [r8,:64], r1 @ q4 + vld1.u8 {d13}, [r8,:64], r1 @ q5 + vld1.u8 {d14}, [r8,:64], r1 @ q6 + vld1.u8 {d15}, [r8,:64], r1 @ q7 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq h_mbfilter + + @ flat && mask were not set for any of the channels. Just store the values + @ from filter. + sub r8, r0, r1, lsl #1 + + vst1.u8 {d25}, [r8,:64], r1 @ store op1 + vst1.u8 {d24}, [r8,:64], r1 @ store op0 + vst1.u8 {d23}, [r8,:64], r1 @ store oq0 + vst1.u8 {d26}, [r8,:64], r1 @ store oq1 + + b h_next + +h_mbfilter: + tst r7, #2 + beq h_wide_mbfilter + + @ flat2 was not set for any of the channels. Just store the values from + @ mbfilter. + sub r8, r0, r1, lsl #1 + sub r8, r8, r1 + + vst1.u8 {d18}, [r8,:64], r1 @ store op2 + vst1.u8 {d19}, [r8,:64], r1 @ store op1 + vst1.u8 {d20}, [r8,:64], r1 @ store op0 + vst1.u8 {d21}, [r8,:64], r1 @ store oq0 + vst1.u8 {d22}, [r8,:64], r1 @ store oq1 + vst1.u8 {d23}, [r8,:64], r1 @ store oq2 + + b h_next + +h_wide_mbfilter: + sub r8, r0, r1, lsl #3 + add r8, r8, r1 + + vst1.u8 {d16}, [r8,:64], r1 @ store op6 + vst1.u8 {d24}, [r8,:64], r1 @ store op5 + vst1.u8 {d25}, [r8,:64], r1 @ store op4 + vst1.u8 {d26}, [r8,:64], r1 @ store op3 + vst1.u8 {d27}, [r8,:64], r1 @ store op2 + vst1.u8 {d18}, [r8,:64], r1 @ store op1 + vst1.u8 {d19}, [r8,:64], r1 @ store op0 + vst1.u8 {d20}, [r8,:64], r1 @ store oq0 + vst1.u8 {d21}, [r8,:64], r1 @ store oq1 + vst1.u8 {d22}, [r8,:64], r1 @ store oq2 + vst1.u8 {d23}, [r8,:64], r1 @ store oq3 + vst1.u8 {d1}, [r8,:64], r1 @ store oq4 + vst1.u8 {d2}, [r8,:64], r1 @ store oq5 + vst1.u8 {d3}, [r8,:64], r1 @ store oq6 + +h_next: + add r0, r0, #8 + subs r12, r12, #1 + bne h_count + + vpop {d8-d15} + pop {r4-r8, pc} + + .size mb_lpf_horizontal_edge, .-mb_lpf_horizontal_edge @ ENDP @ |mb_lpf_horizontal_edge| + +@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, +@ const uint8_t *blimit, +@ const uint8_t *limit, +@ const uint8_t *thresh) +@ r0 uint8_t *s, +@ r1 int pitch, +@ r2 const uint8_t *blimit, +@ r3 const uint8_t *limit, +@ sp const uint8_t *thresh +_vpx_lpf_horizontal_edge_8_neon: + vpx_lpf_horizontal_edge_8_neon: @ PROC + mov r12, #1 + b mb_lpf_horizontal_edge + .size vpx_lpf_horizontal_edge_8_neon, .-vpx_lpf_horizontal_edge_8_neon @ ENDP @ |vpx_lpf_horizontal_edge_8_neon| + +@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, +@ const uint8_t *blimit, +@ const uint8_t *limit, +@ const uint8_t *thresh) +@ r0 uint8_t *s, +@ r1 int pitch, +@ r2 const uint8_t *blimit, +@ r3 const uint8_t *limit, +@ sp const uint8_t *thresh +_vpx_lpf_horizontal_edge_16_neon: + vpx_lpf_horizontal_edge_16_neon: @ PROC + mov r12, #2 + b mb_lpf_horizontal_edge + .size vpx_lpf_horizontal_edge_16_neon, .-vpx_lpf_horizontal_edge_16_neon @ ENDP @ |vpx_lpf_horizontal_edge_16_neon| + +@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, +@ const uint8_t *blimit, +@ const uint8_t *limit, +@ const uint8_t *thresh) +@ r0 uint8_t *s, +@ r1 int p, /* pitch */ +@ r2 const uint8_t *blimit, +@ r3 const uint8_t *limit, +@ sp const uint8_t *thresh, +_vpx_lpf_vertical_16_neon: + vpx_lpf_vertical_16_neon: @ PROC + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] @ load thresh + + vld1.8 {d16[]}, [r2] @ load *blimit + vld1.8 {d17[]}, [r3] @ load *limit + vld1.8 {d18[]}, [r4] @ load *thresh + + sub r8, r0, #8 + + vld1.8 {d0}, [r8,:64], r1 + vld1.8 {d8}, [r0,:64], r1 + vld1.8 {d1}, [r8,:64], r1 + vld1.8 {d9}, [r0,:64], r1 + vld1.8 {d2}, [r8,:64], r1 + vld1.8 {d10}, [r0,:64], r1 + vld1.8 {d3}, [r8,:64], r1 + vld1.8 {d11}, [r0,:64], r1 + vld1.8 {d4}, [r8,:64], r1 + vld1.8 {d12}, [r0,:64], r1 + vld1.8 {d5}, [r8,:64], r1 + vld1.8 {d13}, [r0,:64], r1 + vld1.8 {d6}, [r8,:64], r1 + vld1.8 {d14}, [r0,:64], r1 + vld1.8 {d7}, [r8,:64], r1 + vld1.8 {d15}, [r0,:64], r1 + + sub r0, r0, r1, lsl #3 + + vtrn.32 q0, q2 + vtrn.32 q1, q3 + vtrn.32 q4, q6 + vtrn.32 q5, q7 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + vtrn.8 d4, d5 + vtrn.8 d6, d7 + + vtrn.8 d8, d9 + vtrn.8 d10, d11 + vtrn.8 d12, d13 + vtrn.8 d14, d15 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq v_mbfilter + + @ flat && mask were not set for any of the channels. Just store the values + @ from filter. + sub r8, r0, #2 + + vswp d23, d25 + + vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 + vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 + vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 + vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 + vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 + vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 + vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 + vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 + + b v_end + +v_mbfilter: + tst r7, #2 + beq v_wide_mbfilter + + @ flat2 was not set for any of the channels. Just store the values from + @ mbfilter. + sub r8, r0, #3 + + vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 + vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 + vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 + vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 + vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 + vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 + vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 + vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 + vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 + vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 + vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 + vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 + vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 + vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 + vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 + vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 + + b v_end + +v_wide_mbfilter: + sub r8, r0, #8 + + vtrn.32 d0, d26 + vtrn.32 d16, d27 + vtrn.32 d24, d18 + vtrn.32 d25, d19 + + vtrn.16 d0, d24 + vtrn.16 d16, d25 + vtrn.16 d26, d18 + vtrn.16 d27, d19 + + vtrn.8 d0, d16 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + vtrn.8 d18, d19 + + vtrn.32 d20, d1 + vtrn.32 d21, d2 + vtrn.32 d22, d3 + vtrn.32 d23, d15 + + vtrn.16 d20, d22 + vtrn.16 d21, d23 + vtrn.16 d1, d3 + vtrn.16 d2, d15 + + vtrn.8 d20, d21 + vtrn.8 d22, d23 + vtrn.8 d1, d2 + vtrn.8 d3, d15 + + vst1.8 {d0}, [r8,:64], r1 + vst1.8 {d20}, [r0,:64], r1 + vst1.8 {d16}, [r8,:64], r1 + vst1.8 {d21}, [r0,:64], r1 + vst1.8 {d24}, [r8,:64], r1 + vst1.8 {d22}, [r0,:64], r1 + vst1.8 {d25}, [r8,:64], r1 + vst1.8 {d23}, [r0,:64], r1 + vst1.8 {d26}, [r8,:64], r1 + vst1.8 {d1}, [r0,:64], r1 + vst1.8 {d27}, [r8,:64], r1 + vst1.8 {d2}, [r0,:64], r1 + vst1.8 {d18}, [r8,:64], r1 + vst1.8 {d3}, [r0,:64], r1 + vst1.8 {d19}, [r8,:64], r1 + vst1.8 {d15}, [r0,:64], r1 + +v_end: + vpop {d8-d15} + pop {r4-r8, pc} + + .size vpx_lpf_vertical_16_neon, .-vpx_lpf_vertical_16_neon @ ENDP @ |vpx_lpf_vertical_16_neon| + +@ void vpx_wide_mbfilter_neon(); +@ This is a helper function for the loopfilters. The invidual functions do the +@ necessary load, transpose (if necessary) and store. +@ +@ r0-r3 PRESERVE +@ d16 blimit +@ d17 limit +@ d18 thresh +@ d0 p7 +@ d1 p6 +@ d2 p5 +@ d3 p4 +@ d4 p3 +@ d5 p2 +@ d6 p1 +@ d7 p0 +@ d8 q0 +@ d9 q1 +@ d10 q2 +@ d11 q3 +@ d12 q4 +@ d13 q5 +@ d14 q6 +@ d15 q7 +_vpx_wide_mbfilter_neon: + vpx_wide_mbfilter_neon: @ PROC + mov r7, #0 + + @ filter_mask + vabd.u8 d19, d4, d5 @ abs(p3 - p2) + vabd.u8 d20, d5, d6 @ abs(p2 - p1) + vabd.u8 d21, d6, d7 @ abs(p1 - p0) + vabd.u8 d22, d9, d8 @ abs(q1 - q0) + vabd.u8 d23, d10, d9 @ abs(q2 - q1) + vabd.u8 d24, d11, d10 @ abs(q3 - q2) + + @ only compare the largest value to limit + vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) + vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) + vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) + vmax.u8 d19, d19, d20 + + vabd.u8 d24, d7, d8 @ abs(p0 - q0) + + vmax.u8 d19, d19, d23 + + vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) + vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 + + @ abs () > limit + vcge.u8 d19, d17, d19 + + @ flatmask4 + vabd.u8 d25, d7, d5 @ abs(p0 - p2) + vabd.u8 d26, d8, d10 @ abs(q0 - q2) + vabd.u8 d27, d4, d7 @ abs(p3 - p0) + vabd.u8 d28, d11, d8 @ abs(q3 - q0) + + @ only compare the largest value to thresh + vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) + vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) + vmax.u8 d25, d25, d26 + vmax.u8 d20, d20, d25 + + vshr.u8 d23, d23, #1 @ a = a / 2 + vqadd.u8 d24, d24, d23 @ a = b + a + + vmov.u8 d30, #1 + vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 + + vcge.u8 d20, d30, d20 @ flat + + vand d19, d19, d24 @ mask + + @ hevmask + vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 + vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 + vorr d21, d21, d22 @ hev + + vand d16, d20, d19 @ flat && mask + vmov r5, r6, d16 + + @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) + vabd.u8 d22, d3, d7 @ abs(p4 - p0) + vabd.u8 d23, d12, d8 @ abs(q4 - q0) + vabd.u8 d24, d7, d2 @ abs(p0 - p5) + vabd.u8 d25, d8, d13 @ abs(q0 - q5) + vabd.u8 d26, d1, d7 @ abs(p6 - p0) + vabd.u8 d27, d14, d8 @ abs(q6 - q0) + vabd.u8 d28, d0, d7 @ abs(p7 - p0) + vabd.u8 d29, d15, d8 @ abs(q7 - q0) + + @ only compare the largest value to thresh + vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) + vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) + vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) + vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) + + vmax.u8 d26, d22, d23 + vmax.u8 d27, d24, d25 + vmax.u8 d23, d26, d27 + + vcge.u8 d18, d30, d23 @ flat2 + + vmov.u8 d22, #0x80 + + orrs r5, r5, r6 @ Check for 0 + orreq r7, r7, #1 @ Only do filter branch + + vand d17, d18, d16 @ flat2 && flat && mask + vmov r5, r6, d17 + + @ mbfilter() function + + @ filter() function + @ convert to signed + veor d23, d8, d22 @ qs0 + veor d24, d7, d22 @ ps0 + veor d25, d6, d22 @ ps1 + veor d26, d9, d22 @ qs1 + + vmov.u8 d27, #3 + + vsub.s8 d28, d23, d24 @ ( qs0 - ps0) + vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) + vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) + vand d29, d29, d21 @ filter &= hev + vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) + vmov.u8 d29, #4 + + @ filter = clamp(filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d28, q15 + + vand d28, d28, d19 @ filter &= mask + + vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) + vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) + vshr.s8 d30, d30, #3 @ filter2 >>= 3 + vshr.s8 d29, d29, #3 @ filter1 >>= 3 + + + vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2) + vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) + + @ outer tap adjustments: ++filter1 >> 1 + vrshr.s8 d29, d29, #1 + vbic d29, d29, d21 @ filter &= ~hev + + vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) + vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) + + veor d24, d24, d22 @ *f_op0 = u^0x80 + veor d23, d23, d22 @ *f_oq0 = u^0x80 + veor d25, d25, d22 @ *f_op1 = u^0x80 + veor d26, d26, d22 @ *f_oq1 = u^0x80 + + tst r7, #1 + bxne lr + + orrs r5, r5, r6 @ Check for 0 + orreq r7, r7, #2 @ Only do mbfilter branch + + @ mbfilter flat && mask branch + @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's + @ and using vibt on the q's? + vmov.u8 d29, #2 + vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 + vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 + vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 + vaddl.u8 q10, d4, d5 + vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 + vaddl.u8 q14, d6, d9 + vqrshrn.u16 d18, q15, #3 @ r_op2 + + vsub.i16 q15, q10 + vaddl.u8 q10, d4, d6 + vadd.i16 q15, q14 + vaddl.u8 q14, d7, d10 + vqrshrn.u16 d19, q15, #3 @ r_op1 + + vsub.i16 q15, q10 + vadd.i16 q15, q14 + vaddl.u8 q14, d8, d11 + vqrshrn.u16 d20, q15, #3 @ r_op0 + + vsubw.u8 q15, d4 @ oq0 = op0 - p3 + vsubw.u8 q15, d7 @ oq0 -= p0 + vadd.i16 q15, q14 + vaddl.u8 q14, d9, d11 + vqrshrn.u16 d21, q15, #3 @ r_oq0 + + vsubw.u8 q15, d5 @ oq1 = oq0 - p2 + vsubw.u8 q15, d8 @ oq1 -= q0 + vadd.i16 q15, q14 + vaddl.u8 q14, d10, d11 + vqrshrn.u16 d22, q15, #3 @ r_oq1 + + vsubw.u8 q15, d6 @ oq2 = oq0 - p1 + vsubw.u8 q15, d9 @ oq2 -= q1 + vadd.i16 q15, q14 + vqrshrn.u16 d27, q15, #3 @ r_oq2 + + @ Filter does not set op2 or oq2, so use p2 and q2. + vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) + vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) + vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) + vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) + vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) + + vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) + vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) + + tst r7, #2 + bxne lr + + @ wide_mbfilter flat2 && flat && mask branch + vmov.u8 d16, #7 + vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 + vaddl.u8 q12, d2, d3 + vaddl.u8 q13, d4, d5 + vaddl.u8 q14, d1, d6 + vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 + vadd.i16 q12, q13 + vadd.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vadd.i16 q15, q12 + vaddl.u8 q12, d0, d1 + vaddw.u8 q15, d1 + vaddl.u8 q13, d0, d2 + vadd.i16 q14, q15, q14 + vqrshrn.u16 d16, q15, #4 @ w_op6 + + vsub.i16 q15, q14, q12 + vaddl.u8 q14, d3, d10 + vqrshrn.u16 d24, q15, #4 @ w_op5 + + vsub.i16 q15, q13 + vaddl.u8 q13, d0, d3 + vadd.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vqrshrn.u16 d25, q15, #4 @ w_op4 + + vadd.i16 q15, q14 + vaddl.u8 q14, d0, d4 + vsub.i16 q15, q13 + vsub.i16 q14, q15, q14 + vqrshrn.u16 d26, q15, #4 @ w_op3 + + vaddw.u8 q15, q14, d5 @ op2 += p2 + vaddl.u8 q14, d0, d5 + vaddw.u8 q15, d12 @ op2 += q4 + vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) + vqrshrn.u16 d27, q15, #4 @ w_op2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d6 + vaddw.u8 q15, d6 @ op1 += p1 + vaddw.u8 q15, d13 @ op1 += q5 + vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) + vqrshrn.u16 d18, q15, #4 @ w_op1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d7 + vaddw.u8 q15, d7 @ op0 += p0 + vaddw.u8 q15, d14 @ op0 += q6 + vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) + vqrshrn.u16 d19, q15, #4 @ w_op0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d1, d8 + vaddw.u8 q15, d8 @ oq0 += q0 + vaddw.u8 q15, d15 @ oq0 += q7 + vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) + vqrshrn.u16 d20, q15, #4 @ w_oq0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vaddw.u8 q15, d9 @ oq1 += q1 + vaddl.u8 q4, d10, d15 + vaddw.u8 q15, d15 @ oq1 += q7 + vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) + vqrshrn.u16 d21, q15, #4 @ w_oq1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d3, d10 + vadd.i16 q15, q4 + vaddl.u8 q4, d11, d15 + vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) + vqrshrn.u16 d22, q15, #4 @ w_oq2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vadd.i16 q15, q4 + vaddl.u8 q4, d12, d15 + vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) + vqrshrn.u16 d23, q15, #4 @ w_oq3 + + vsub.i16 q15, q14 + vaddl.u8 q14, d5, d12 + vadd.i16 q15, q4 + vaddl.u8 q4, d13, d15 + vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) + vqrshrn.u16 d1, q15, #4 @ w_oq4 + + vsub.i16 q15, q14 + vaddl.u8 q14, d6, d13 + vadd.i16 q15, q4 + vaddl.u8 q4, d14, d15 + vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) + vqrshrn.u16 d2, q15, #4 @ w_oq5 + + vsub.i16 q15, q14 + vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) + vadd.i16 q15, q4 + vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) + vqrshrn.u16 d3, q15, #4 @ w_oq6 + vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) + vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) + vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) + + bx lr + .size vpx_wide_mbfilter_neon, .-vpx_wide_mbfilter_neon @ ENDP @ |vpx_wide_mbfilter_neon| + + .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s new file mode 100644 index 00000000000..e8852fa0d0d --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s @@ -0,0 +1,44 @@ +@ This file was created from a .asm file +@ using the ads2gas.pl script. + .equ DO1STROUNDING, 0 +@ +@ Copyright (c) 2010 The WebM project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + + + .global vpx_push_neon + .type vpx_push_neon, function + .global vpx_pop_neon + .type vpx_pop_neon, function + + .arm + .eabi_attribute 24, 1 @Tag_ABI_align_needed + .eabi_attribute 25, 1 @Tag_ABI_align_preserved + +.text +.p2align 2 + +_vpx_push_neon: + vpx_push_neon: @ PROC + vst1.i64 {d8, d9, d10, d11}, [r0]! + vst1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + .size vpx_push_neon, .-vpx_push_neon @ ENDP + +_vpx_pop_neon: + vpx_pop_neon: @ PROC + vld1.i64 {d8, d9, d10, d11}, [r0]! + vld1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + .size vpx_pop_neon, .-vpx_pop_neon @ ENDP + + + .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s new file mode 100644 index 00000000000..1c527afcff6 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s @@ -0,0 +1,660 @@ +@ This file was created from a .asm file +@ using the ads2gas_apple.pl script. + + .set WIDE_REFERENCE, 0 + .set ARCHITECTURE, 5 + .set DO1STROUNDING, 0 + @ + @ Copyright (c) 2014 The WebM project authors. All Rights Reserved. + @ + @ Use of this source code is governed by a BSD-style license + @ that can be found in the LICENSE file in the root of the source + @ tree. An additional intellectual property rights grant can be found + @ in the file PATENTS. All contributing project authors may + @ be found in the AUTHORS file in the root of the source tree. + @ + + .globl _vpx_v_predictor_4x4_neon + .globl vpx_v_predictor_4x4_neon + .globl _vpx_v_predictor_8x8_neon + .globl vpx_v_predictor_8x8_neon + .globl _vpx_v_predictor_16x16_neon + .globl vpx_v_predictor_16x16_neon + .globl _vpx_v_predictor_32x32_neon + .globl vpx_v_predictor_32x32_neon + .globl _vpx_h_predictor_4x4_neon + .globl vpx_h_predictor_4x4_neon + .globl _vpx_h_predictor_8x8_neon + .globl vpx_h_predictor_8x8_neon + .globl _vpx_h_predictor_16x16_neon + .globl vpx_h_predictor_16x16_neon + .globl _vpx_h_predictor_32x32_neon + .globl vpx_h_predictor_32x32_neon + .globl _vpx_tm_predictor_4x4_neon + .globl vpx_tm_predictor_4x4_neon + .globl _vpx_tm_predictor_8x8_neon + .globl vpx_tm_predictor_8x8_neon + .globl _vpx_tm_predictor_16x16_neon + .globl vpx_tm_predictor_16x16_neon + .globl _vpx_tm_predictor_32x32_neon + .globl vpx_tm_predictor_32x32_neon + @ ARM + @ + @ PRESERVE8 + +.text +.p2align 2 + + @void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_v_predictor_4x4_neon: + vpx_v_predictor_4x4_neon: @ + vld1.32 {d0[0]}, [r2] + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d0[0]}, [r0], r1 + bx lr + @ @ |vpx_v_predictor_4x4_neon| + + @void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_v_predictor_8x8_neon: + vpx_v_predictor_8x8_neon: @ + vld1.8 {d0}, [r2] + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + vst1.8 {d0}, [r0], r1 + bx lr + @ @ |vpx_v_predictor_8x8_neon| + + @void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_v_predictor_16x16_neon: + vpx_v_predictor_16x16_neon: @ + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + vst1.8 {q0}, [r0], r1 + bx lr + @ @ |vpx_v_predictor_16x16_neon| + + @void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_v_predictor_32x32_neon: + vpx_v_predictor_32x32_neon: @ + vld1.8 {q0, q1}, [r2] + mov r2, #2 +loop_v: + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + vst1.8 {q0, q1}, [r0], r1 + subs r2, r2, #1 + bgt loop_v + bx lr + @ @ |vpx_v_predictor_32x32_neon| + + @void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_h_predictor_4x4_neon: + vpx_h_predictor_4x4_neon: @ + vld1.32 {d1[0]}, [r3] + vdup.8 d0, d1[0] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[1] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[2] + vst1.32 {d0[0]}, [r0], r1 + vdup.8 d0, d1[3] + vst1.32 {d0[0]}, [r0], r1 + bx lr + @ @ |vpx_h_predictor_4x4_neon| + + @void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_h_predictor_8x8_neon: + vpx_h_predictor_8x8_neon: @ + vld1.64 {d1}, [r3] + vdup.8 d0, d1[0] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[1] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[2] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[3] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[4] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[5] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[6] + vst1.64 {d0}, [r0], r1 + vdup.8 d0, d1[7] + vst1.64 {d0}, [r0], r1 + bx lr + @ @ |vpx_h_predictor_8x8_neon| + + @void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_h_predictor_16x16_neon: + vpx_h_predictor_16x16_neon: @ + vld1.8 {q1}, [r3] + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0], r1 + bx lr + @ @ |vpx_h_predictor_16x16_neon| + + @void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_h_predictor_32x32_neon: + vpx_h_predictor_32x32_neon: @ + sub r1, r1, #16 + mov r2, #2 +loop_h: + vld1.8 {q1}, [r3]! + vdup.8 q0, d2[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d2[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[0] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[1] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[2] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[3] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[4] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[5] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[6] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + vdup.8 q0, d3[7] + vst1.8 {q0}, [r0]! + vst1.8 {q0}, [r0], r1 + subs r2, r2, #1 + bgt loop_h + bx lr + @ @ |vpx_h_predictor_32x32_neon| + + @void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_tm_predictor_4x4_neon: + vpx_tm_predictor_4x4_neon: @ + @ Load ytop_left = above[-1] @ + sub r12, r2, #1 + vld1.u8 {d0[]}, [r12] + + @ Load above 4 pixels + vld1.32 {d2[0]}, [r2] + + @ Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + @ Load left row by row and compute left + (above - ytop_left) + @ 1st row and 2nd row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3]! + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + @ 3rd row and 4th row + vld1.u8 {d2[]}, [r3]! + vld1.u8 {d4[]}, [r3] + vmovl.u8 q1, d2 + vmovl.u8 q2, d4 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + bx lr + @ @ |vpx_tm_predictor_4x4_neon| + + @void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_tm_predictor_8x8_neon: + vpx_tm_predictor_8x8_neon: @ + @ Load ytop_left = above[-1] @ + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ preload 8 left + vld1.8 {d30}, [r3] + + @ Load above 8 pixels + vld1.64 {d2}, [r2] + + vmovl.u8 q10, d30 + + @ Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + @ Load left row by row and compute left + (above - ytop_left) + @ 1st row and 2nd row + vdup.16 q0, d20[0] + vdup.16 q1, d20[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + @ 3rd row and 4th row + vdup.16 q8, d20[2] + vdup.16 q9, d20[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + @ 5th row and 6th row + vdup.16 q0, d21[0] + vdup.16 q1, d21[1] + vadd.s16 q0, q3, q0 + vadd.s16 q1, q3, q1 + + @ 7th row and 8th row + vdup.16 q8, d21[2] + vdup.16 q9, d21[3] + vadd.s16 q8, q3, q8 + vadd.s16 q9, q3, q9 + + vqmovun.s16 d0, q0 + vqmovun.s16 d1, q1 + vqmovun.s16 d2, q8 + vqmovun.s16 d3, q9 + + vst1.64 {d0}, [r0], r1 + vst1.64 {d1}, [r0], r1 + vst1.64 {d2}, [r0], r1 + vst1.64 {d3}, [r0], r1 + + bx lr + @ @ |vpx_tm_predictor_8x8_neon| + + @void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_tm_predictor_16x16_neon: + vpx_tm_predictor_16x16_neon: @ + @ Load ytop_left = above[-1] @ + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ Load above 8 pixels + vld1.8 {q1}, [r2] + + @ preload 8 left into r12 + vld1.8 {d18}, [r3]! + + @ Compute above - ytop_left + vsubl.u8 q2, d2, d0 + vsubl.u8 q3, d3, d0 + + vmovl.u8 q10, d18 + + @ Load left row by row and compute left + (above - ytop_left) + @ Process 8 rows in each single loop and loop 2 times to process 16 rows. + mov r2, #2 + +loop_16x16_neon: + @ Process two rows. + vdup.16 q0, d20[0] + vdup.16 q8, d20[1] + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d20[2] @ proload next 2 rows data + vdup.16 q8, d20[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + @ Process two rows. + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[0] @ proload next 2 rows data + vdup.16 q8, d21[1] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vdup.16 q0, d21[2] @ proload next 2 rows data + vdup.16 q8, d21[3] + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + + vadd.s16 q1, q0, q2 + vadd.s16 q0, q0, q3 + vadd.s16 q11, q8, q2 + vadd.s16 q8, q8, q3 + vqmovun.s16 d2, q1 + vqmovun.s16 d3, q0 + vqmovun.s16 d22, q11 + vqmovun.s16 d23, q8 + vld1.8 {d18}, [r3]! @ preload 8 left into r12 + vmovl.u8 q10, d18 + vst1.64 {d2,d3}, [r0], r1 + vst1.64 {d22,d23}, [r0], r1 + + subs r2, r2, #1 + bgt loop_16x16_neon + + bx lr + @ @ |vpx_tm_predictor_16x16_neon| + + @void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, + @ const uint8_t *above, + @ const uint8_t *left) + @ r0 uint8_t *dst + @ r1 ptrdiff_t y_stride + @ r2 const uint8_t *above + @ r3 const uint8_t *left + +_vpx_tm_predictor_32x32_neon: + vpx_tm_predictor_32x32_neon: @ + @ Load ytop_left = above[-1] @ + sub r12, r2, #1 + vld1.8 {d0[]}, [r12] + + @ Load above 32 pixels + vld1.8 {q1}, [r2]! + vld1.8 {q2}, [r2] + + @ preload 8 left pixels + vld1.8 {d26}, [r3]! + + @ Compute above - ytop_left + vsubl.u8 q8, d2, d0 + vsubl.u8 q9, d3, d0 + vsubl.u8 q10, d4, d0 + vsubl.u8 q11, d5, d0 + + vmovl.u8 q3, d26 + + @ Load left row by row and compute left + (above - ytop_left) + @ Process 8 rows in each single loop and loop 4 times to process 32 rows. + mov r2, #4 + +loop_32x32_neon: + @ Process two rows. + vdup.16 q0, d6[0] + vdup.16 q2, d6[1] + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q1, d6[2] + vdup.16 q2, d6[3] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q1, q8 + vadd.s16 q13, q1, q9 + vadd.s16 q14, q1, q10 + vadd.s16 q15, q1, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[0] + vdup.16 q2, d7[1] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vdup.16 q0, d7[2] + vdup.16 q2, d7[3] + vst1.64 {d24-d27}, [r0], r1 + + @ Process two rows. + vadd.s16 q12, q0, q8 + vadd.s16 q13, q0, q9 + vadd.s16 q14, q0, q10 + vadd.s16 q15, q0, q11 + vqmovun.s16 d0, q12 + vqmovun.s16 d1, q13 + vadd.s16 q12, q2, q8 + vadd.s16 q13, q2, q9 + vqmovun.s16 d2, q14 + vqmovun.s16 d3, q15 + vadd.s16 q14, q2, q10 + vadd.s16 q15, q2, q11 + vst1.64 {d0-d3}, [r0], r1 + vqmovun.s16 d24, q12 + vqmovun.s16 d25, q13 + vld1.8 {d0}, [r3]! @ preload 8 left pixels + vqmovun.s16 d26, q14 + vqmovun.s16 d27, q15 + vmovl.u8 q3, d0 + vst1.64 {d24-d27}, [r0], r1 + + subs r2, r2, #1 + bgt loop_32x32_neon + + bx lr + @ @ |vpx_tm_predictor_32x32_neon| + diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s new file mode 100644 index 00000000000..69f7e5207ed --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s @@ -0,0 +1,649 @@ +@ This file was created from a .asm file +@ using the ads2gas_apple.pl script. + + .set WIDE_REFERENCE, 0 + .set ARCHITECTURE, 5 + .set DO1STROUNDING, 0 + @ + @ Copyright (c) 2013 The WebM project authors. All Rights Reserved. + @ + @ Use of this source code is governed by a BSD-style license + @ that can be found in the LICENSE file in the root of the source + @ tree. An additional intellectual property rights grant can be found + @ in the file PATENTS. All contributing project authors may + @ be found in the AUTHORS file in the root of the source tree. + @ + + .globl _vpx_lpf_horizontal_edge_8_neon + .globl vpx_lpf_horizontal_edge_8_neon + .globl _vpx_lpf_horizontal_edge_16_neon + .globl vpx_lpf_horizontal_edge_16_neon + .globl _vpx_lpf_vertical_16_neon + .globl vpx_lpf_vertical_16_neon + @ ARM + +.text +.p2align 2 + + @ void mb_lpf_horizontal_edge(uint8_t *s, int p, + @ const uint8_t *blimit, + @ const uint8_t *limit, + @ const uint8_t *thresh, + @ int count) + @ r0 uint8_t *s, + @ r1 int p, /* pitch */ + @ r2 const uint8_t *blimit, + @ r3 const uint8_t *limit, + @ sp const uint8_t *thresh, + @ r12 int count +_mb_lpf_horizontal_edge: + mb_lpf_horizontal_edge: @ + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] @ load thresh + +h_count: + vld1.8 {d16[]}, [r2] @ load *blimit + vld1.8 {d17[]}, [r3] @ load *limit + vld1.8 {d18[]}, [r4] @ load *thresh + + sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines + + vld1.u8 {d0}, [r8,:64], r1 @ p7 + vld1.u8 {d1}, [r8,:64], r1 @ p6 + vld1.u8 {d2}, [r8,:64], r1 @ p5 + vld1.u8 {d3}, [r8,:64], r1 @ p4 + vld1.u8 {d4}, [r8,:64], r1 @ p3 + vld1.u8 {d5}, [r8,:64], r1 @ p2 + vld1.u8 {d6}, [r8,:64], r1 @ p1 + vld1.u8 {d7}, [r8,:64], r1 @ p0 + vld1.u8 {d8}, [r8,:64], r1 @ q0 + vld1.u8 {d9}, [r8,:64], r1 @ q1 + vld1.u8 {d10}, [r8,:64], r1 @ q2 + vld1.u8 {d11}, [r8,:64], r1 @ q3 + vld1.u8 {d12}, [r8,:64], r1 @ q4 + vld1.u8 {d13}, [r8,:64], r1 @ q5 + vld1.u8 {d14}, [r8,:64], r1 @ q6 + vld1.u8 {d15}, [r8,:64], r1 @ q7 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq h_mbfilter + + @ flat && mask were not set for any of the channels. Just store the values + @ from filter. + sub r8, r0, r1, lsl #1 + + vst1.u8 {d25}, [r8,:64], r1 @ store op1 + vst1.u8 {d24}, [r8,:64], r1 @ store op0 + vst1.u8 {d23}, [r8,:64], r1 @ store oq0 + vst1.u8 {d26}, [r8,:64], r1 @ store oq1 + + b h_next + +h_mbfilter: + tst r7, #2 + beq h_wide_mbfilter + + @ flat2 was not set for any of the channels. Just store the values from + @ mbfilter. + sub r8, r0, r1, lsl #1 + sub r8, r8, r1 + + vst1.u8 {d18}, [r8,:64], r1 @ store op2 + vst1.u8 {d19}, [r8,:64], r1 @ store op1 + vst1.u8 {d20}, [r8,:64], r1 @ store op0 + vst1.u8 {d21}, [r8,:64], r1 @ store oq0 + vst1.u8 {d22}, [r8,:64], r1 @ store oq1 + vst1.u8 {d23}, [r8,:64], r1 @ store oq2 + + b h_next + +h_wide_mbfilter: + sub r8, r0, r1, lsl #3 + add r8, r8, r1 + + vst1.u8 {d16}, [r8,:64], r1 @ store op6 + vst1.u8 {d24}, [r8,:64], r1 @ store op5 + vst1.u8 {d25}, [r8,:64], r1 @ store op4 + vst1.u8 {d26}, [r8,:64], r1 @ store op3 + vst1.u8 {d27}, [r8,:64], r1 @ store op2 + vst1.u8 {d18}, [r8,:64], r1 @ store op1 + vst1.u8 {d19}, [r8,:64], r1 @ store op0 + vst1.u8 {d20}, [r8,:64], r1 @ store oq0 + vst1.u8 {d21}, [r8,:64], r1 @ store oq1 + vst1.u8 {d22}, [r8,:64], r1 @ store oq2 + vst1.u8 {d23}, [r8,:64], r1 @ store oq3 + vst1.u8 {d1}, [r8,:64], r1 @ store oq4 + vst1.u8 {d2}, [r8,:64], r1 @ store oq5 + vst1.u8 {d3}, [r8,:64], r1 @ store oq6 + +h_next: + add r0, r0, #8 + subs r12, r12, #1 + bne h_count + + vpop {d8-d15} + pop {r4-r8, pc} + + @ @ |mb_lpf_horizontal_edge| + + @ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, + @ const uint8_t *blimit, + @ const uint8_t *limit, + @ const uint8_t *thresh) + @ r0 uint8_t *s, + @ r1 int pitch, + @ r2 const uint8_t *blimit, + @ r3 const uint8_t *limit, + @ sp const uint8_t *thresh +_vpx_lpf_horizontal_edge_8_neon: + vpx_lpf_horizontal_edge_8_neon: @ + mov r12, #1 + b mb_lpf_horizontal_edge + @ @ |vpx_lpf_horizontal_edge_8_neon| + + @ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, + @ const uint8_t *blimit, + @ const uint8_t *limit, + @ const uint8_t *thresh) + @ r0 uint8_t *s, + @ r1 int pitch, + @ r2 const uint8_t *blimit, + @ r3 const uint8_t *limit, + @ sp const uint8_t *thresh +_vpx_lpf_horizontal_edge_16_neon: + vpx_lpf_horizontal_edge_16_neon: @ + mov r12, #2 + b mb_lpf_horizontal_edge + @ @ |vpx_lpf_horizontal_edge_16_neon| + + @ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, + @ const uint8_t *blimit, + @ const uint8_t *limit, + @ const uint8_t *thresh) + @ r0 uint8_t *s, + @ r1 int p, /* pitch */ + @ r2 const uint8_t *blimit, + @ r3 const uint8_t *limit, + @ sp const uint8_t *thresh, +_vpx_lpf_vertical_16_neon: + vpx_lpf_vertical_16_neon: @ + push {r4-r8, lr} + vpush {d8-d15} + ldr r4, [sp, #88] @ load thresh + + vld1.8 {d16[]}, [r2] @ load *blimit + vld1.8 {d17[]}, [r3] @ load *limit + vld1.8 {d18[]}, [r4] @ load *thresh + + sub r8, r0, #8 + + vld1.8 {d0}, [r8,:64], r1 + vld1.8 {d8}, [r0,:64], r1 + vld1.8 {d1}, [r8,:64], r1 + vld1.8 {d9}, [r0,:64], r1 + vld1.8 {d2}, [r8,:64], r1 + vld1.8 {d10}, [r0,:64], r1 + vld1.8 {d3}, [r8,:64], r1 + vld1.8 {d11}, [r0,:64], r1 + vld1.8 {d4}, [r8,:64], r1 + vld1.8 {d12}, [r0,:64], r1 + vld1.8 {d5}, [r8,:64], r1 + vld1.8 {d13}, [r0,:64], r1 + vld1.8 {d6}, [r8,:64], r1 + vld1.8 {d14}, [r0,:64], r1 + vld1.8 {d7}, [r8,:64], r1 + vld1.8 {d15}, [r0,:64], r1 + + sub r0, r0, r1, lsl #3 + + vtrn.32 q0, q2 + vtrn.32 q1, q3 + vtrn.32 q4, q6 + vtrn.32 q5, q7 + + vtrn.16 q0, q1 + vtrn.16 q2, q3 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + vtrn.8 d4, d5 + vtrn.8 d6, d7 + + vtrn.8 d8, d9 + vtrn.8 d10, d11 + vtrn.8 d12, d13 + vtrn.8 d14, d15 + + bl vpx_wide_mbfilter_neon + + tst r7, #1 + beq v_mbfilter + + @ flat && mask were not set for any of the channels. Just store the values + @ from filter. + sub r8, r0, #2 + + vswp d23, d25 + + vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 + vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 + vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 + vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 + vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 + vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 + vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 + vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 + + b v_end + +v_mbfilter: + tst r7, #2 + beq v_wide_mbfilter + + @ flat2 was not set for any of the channels. Just store the values from + @ mbfilter. + sub r8, r0, #3 + + vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 + vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 + vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 + vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 + vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 + vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 + vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 + vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 + vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 + vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 + vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 + vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 + vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 + vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 + vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 + vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 + + b v_end + +v_wide_mbfilter: + sub r8, r0, #8 + + vtrn.32 d0, d26 + vtrn.32 d16, d27 + vtrn.32 d24, d18 + vtrn.32 d25, d19 + + vtrn.16 d0, d24 + vtrn.16 d16, d25 + vtrn.16 d26, d18 + vtrn.16 d27, d19 + + vtrn.8 d0, d16 + vtrn.8 d24, d25 + vtrn.8 d26, d27 + vtrn.8 d18, d19 + + vtrn.32 d20, d1 + vtrn.32 d21, d2 + vtrn.32 d22, d3 + vtrn.32 d23, d15 + + vtrn.16 d20, d22 + vtrn.16 d21, d23 + vtrn.16 d1, d3 + vtrn.16 d2, d15 + + vtrn.8 d20, d21 + vtrn.8 d22, d23 + vtrn.8 d1, d2 + vtrn.8 d3, d15 + + vst1.8 {d0}, [r8,:64], r1 + vst1.8 {d20}, [r0,:64], r1 + vst1.8 {d16}, [r8,:64], r1 + vst1.8 {d21}, [r0,:64], r1 + vst1.8 {d24}, [r8,:64], r1 + vst1.8 {d22}, [r0,:64], r1 + vst1.8 {d25}, [r8,:64], r1 + vst1.8 {d23}, [r0,:64], r1 + vst1.8 {d26}, [r8,:64], r1 + vst1.8 {d1}, [r0,:64], r1 + vst1.8 {d27}, [r8,:64], r1 + vst1.8 {d2}, [r0,:64], r1 + vst1.8 {d18}, [r8,:64], r1 + vst1.8 {d3}, [r0,:64], r1 + vst1.8 {d19}, [r8,:64], r1 + vst1.8 {d15}, [r0,:64], r1 + +v_end: + vpop {d8-d15} + pop {r4-r8, pc} + + @ @ |vpx_lpf_vertical_16_neon| + + @ void vpx_wide_mbfilter_neon() @ + @ This is a helper function for the loopfilters. The invidual functions do the + @ necessary load, transpose (if necessary) and store. + @ + @ r0-r3 PRESERVE + @ d16 blimit + @ d17 limit + @ d18 thresh + @ d0 p7 + @ d1 p6 + @ d2 p5 + @ d3 p4 + @ d4 p3 + @ d5 p2 + @ d6 p1 + @ d7 p0 + @ d8 q0 + @ d9 q1 + @ d10 q2 + @ d11 q3 + @ d12 q4 + @ d13 q5 + @ d14 q6 + @ d15 q7 +_vpx_wide_mbfilter_neon: + vpx_wide_mbfilter_neon: @ + mov r7, #0 + + @ filter_mask + vabd.u8 d19, d4, d5 @ abs(p3 - p2) + vabd.u8 d20, d5, d6 @ abs(p2 - p1) + vabd.u8 d21, d6, d7 @ abs(p1 - p0) + vabd.u8 d22, d9, d8 @ abs(q1 - q0) + vabd.u8 d23, d10, d9 @ abs(q2 - q1) + vabd.u8 d24, d11, d10 @ abs(q3 - q2) + + @ only compare the largest value to limit + vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) + vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) + vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) + vmax.u8 d19, d19, d20 + + vabd.u8 d24, d7, d8 @ abs(p0 - q0) + + vmax.u8 d19, d19, d23 + + vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) + vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 + + @ abs () > limit + vcge.u8 d19, d17, d19 + + @ flatmask4 + vabd.u8 d25, d7, d5 @ abs(p0 - p2) + vabd.u8 d26, d8, d10 @ abs(q0 - q2) + vabd.u8 d27, d4, d7 @ abs(p3 - p0) + vabd.u8 d28, d11, d8 @ abs(q3 - q0) + + @ only compare the largest value to thresh + vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) + vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) + vmax.u8 d25, d25, d26 + vmax.u8 d20, d20, d25 + + vshr.u8 d23, d23, #1 @ a = a / 2 + vqadd.u8 d24, d24, d23 @ a = b + a + + vmov.u8 d30, #1 + vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 + + vcge.u8 d20, d30, d20 @ flat + + vand d19, d19, d24 @ mask + + @ hevmask + vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 + vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 + vorr d21, d21, d22 @ hev + + vand d16, d20, d19 @ flat && mask + vmov r5, r6, d16 + + @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) + vabd.u8 d22, d3, d7 @ abs(p4 - p0) + vabd.u8 d23, d12, d8 @ abs(q4 - q0) + vabd.u8 d24, d7, d2 @ abs(p0 - p5) + vabd.u8 d25, d8, d13 @ abs(q0 - q5) + vabd.u8 d26, d1, d7 @ abs(p6 - p0) + vabd.u8 d27, d14, d8 @ abs(q6 - q0) + vabd.u8 d28, d0, d7 @ abs(p7 - p0) + vabd.u8 d29, d15, d8 @ abs(q7 - q0) + + @ only compare the largest value to thresh + vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) + vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) + vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) + vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) + + vmax.u8 d26, d22, d23 + vmax.u8 d27, d24, d25 + vmax.u8 d23, d26, d27 + + vcge.u8 d18, d30, d23 @ flat2 + + vmov.u8 d22, #0x80 + + orrs r5, r5, r6 @ Check for 0 + orreq r7, r7, #1 @ Only do filter branch + + vand d17, d18, d16 @ flat2 && flat && mask + vmov r5, r6, d17 + + @ mbfilter() function + + @ filter() function + @ convert to signed + veor d23, d8, d22 @ qs0 + veor d24, d7, d22 @ ps0 + veor d25, d6, d22 @ ps1 + veor d26, d9, d22 @ qs1 + + vmov.u8 d27, #3 + + vsub.s8 d28, d23, d24 @ ( qs0 - ps0) + vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) + vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) + vand d29, d29, d21 @ filter &= hev + vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) + vmov.u8 d29, #4 + + @ filter = clamp(filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d28, q15 + + vand d28, d28, d19 @ filter &= mask + + vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) + vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) + vshr.s8 d30, d30, #3 @ filter2 >>= 3 + vshr.s8 d29, d29, #3 @ filter1 >>= 3 + + + vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2) + vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) + + @ outer tap adjustments: ++filter1 >> 1 + vrshr.s8 d29, d29, #1 + vbic d29, d29, d21 @ filter &= ~hev + + vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) + vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) + + veor d24, d24, d22 @ *f_op0 = u^0x80 + veor d23, d23, d22 @ *f_oq0 = u^0x80 + veor d25, d25, d22 @ *f_op1 = u^0x80 + veor d26, d26, d22 @ *f_oq1 = u^0x80 + + tst r7, #1 + bxne lr + + orrs r5, r5, r6 @ Check for 0 + orreq r7, r7, #2 @ Only do mbfilter branch + + @ mbfilter flat && mask branch + @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's + @ and using vibt on the q's? + vmov.u8 d29, #2 + vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 + vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 + vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 + vaddl.u8 q10, d4, d5 + vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 + vaddl.u8 q14, d6, d9 + vqrshrn.u16 d18, q15, #3 @ r_op2 + + vsub.i16 q15, q10 + vaddl.u8 q10, d4, d6 + vadd.i16 q15, q14 + vaddl.u8 q14, d7, d10 + vqrshrn.u16 d19, q15, #3 @ r_op1 + + vsub.i16 q15, q10 + vadd.i16 q15, q14 + vaddl.u8 q14, d8, d11 + vqrshrn.u16 d20, q15, #3 @ r_op0 + + vsubw.u8 q15, d4 @ oq0 = op0 - p3 + vsubw.u8 q15, d7 @ oq0 -= p0 + vadd.i16 q15, q14 + vaddl.u8 q14, d9, d11 + vqrshrn.u16 d21, q15, #3 @ r_oq0 + + vsubw.u8 q15, d5 @ oq1 = oq0 - p2 + vsubw.u8 q15, d8 @ oq1 -= q0 + vadd.i16 q15, q14 + vaddl.u8 q14, d10, d11 + vqrshrn.u16 d22, q15, #3 @ r_oq1 + + vsubw.u8 q15, d6 @ oq2 = oq0 - p1 + vsubw.u8 q15, d9 @ oq2 -= q1 + vadd.i16 q15, q14 + vqrshrn.u16 d27, q15, #3 @ r_oq2 + + @ Filter does not set op2 or oq2, so use p2 and q2. + vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) + vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) + vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) + vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) + vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) + + vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) + vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) + + tst r7, #2 + bxne lr + + @ wide_mbfilter flat2 && flat && mask branch + vmov.u8 d16, #7 + vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 + vaddl.u8 q12, d2, d3 + vaddl.u8 q13, d4, d5 + vaddl.u8 q14, d1, d6 + vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 + vadd.i16 q12, q13 + vadd.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vadd.i16 q15, q12 + vaddl.u8 q12, d0, d1 + vaddw.u8 q15, d1 + vaddl.u8 q13, d0, d2 + vadd.i16 q14, q15, q14 + vqrshrn.u16 d16, q15, #4 @ w_op6 + + vsub.i16 q15, q14, q12 + vaddl.u8 q14, d3, d10 + vqrshrn.u16 d24, q15, #4 @ w_op5 + + vsub.i16 q15, q13 + vaddl.u8 q13, d0, d3 + vadd.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vqrshrn.u16 d25, q15, #4 @ w_op4 + + vadd.i16 q15, q14 + vaddl.u8 q14, d0, d4 + vsub.i16 q15, q13 + vsub.i16 q14, q15, q14 + vqrshrn.u16 d26, q15, #4 @ w_op3 + + vaddw.u8 q15, q14, d5 @ op2 += p2 + vaddl.u8 q14, d0, d5 + vaddw.u8 q15, d12 @ op2 += q4 + vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) + vqrshrn.u16 d27, q15, #4 @ w_op2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d6 + vaddw.u8 q15, d6 @ op1 += p1 + vaddw.u8 q15, d13 @ op1 += q5 + vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) + vqrshrn.u16 d18, q15, #4 @ w_op1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d0, d7 + vaddw.u8 q15, d7 @ op0 += p0 + vaddw.u8 q15, d14 @ op0 += q6 + vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) + vqrshrn.u16 d19, q15, #4 @ w_op0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d1, d8 + vaddw.u8 q15, d8 @ oq0 += q0 + vaddw.u8 q15, d15 @ oq0 += q7 + vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) + vqrshrn.u16 d20, q15, #4 @ w_oq0 + + vsub.i16 q15, q14 + vaddl.u8 q14, d2, d9 + vaddw.u8 q15, d9 @ oq1 += q1 + vaddl.u8 q4, d10, d15 + vaddw.u8 q15, d15 @ oq1 += q7 + vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) + vqrshrn.u16 d21, q15, #4 @ w_oq1 + + vsub.i16 q15, q14 + vaddl.u8 q14, d3, d10 + vadd.i16 q15, q4 + vaddl.u8 q4, d11, d15 + vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) + vqrshrn.u16 d22, q15, #4 @ w_oq2 + + vsub.i16 q15, q14 + vaddl.u8 q14, d4, d11 + vadd.i16 q15, q4 + vaddl.u8 q4, d12, d15 + vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) + vqrshrn.u16 d23, q15, #4 @ w_oq3 + + vsub.i16 q15, q14 + vaddl.u8 q14, d5, d12 + vadd.i16 q15, q4 + vaddl.u8 q4, d13, d15 + vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) + vqrshrn.u16 d1, q15, #4 @ w_oq4 + + vsub.i16 q15, q14 + vaddl.u8 q14, d6, d13 + vadd.i16 q15, q4 + vaddl.u8 q4, d14, d15 + vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) + vqrshrn.u16 d2, q15, #4 @ w_oq5 + + vsub.i16 q15, q14 + vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) + vadd.i16 q15, q4 + vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) + vqrshrn.u16 d3, q15, #4 @ w_oq6 + vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) + vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) + vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) + + bx lr + @ @ |vpx_wide_mbfilter_neon| + diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s new file mode 100644 index 00000000000..f322b698b44 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s @@ -0,0 +1,46 @@ +@ This file was created from a .asm file +@ using the ads2gas_apple.pl script. + + .set WIDE_REFERENCE, 0 + .set ARCHITECTURE, 5 + .set DO1STROUNDING, 0 + @ + @ Copyright (c) 2010 The WebM project authors. All Rights Reserved. + @ + @ Use of this source code is governed by a BSD-style license + @ that can be found in the LICENSE file in the root of the source + @ tree. An additional intellectual property rights grant can be found + @ in the file PATENTS. All contributing project authors may + @ be found in the AUTHORS file in the root of the source tree. + @ + + + .globl _vpx_push_neon + .globl vpx_push_neon + .globl _vpx_pop_neon + .globl vpx_pop_neon + + @ ARM + @ + @ PRESERVE8 + +.text +.p2align 2 + +_vpx_push_neon: + vpx_push_neon: @ + vst1.i64 {d8, d9, d10, d11}, [r0]! + vst1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + @ + +_vpx_pop_neon: + vpx_pop_neon: @ + vld1.i64 {d8, d9, d10, d11}, [r0]! + vld1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + @ + + diff --git a/thirdparty/libvpx/vpx_dsp_rtcd.h b/thirdparty/libvpx/vpx_dsp_rtcd.h new file mode 100644 index 00000000000..4d5ad895337 --- /dev/null +++ b/thirdparty/libvpx/vpx_dsp_rtcd.h @@ -0,0 +1,9 @@ +#include "vpx_config.h" + +#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) + #include "rtcd/vpx_dsp_rtcd_x86.h" +#elif defined(WEBM_ARMASM) && ARCH_ARM + #include "rtcd/vpx_dsp_rtcd_arm.h" +#else + #include "rtcd/vpx_dsp_rtcd_c.h" +#endif diff --git a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c index 8a4b8af964f..7eb74a7dc9f 100644 --- a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c +++ b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c @@ -38,26 +38,7 @@ static int arm_cpu_env_mask(void) { } #if !CONFIG_RUNTIME_CPU_DETECT - -int arm_cpu_caps(void) { - /* This function should actually be a no-op. There is no way to adjust any of - * these because the RTCD tables do not exist: the functions are called - * statically */ - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); -#if HAVE_MEDIA - flags |= HAS_MEDIA; -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - flags |= HAS_NEON; -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} - + #error "CONFIG_RUNTIME_CPU_DETECT should be enabled!" #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #define WIN32_LEAN_AND_MEAN @@ -76,28 +57,28 @@ int arm_cpu_caps(void) { * All of these instructions should be essentially nops. */ #if HAVE_MEDIA - if (mask & HAS_MEDIA) + if (mask & HAS_MEDIA) { __try { /*SHADD8 r3,r3,r3*/ __emit(0xE6333F93); flags |= HAS_MEDIA; } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { /*Ignore exception.*/ + } } -} #endif /* HAVE_MEDIA */ #if HAVE_NEON || HAVE_NEON_ASM -if (mask &HAS_NEON) { - __try { - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags |= HAS_NEON; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ + if (mask &HAS_NEON) { + __try { + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags |= HAS_NEON; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ + } } -} #endif /* HAVE_NEON || HAVE_NEON_ASM */ -return flags & mask; + return flags & mask; } #elif defined(__ANDROID__) /* end _MSC_VER */ @@ -170,6 +151,20 @@ int arm_cpu_caps(void) { return flags & mask; } #else /* end __linux__ */ -#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ -"available for your platform. Reconfigure with --disable-runtime-cpu-detect." +int arm_cpu_caps(void) { + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON || HAVE_NEON_ASM + flags |= HAS_NEON; +#endif /* HAVE_NEON || HAVE_NEON_ASM */ + return flags & mask; +} +#warning "ARM run-time CPU detection is disabled for this platform..." #endif diff --git a/thirdparty/libvpx/vpx_ports/system_state.h b/thirdparty/libvpx/vpx_ports/system_state.h index 086c64681f5..01989dcafc1 100644 --- a/thirdparty/libvpx/vpx_ports/system_state.h +++ b/thirdparty/libvpx/vpx_ports/system_state.h @@ -13,10 +13,10 @@ #include "./vpx_config.h" -#if ARCH_X86 || ARCH_X86_64 -void vpx_reset_mmx_state(void); -#define vpx_clear_system_state() vpx_reset_mmx_state() +#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) + void vpx_reset_mmx_state(void); + #define vpx_clear_system_state() vpx_reset_mmx_state() #else -#define vpx_clear_system_state() + #define vpx_clear_system_state() #endif // ARCH_X86 || ARCH_X86_64 #endif // VPX_PORTS_SYSTEM_STATE_H_ diff --git a/thirdparty/libvpx/vpx_scale_rtcd.h b/thirdparty/libvpx/vpx_scale_rtcd.h new file mode 100644 index 00000000000..4d59467fe93 --- /dev/null +++ b/thirdparty/libvpx/vpx_scale_rtcd.h @@ -0,0 +1,44 @@ +#ifndef VPX_SCALE_RTCD_H_ +#define VPX_SCALE_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +struct yv12_buffer_config; + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c + +void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c + +void vpx_extend_frame_borders_c(struct yv12_buffer_config *ybf); +#define vpx_extend_frame_borders vpx_extend_frame_borders_c + +void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); +#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c + +void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vpx_yv12_copy_y vpx_yv12_copy_y_c + +void vpx_scale_rtcd(void); + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ + //Only MIPS has something here, but it is not supported +} +#endif + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif From 4428115916144b45c4697cd65d9c8c093631bec6 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Wed, 19 Oct 2016 11:14:41 -0300 Subject: [PATCH 010/223] Everything returning to normal in 3D, still a long way to go -implemented the scene part of visual server and rasterizer, objects without lighting and material are rendererd only --- bin/tests/test_render.cpp | 5 +- core/math/math_funcs.h | 102 ++ core/rid.h | 8 + drivers/gles3/rasterizer_canvas_gles3.cpp | 1 + drivers/gles3/rasterizer_gles3.cpp | 9 +- drivers/gles3/rasterizer_gles3.h | 3 + drivers/gles3/rasterizer_scene_gles3.cpp | 808 +++++++++ drivers/gles3/rasterizer_scene_gles3.h | 178 ++ drivers/gles3/rasterizer_storage_gles3.cpp | 720 +++++++- drivers/gles3/rasterizer_storage_gles3.h | 200 ++- drivers/gles3/shader_gles3.cpp | 12 +- drivers/gles3/shaders/SCsub | 1 + drivers/gles3/shaders/scene.glsl | 351 ++++ servers/visual/rasterizer.h | 99 +- servers/visual/shader_types.cpp | 13 +- servers/visual/visual_server_raster.cpp | 179 +- servers/visual/visual_server_raster.h | 103 +- servers/visual/visual_server_scene.cpp | 1517 +++++++++++++++++ servers/visual/visual_server_scene.h | 390 +++++ servers/visual/visual_server_viewport.cpp | 14 + servers/visual_server.cpp | 746 ++++++++ servers/visual_server.h | 8 +- .../editor/plugins/spatial_editor_plugin.cpp | 1 + 23 files changed, 5196 insertions(+), 272 deletions(-) create mode 100644 drivers/gles3/rasterizer_scene_gles3.cpp create mode 100644 drivers/gles3/rasterizer_scene_gles3.h create mode 100644 drivers/gles3/shaders/scene.glsl create mode 100644 servers/visual/visual_server_scene.cpp create mode 100644 servers/visual/visual_server_scene.h diff --git a/bin/tests/test_render.cpp b/bin/tests/test_render.cpp index 7bf833c8a72..b971d412aa9 100644 --- a/bin/tests/test_render.cpp +++ b/bin/tests/test_render.cpp @@ -173,7 +173,10 @@ public: // vs->camera_set_perspective( camera, 60.0,0.1, 100.0 ); viewport = vs->viewport_create(); - vs->viewport_attach_to_screen(viewport,Rect2(Vector2(),OS::get_singleton()->get_window_size())); + Size2i screen_size = OS::get_singleton()->get_window_size(); + vs->viewport_set_size(viewport,screen_size.x,screen_size.y); + vs->viewport_attach_to_screen(viewport,Rect2(Vector2(),screen_size)); + vs->viewport_set_active(viewport,true); vs->viewport_attach_camera( viewport, camera ); vs->viewport_set_scenario( viewport, scenario ); vs->camera_set_transform(camera, Transform( Matrix3(), Vector3(0,3,30 ) ) ); diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h index fc76d96b2ee..c8ced0b306d 100644 --- a/core/math/math_funcs.h +++ b/core/math/math_funcs.h @@ -175,6 +175,108 @@ public: static double log(double x); static double exp(double x); + + static _FORCE_INLINE_ uint32_t halfbits_to_floatbits(uint16_t h) + { + uint16_t h_exp, h_sig; + uint32_t f_sgn, f_exp, f_sig; + + h_exp = (h&0x7c00u); + f_sgn = ((uint32_t)h&0x8000u) << 16; + switch (h_exp) { + case 0x0000u: /* 0 or subnormal */ + h_sig = (h&0x03ffu); + /* Signed zero */ + if (h_sig == 0) { + return f_sgn; + } + /* Subnormal */ + h_sig <<= 1; + while ((h_sig&0x0400u) == 0) { + h_sig <<= 1; + h_exp++; + } + f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23; + f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13; + return f_sgn + f_exp + f_sig; + case 0x7c00u: /* inf or NaN */ + /* All-ones exponent and a copy of the significand */ + return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13); + default: /* normalized */ + /* Just need to adjust the exponent and shift */ + return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13); + } + } + + static _FORCE_INLINE_ float halfptr_to_float(uint16_t *h) { + + union { + uint32_t u32; + float f32; + } u; + + u.u32=halfbits_to_floatbits(*h); + return u.f32; + } + + static _FORCE_INLINE_ uint16_t make_half_float(float f) { + + union { + float fv; + uint32_t ui; + } ci; + ci.fv=f; + + uint32_t x = ci.ui; + uint32_t sign = (unsigned short)(x >> 31); + uint32_t mantissa; + uint32_t exp; + uint16_t hf; + + // get mantissa + mantissa = x & ((1 << 23) - 1); + // get exponent bits + exp = x & (0xFF << 23); + if (exp >= 0x47800000) + { + // check if the original single precision float number is a NaN + if (mantissa && (exp == (0xFF << 23))) + { + // we have a single precision NaN + mantissa = (1 << 23) - 1; + } + else + { + // 16-bit half-float representation stores number as Inf + mantissa = 0; + } + hf = (((uint16_t)sign) << 15) | (uint16_t)((0x1F << 10)) | + (uint16_t)(mantissa >> 13); + } + // check if exponent is <= -15 + else if (exp <= 0x38000000) + { + + /*// store a denorm half-float value or zero + exp = (0x38000000 - exp) >> 23; + mantissa >>= (14 + exp); + + hf = (((uint16_t)sign) << 15) | (uint16_t)(mantissa); + */ + hf=0; //denormals do not work for 3D, convert to zero + } + else + { + hf = (((uint16_t)sign) << 15) | + (uint16_t)((exp - 0x38000000) >> 13) | + (uint16_t)(mantissa >> 13); + } + + return hf; + } + + + }; diff --git a/core/rid.h b/core/rid.h index 85a69ac0efa..92b7e6ee693 100644 --- a/core/rid.h +++ b/core/rid.h @@ -181,6 +181,14 @@ public: } + + _FORCE_INLINE_ T * getptr(const RID& p_rid) { + + return static_cast(p_rid.get_data()); + + } + + _FORCE_INLINE_ bool owns(const RID& p_rid) const { if (p_rid.get_data()==NULL) diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index b2228a6cfa0..237b3ec3fc6 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -117,6 +117,7 @@ void RasterizerCanvasGLES3::canvas_begin(){ glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); glClear(GL_COLOR_BUFFER_BIT); storage->frame.clear_request=false; + print_line("canvas clear?"); } diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index eaa9825605d..83c40edc1da 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -15,7 +15,7 @@ RasterizerCanvas *RasterizerGLES3::get_canvas() { RasterizerScene *RasterizerGLES3::get_scene() { - return NULL; + return scene; } @@ -111,6 +111,7 @@ void RasterizerGLES3::initialize() { */ storage->initialize(); canvas->initialize(); + scene->initialize(); } void RasterizerGLES3::begin_frame(){ @@ -124,6 +125,7 @@ void RasterizerGLES3::begin_frame(){ storage->update_dirty_shaders(); storage->update_dirty_materials(); + } void RasterizerGLES3::set_current_render_target(RID p_render_target){ @@ -131,6 +133,7 @@ void RasterizerGLES3::set_current_render_target(RID p_render_target){ if (!p_render_target.is_valid() && storage->frame.current_rt && storage->frame.clear_request) { //handle pending clear request, if the framebuffer was not cleared glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + print_line("unbind clear of: "+storage->frame.clear_request_color); glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, @@ -265,8 +268,12 @@ RasterizerGLES3::RasterizerGLES3() storage = memnew( RasterizerStorageGLES3 ); canvas = memnew( RasterizerCanvasGLES3 ); + scene = memnew( RasterizerSceneGLES3 ); canvas->storage=storage; storage->canvas=canvas; + scene->storage=storage; + storage->scene=scene; + } diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index d461664ea22..f70dac506db 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -4,6 +4,7 @@ #include "servers/visual/rasterizer.h" #include "rasterizer_storage_gles3.h" #include "rasterizer_canvas_gles3.h" +#include "rasterizer_scene_gles3.h" class RasterizerGLES3 : public Rasterizer { @@ -12,6 +13,8 @@ class RasterizerGLES3 : public Rasterizer { RasterizerStorageGLES3 *storage; RasterizerCanvasGLES3 *canvas; + RasterizerSceneGLES3 *scene; + public: virtual RasterizerStorage *get_storage(); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp new file mode 100644 index 00000000000..121620594d0 --- /dev/null +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -0,0 +1,808 @@ +#include "rasterizer_scene_gles3.h" +#include "globals.h" +static _FORCE_INLINE_ void store_matrix32(const Matrix32& p_mtx, float* p_array) { + + p_array[ 0]=p_mtx.elements[0][0]; + p_array[ 1]=p_mtx.elements[0][1]; + p_array[ 2]=0; + p_array[ 3]=0; + p_array[ 4]=p_mtx.elements[1][0]; + p_array[ 5]=p_mtx.elements[1][1]; + p_array[ 6]=0; + p_array[ 7]=0; + p_array[ 8]=0; + p_array[ 9]=0; + p_array[10]=1; + p_array[11]=0; + p_array[12]=p_mtx.elements[2][0]; + p_array[13]=p_mtx.elements[2][1]; + p_array[14]=0; + p_array[15]=1; +} + + +static _FORCE_INLINE_ void store_transform(const Transform& p_mtx, float* p_array) { + p_array[ 0]=p_mtx.basis.elements[0][0]; + p_array[ 1]=p_mtx.basis.elements[1][0]; + p_array[ 2]=p_mtx.basis.elements[2][0]; + p_array[ 3]=0; + p_array[ 4]=p_mtx.basis.elements[0][1]; + p_array[ 5]=p_mtx.basis.elements[1][1]; + p_array[ 6]=p_mtx.basis.elements[2][1]; + p_array[ 7]=0; + p_array[ 8]=p_mtx.basis.elements[0][2]; + p_array[ 9]=p_mtx.basis.elements[1][2]; + p_array[10]=p_mtx.basis.elements[2][2]; + p_array[11]=0; + p_array[12]=p_mtx.origin.x; + p_array[13]=p_mtx.origin.y; + p_array[14]=p_mtx.origin.z; + p_array[15]=1; +} + +static _FORCE_INLINE_ void store_camera(const CameraMatrix& p_mtx, float* p_array) { + + for (int i=0;i<4;i++) { + for (int j=0;j<4;j++) { + + p_array[i*4+j]=p_mtx.matrix[i][j]; + } + } +} + + + +RID RasterizerSceneGLES3::light_instance_create(RID p_light) { + + + return RID(); +} + +void RasterizerSceneGLES3::light_instance_set_transform(RID p_light_instance,const Transform& p_transform){ + + +} + + +bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_material,bool p_alpha_pass) { + + if (p_material->shader->spatial.cull_mode==RasterizerStorageGLES3::Shader::Spatial::CULL_MODE_DISABLED) { + glDisable(GL_CULL_FACE); + } else { + glEnable(GL_CULL_FACE); + } + + //glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); + + /* + if (p_material->flags[VS::MATERIAL_FLAG_WIREFRAME]) + glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); + else + glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); + */ + + //if (p_material->line_width) + // glLineWidth(p_material->line_width); + + + //blend mode + if (state.current_blend_mode!=p_material->shader->spatial.blend_mode) { + + switch(p_material->shader->spatial.blend_mode) { + + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MIX: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_ADD: { + + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(p_alpha_pass?GL_SRC_ALPHA:GL_ONE,GL_ONE); + + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_SUB: { + + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MUL: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + } break; + } + + state.current_blend_mode=p_material->shader->spatial.blend_mode; + + } + + //material parameters + + state.scene_shader.set_custom_shader(p_material->shader->custom_code_id); + bool rebind = state.scene_shader.bind(); + + + if (p_material->ubo_id) { + glBindBufferBase(GL_UNIFORM_BUFFER,1,p_material->ubo_id); + } + + + + int tc = p_material->textures.size(); + RID* textures = p_material->textures.ptr(); + + for(int i=0;itexture_owner.getornull( textures[i] ); + if (!t) { + //check hints + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + continue; + } + + glBindTexture(t->target,t->tex_id); + } + + + return rebind; + +} + + +void RasterizerSceneGLES3::_setup_geometry(RenderList::Element *e) { + + switch(e->instance->base_type) { + + case VS::INSTANCE_MESH: { + + RasterizerStorageGLES3::Surface *s = static_cast(e->geometry); + glBindVertexArray(s->array_id); // everything is so easy nowadays + } break; + } + +} + +static const GLenum gl_primitive[]={ + GL_POINTS, + GL_LINES, + GL_LINE_STRIP, + GL_LINE_LOOP, + GL_TRIANGLES, + GL_TRIANGLE_STRIP, + GL_TRIANGLE_FAN +}; + + + +void RasterizerSceneGLES3::_render_geometry(RenderList::Element *e) { + + switch(e->instance->base_type) { + + case VS::INSTANCE_MESH: { + + RasterizerStorageGLES3::Surface *s = static_cast(e->geometry); + + if (s->index_array_len>0) { + + glDrawElements(gl_primitive[s->primitive],s->index_array_len, (s->array_len>=(1<<16))?GL_UNSIGNED_INT:GL_UNSIGNED_SHORT,0); + + } else { + + glDrawArrays(gl_primitive[s->primitive],0,s->array_len); + + } + + } break; + } + +} + +void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_element_count,const Transform& p_view_transform,const CameraMatrix& p_projection,bool p_reverse_cull,bool p_alpha_pass) { + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { + //p_reverse_cull=!p_reverse_cull; + glFrontFace(GL_CCW); + } else { + glFrontFace(GL_CW); + } + + glBindBufferBase(GL_UNIFORM_BUFFER,0,state.scene_ubo); //bind globals ubo + + state.scene_shader.set_conditional(SceneShaderGLES3::USE_SKELETON,false); + + state.current_blend_mode=-1; + + glDisable(GL_BLEND); + + RasterizerStorageGLES3::Material* prev_material=NULL; + RasterizerStorageGLES3::Geometry* prev_geometry=NULL; + VS::InstanceType prev_base_type = VS::INSTANCE_MAX; + + for (int i=0;imaterial; + + bool rebind=i==0; + + if (material!=prev_material || rebind) { + + rebind = _setup_material(material,p_alpha_pass); +// _rinfo.mat_change_count++; + } + + + if (prev_base_type != e->instance->base_type || prev_geometry!=e->geometry) { + + _setup_geometry(e); + } + +// _set_cull(e->mirror,p_reverse_cull); + + state.scene_shader.set_uniform(SceneShaderGLES3::NORMAL_MULT, e->instance->mirror?-1.0:1.0); + state.scene_shader.set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, e->instance->transform); + + +// _render(e->geometry, material, skeleton,e->owner,e->instance->transform); + + _render_geometry(e); + + prev_material=material; + prev_base_type=e->instance->base_type; + prev_geometry=e->geometry; + } + + //print_line("shaderchanges: "+itos(p_alpha_pass)+": "+itos(_rinfo.shader_change_count)); + + + glFrontFace(GL_CW); + glBindVertexArray(0); + +} + +void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_geometry, InstanceBase *p_instance, RasterizerStorageGLES3::GeometryOwner *p_owner,int p_material) { + + RasterizerStorageGLES3::Material *m=NULL; + RID m_src=p_instance->material_override.is_valid() ? p_instance->material_override :(p_material>=0?p_instance->materials[p_material]:p_geometry->material); + +/* +#ifdef DEBUG_ENABLED + if (current_debug==VS::SCENARIO_DEBUG_OVERDRAW) { + m_src=overdraw_material; + } + +#endif +*/ + if (m_src.is_valid()) { + m=storage->material_owner.getornull( m_src ); + if (!m->shader) { + m=NULL; + } + } + + if (!m) { + m=storage->material_owner.getptr( default_material ); + } + + ERR_FAIL_COND(!m); + + + + //bool has_base_alpha=(m->shader_cache && m->shader_cache->has_alpha); + //bool has_blend_alpha=m->blend_mode!=VS::MATERIAL_BLEND_MODE_MIX || m->flags[VS::MATERIAL_FLAG_ONTOP]; + bool has_alpha = false; //has_base_alpha || has_blend_alpha; + +#if 0 + if (shadow) { + + if (has_blend_alpha || (has_base_alpha && m->depth_draw_mode!=VS::MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA)) + return; //bye + + if (!m->shader_cache || (!m->shader_cache->writes_vertex && !m->shader_cache->uses_discard && m->depth_draw_mode!=VS::MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA)) { + //shader does not use discard and does not write a vertex position, use generic material + if (p_instance->cast_shadows == VS::SHADOW_CASTING_SETTING_DOUBLE_SIDED) + m = shadow_mat_double_sided_ptr; + else + m = shadow_mat_ptr; + if (m->last_pass!=frame) { + + if (m->shader.is_valid()) { + + m->shader_cache=shader_owner.get(m->shader); + if (m->shader_cache) { + + + if (!m->shader_cache->valid) + m->shader_cache=NULL; + } else { + m->shader=RID(); + } + + } else { + m->shader_cache=NULL; + } + + m->last_pass=frame; + } + } + + render_list = &opaque_render_list; + /* notyet + if (!m->shader_cache || m->shader_cache->can_zpass) + render_list = &alpha_render_list; + } else { + render_list = &opaque_render_list; + }*/ + + } else { + if (has_alpha) { + render_list = &alpha_render_list; + } else { + render_list = &opaque_render_list; + + } + } +#endif + + RenderList::Element *e = has_alpha ? render_list.add_alpha_element() : render_list.add_element(); + + if (!e) + return; + + e->geometry=p_geometry; + e->material=m; + e->instance=p_instance; + e->owner=p_owner; + e->additive=false; + e->additive_ptr=&e->additive; + e->sort_key=0; + + if (e->geometry->last_pass!=render_pass) { + e->geometry->last_pass=render_pass; + e->geometry->index=current_geometry_index++; + } + + e->sort_key|=uint64_t(e->instance->base_type)<sort_key|=uint64_t(e->instance->base_type)<material->last_pass!=render_pass) { + e->material->last_pass=render_pass; + e->material->index=current_material_index++; + } + + e->sort_key|=uint64_t(e->material->index)<sort_key|=uint64_t(e->instance->depth_layer)<geometry->type==RasterizerStorageGLES3::Geometry::GEOMETRY_MULTISURFACE) + // e->sort_flags|=RenderList::SORT_FLAG_INSTANCING; + + bool mirror = e->instance->mirror; + +// if (m->flags[VS::MATERIAL_FLAG_INVERT_FACES]) +// e->mirror=!e->mirror; + + if (mirror) { + e->sort_key|=RenderList::SORT_KEY_MIRROR_FLAG; + } + + //e->light_type=0xFF; // no lights! + e->sort_key|=uint64_t(0xF)<sort_key|=uint64_t(0xFFFF)<depth_draw_mode==VS::MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA) { + + //if nothing exists, add this element as opaque too + RenderList::Element *oe = opaque_render_list.add_element(); + + if (!oe) + return; + + memcpy(oe,e,sizeof(RenderList::Element)); + oe->additive_ptr=&oe->additive; + } +*/ + +#if 0 + if (shadow || m->flags[VS::MATERIAL_FLAG_UNSHADED] || current_debug==VS::SCENARIO_DEBUG_SHADELESS) { + + e->light_type=0x7F; //unshaded is zero + } else { + + bool duplicate=false; + + + for(int i=0;isort_key; + uint8_t light_type = VS::LIGHT_DIRECTIONAL; + if (directional_lights[i]->base->shadow_enabled) { + light_type|=0x8; + if (directional_lights[i]->base->directional_shadow_mode==VS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) + light_type|=0x10; + else if (directional_lights[i]->base->directional_shadow_mode==VS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_4_SPLITS) + light_type|=0x30; + + } + + RenderList::Element *ec; + if (duplicate) { + + ec = render_list->add_element(); + memcpy(ec,e,sizeof(RenderList::Element)); + } else { + + ec=e; + duplicate=true; + } + + ec->light_type=light_type; + ec->light=sort_key; + ec->additive_ptr=&e->additive; + + } + + + const RID *liptr = p_instance->light_instances.ptr(); + int ilc=p_instance->light_instances.size(); + + + + for(int i=0;ilast_pass!=scene_pass) //lit by light not in visible scene + continue; + uint8_t light_type=li->base->type|0x40; //penalty to ensure directionals always go first + if (li->base->shadow_enabled) { + light_type|=0x8; + } + uint16_t sort_key =li->sort_key; + + RenderList::Element *ec; + if (duplicate) { + + ec = render_list->add_element(); + memcpy(ec,e,sizeof(RenderList::Element)); + } else { + + duplicate=true; + ec=e; + } + + ec->light_type=light_type; + ec->light=sort_key; + ec->additive_ptr=&e->additive; + + } + + + + } + +#endif +} + +void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment){ + + + //fill up ubo + + store_camera(p_cam_projection,state.ubo_data.projection_matrix); + store_transform(p_cam_transform,state.ubo_data.camera_matrix); + store_transform(p_cam_transform.affine_inverse(),state.ubo_data.camera_inverse_matrix); + for(int i=0;i<4;i++) { + state.ubo_data.time[i]=storage->frame.time[i]; + } + + + glBindBuffer(GL_UNIFORM_BUFFER, state.scene_ubo); + glBufferSubData(GL_UNIFORM_BUFFER, 0,sizeof(State::SceneDataUBO), &state.ubo_data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + + render_list.clear(); + + render_pass++; + current_material_index=0; + + //fill list + + for(int i=0;ibase_type) { + + case VS::INSTANCE_MESH: { + + RasterizerStorageGLES3::Mesh *mesh = storage->mesh_owner.getptr(inst->base); + ERR_CONTINUE(!mesh); + + int ssize = mesh->surfaces.size(); + + for (int i=0;imaterials[i].is_valid() ? i : -1; + RasterizerStorageGLES3::Surface *s = mesh->surfaces[i]; + _add_geometry(s,inst,NULL,mat_idx); + } + + //mesh->last_pass=frame; + + } break; + case VS::INSTANCE_MULTIMESH: { + + } break; + case VS::INSTANCE_IMMEDIATE: { + + } break; + + } + } + + // + + + glEnable(GL_BLEND); + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); + glClearDepth(1.0); + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + + + if (true) { + + if (storage->frame.clear_request) { + + glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); + glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); + storage->frame.clear_request=false; + + } + } + + state.current_depth_test=true; + state.current_depth_mask=true; + state.texscreen_copied=false; + + glBlendEquation(GL_FUNC_ADD); + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + glDisable(GL_BLEND); + //current_blend_mode=VS::MATERIAL_BLEND_MODE_MIX; + + + render_list.sort_by_key(false); + + //_render_list_forward(&opaque_render_list,camera_transform,camera_transform_inverse,camera_projection,false,fragment_lighting); +/* + if (draw_tex_background) { + + //most 3D vendors recommend drawing a texture bg or skybox here, + //after opaque geometry has been drawn + //so the zbuffer can get rid of most pixels + _draw_tex_bg(); + } +*/ + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + +// glDisable(GL_BLEND); +// current_blend_mode=VS::MATERIAL_BLEND_MODE_MIX; +// state.scene_shader.set_conditional(SceneShaderGLES3::USE_GLOW,false); +// if (current_env && current_env->fx_enabled[VS::ENV_FX_GLOW]) { +// glColorMask(1,1,1,0); //don't touch alpha +// } + + + _render_list(render_list.elements,render_list.element_count,p_cam_transform,p_cam_projection,false,false); + + //_render_list_forward(&alpha_render_list,camera_transform,camera_transform_inverse,camera_projection,false,fragment_lighting,true); + //glColorMask(1,1,1,1); + +// state.scene_shader.set_conditional( SceneShaderGLES3::USE_FOG,false); + + glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); +#if 0 + if (use_fb) { + + + + for(int i=0;ifx_enabled[VS::ENV_FX_HDR]) { + + int hdr_tm = current_env->fx_param[VS::ENV_FX_PARAM_HDR_TONEMAPPER]; + switch(hdr_tm) { + case VS::ENV_FX_HDR_TONE_MAPPER_LINEAR: { + + + } break; + case VS::ENV_FX_HDR_TONE_MAPPER_LOG: { + copy_shader.set_conditional(CopyShaderGLES2::USE_LOG_TONEMAPPER,true); + + } break; + case VS::ENV_FX_HDR_TONE_MAPPER_REINHARDT: { + copy_shader.set_conditional(CopyShaderGLES2::USE_REINHARDT_TONEMAPPER,true); + } break; + case VS::ENV_FX_HDR_TONE_MAPPER_REINHARDT_AUTOWHITE: { + + copy_shader.set_conditional(CopyShaderGLES2::USE_REINHARDT_TONEMAPPER,true); + copy_shader.set_conditional(CopyShaderGLES2::USE_AUTOWHITE,true); + } break; + } + + + _process_hdr(); + } + if (current_env && current_env->fx_enabled[VS::ENV_FX_GLOW]) { + _process_glow_bloom(); + int glow_transfer_mode=current_env->fx_param[VS::ENV_FX_PARAM_GLOW_BLUR_BLEND_MODE]; + if (glow_transfer_mode==1) + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW_SCREEN,true); + if (glow_transfer_mode==2) + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW_SOFTLIGHT,true); + } + + glBindFramebuffer(GL_FRAMEBUFFER, current_rt?current_rt->fbo:base_framebuffer); + + Size2 size; + if (current_rt) { + glBindFramebuffer(GL_FRAMEBUFFER, current_rt->fbo); + glViewport( 0,0,viewport.width,viewport.height); + size=Size2(viewport.width,viewport.height); + } else { + glBindFramebuffer(GL_FRAMEBUFFER, base_framebuffer); + glViewport( viewport.x, window_size.height-(viewport.height+viewport.y), viewport.width,viewport.height ); + size=Size2(viewport.width,viewport.height); + } + + //time to copy!!! + copy_shader.set_conditional(CopyShaderGLES2::USE_BCS,current_env && current_env->fx_enabled[VS::ENV_FX_BCS]); + copy_shader.set_conditional(CopyShaderGLES2::USE_SRGB,current_env && current_env->fx_enabled[VS::ENV_FX_SRGB]); + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW,current_env && current_env->fx_enabled[VS::ENV_FX_GLOW]); + copy_shader.set_conditional(CopyShaderGLES2::USE_HDR,current_env && current_env->fx_enabled[VS::ENV_FX_HDR]); + copy_shader.set_conditional(CopyShaderGLES2::USE_NO_ALPHA,true); + copy_shader.set_conditional(CopyShaderGLES2::USE_FXAA,current_env && current_env->fx_enabled[VS::ENV_FX_FXAA]); + + copy_shader.bind(); + //copy_shader.set_uniform(CopyShaderGLES2::SOURCE,0); + + if (current_env && current_env->fx_enabled[VS::ENV_FX_GLOW]) { + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, framebuffer.blur[0].color ); + glUniform1i(copy_shader.get_uniform_location(CopyShaderGLES2::GLOW_SOURCE),1); + + } + + if (current_env && current_env->fx_enabled[VS::ENV_FX_HDR]) { + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, current_vd->lum_color ); + glUniform1i(copy_shader.get_uniform_location(CopyShaderGLES2::HDR_SOURCE),2); + copy_shader.set_uniform(CopyShaderGLES2::TONEMAP_EXPOSURE,float(current_env->fx_param[VS::ENV_FX_PARAM_HDR_EXPOSURE])); + copy_shader.set_uniform(CopyShaderGLES2::TONEMAP_WHITE,float(current_env->fx_param[VS::ENV_FX_PARAM_HDR_WHITE])); + + } + + if (current_env && current_env->fx_enabled[VS::ENV_FX_FXAA]) + copy_shader.set_uniform(CopyShaderGLES2::PIXEL_SIZE,Size2(1.0/size.x,1.0/size.y)); + + + if (current_env && current_env->fx_enabled[VS::ENV_FX_BCS]) { + + Vector3 bcs; + bcs.x=current_env->fx_param[VS::ENV_FX_PARAM_BCS_BRIGHTNESS]; + bcs.y=current_env->fx_param[VS::ENV_FX_PARAM_BCS_CONTRAST]; + bcs.z=current_env->fx_param[VS::ENV_FX_PARAM_BCS_SATURATION]; + copy_shader.set_uniform(CopyShaderGLES2::BCS,bcs); + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, framebuffer.color ); + glUniform1i(copy_shader.get_uniform_location(CopyShaderGLES2::SOURCE),0); + + _copy_screen_quad(); + + copy_shader.set_conditional(CopyShaderGLES2::USE_BCS,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_SRGB,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_HDR,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_NO_ALPHA,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_FXAA,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW_SCREEN,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_GLOW_SOFTLIGHT,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_REINHARDT_TONEMAPPER,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_AUTOWHITE,false); + copy_shader.set_conditional(CopyShaderGLES2::USE_LOG_TONEMAPPER,false); + + state.scene_shader.set_conditional(SceneShaderGLES3::USE_8BIT_HDR,false); + + + if (current_env && current_env->fx_enabled[VS::ENV_FX_HDR] && GLOBAL_DEF("rasterizer/debug_hdr",false)) { + _debug_luminances(); + } + } + + current_env=NULL; + current_debug=VS::SCENARIO_DEBUG_DISABLED; + if (GLOBAL_DEF("rasterizer/debug_shadow_maps",false)) { + _debug_shadows(); + } +// _debug_luminances(); +// _debug_samplers(); + + if (using_canvas_bg) { + using_canvas_bg=false; + glColorMask(1,1,1,1); //don't touch alpha + } +#endif +} + +bool RasterizerSceneGLES3::free(RID p_rid) { + + return false; + +} + +void RasterizerSceneGLES3::initialize() { + + state.scene_shader.init(); + + default_shader = storage->shader_create(VS::SHADER_SPATIAL); + default_material = storage->material_create(); + storage->material_set_shader(default_material,default_shader); + + glGenBuffers(1, &state.scene_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, state.scene_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(State::SceneDataUBO), &state.scene_ubo, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + render_list.max_elements=GLOBAL_DEF("rendering/gles3/max_renderable_elements",(int)RenderList::DEFAULT_MAX_ELEMENTS); + if (render_list.max_elements>1000000) + render_list.max_elements=1000000; + if (render_list.max_elements<1024) + render_list.max_elements=1024; + + render_list.init(); +} + +void RasterizerSceneGLES3::finalize(){ + + +} + + +RasterizerSceneGLES3::RasterizerSceneGLES3() +{ + +} diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h new file mode 100644 index 00000000000..6fba777fc89 --- /dev/null +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -0,0 +1,178 @@ +#ifndef RASTERIZERSCENEGLES3_H +#define RASTERIZERSCENEGLES3_H + +#include "rasterizer_storage_gles3.h" +#include "drivers/gles3/shaders/scene.glsl.h" + +class RasterizerSceneGLES3 : public RasterizerScene { +public: + + uint64_t render_pass; + uint32_t current_material_index; + uint32_t current_geometry_index; + + RID default_material; + RID default_shader; + + RasterizerStorageGLES3 *storage; + + + + struct State { + + bool current_depth_test; + bool current_depth_mask; + bool texscreen_copied; + int current_blend_mode; + + SceneShaderGLES3 scene_shader; + + + struct SceneDataUBO { + + float projection_matrix[16]; + float camera_inverse_matrix[16]; + float camera_matrix[16]; + float time[4]; + float ambient_light[4]; + + } ubo_data; + + GLuint scene_ubo; + + + + } state; + + struct RenderList { + + enum { + DEFAULT_MAX_ELEMENTS=65536, + MAX_LIGHTS=4, + SORT_FLAG_SKELETON=1, + SORT_FLAG_INSTANCING=2, + + SORT_KEY_DEPTH_LAYER_SHIFT=58, + SORT_KEY_LIGHT_TYPE_SHIFT=54, //type is most important + SORT_KEY_LIGHT_INDEX_SHIFT=38, //type is most important + SORT_KEY_MATERIAL_INDEX_SHIFT=22, + SORT_KEY_GEOMETRY_INDEX_SHIFT=6, + SORT_KEY_GEOMETRY_TYPE_SHIFT=2, + SORT_KEY_SKELETON_FLAG=2, + SORT_KEY_MIRROR_FLAG=1 + + }; + + int max_elements; + + struct Element { + + RasterizerScene::InstanceBase *instance; + RasterizerStorageGLES3::Geometry *geometry; + RasterizerStorageGLES3::Material *material; + RasterizerStorageGLES3::GeometryOwner *owner; + uint64_t sort_key; + bool *additive_ptr; + bool additive; + + }; + + + Element *_elements; + Element **elements; + + int element_count; + int alpha_element_count; + + void clear() { + + element_count=0; + alpha_element_count=0; + } + + //should eventually be replaced by radix + + struct SortByKey { + + _FORCE_INLINE_ bool operator()(const Element* A, const Element* B ) const { + return A->sort_key < B->sort_key; + } + }; + + void sort_by_key(bool p_alpha) { + + SortArray sorter; + if (p_alpha) { + sorter.sort(&elements[max_elements-alpha_element_count-1],alpha_element_count); + } else { + sorter.sort(elements,element_count); + } + } + + + _FORCE_INLINE_ Element* add_element() { + + if (element_count+alpha_element_count>=max_elements) + return NULL; + elements[element_count]=&_elements[element_count]; + return elements[element_count++]; + } + + _FORCE_INLINE_ Element* add_alpha_element() { + + if (element_count+alpha_element_count>=max_elements) + return NULL; + int idx = max_elements-alpha_element_count-1; + elements[idx]=&_elements[idx]; + alpha_element_count++; + return elements[idx]; + } + + void init() { + + element_count = 0; + alpha_element_count =0; + elements=memnew_arr(Element*,max_elements); + _elements=memnew_arr(Element,max_elements); + for (int i=0;istate.canvas_shader, - &canvas->state.canvas_shader, + &scene->state.scene_shader, &canvas->state.canvas_shader, }; @@ -1108,6 +1109,37 @@ void RasterizerStorageGLES3::_update_shader(Shader* p_shader) const { actions->uniforms=&p_shader->uniforms; } break; + + case VS::SHADER_SPATIAL: { + + p_shader->spatial.blend_mode=Shader::Spatial::BLEND_MODE_MIX; + p_shader->spatial.depth_draw_mode=Shader::Spatial::DEPTH_DRAW_OPAQUE; + p_shader->spatial.cull_mode=Shader::Spatial::CULL_MODE_BACK; + p_shader->spatial.uses_alpha=false; + p_shader->spatial.unshaded=false; + p_shader->spatial.ontop=false; + + shaders.actions_scene.render_mode_values["blend_add"]=Pair(&p_shader->spatial.blend_mode,Shader::Spatial::BLEND_MODE_ADD); + shaders.actions_scene.render_mode_values["blend_mix"]=Pair(&p_shader->spatial.blend_mode,Shader::Spatial::BLEND_MODE_MIX); + shaders.actions_scene.render_mode_values["blend_sub"]=Pair(&p_shader->spatial.blend_mode,Shader::Spatial::BLEND_MODE_SUB); + shaders.actions_scene.render_mode_values["blend_mul"]=Pair(&p_shader->spatial.blend_mode,Shader::Spatial::BLEND_MODE_MUL); + + shaders.actions_scene.render_mode_values["depth_draw_opaque"]=Pair(&p_shader->spatial.depth_draw_mode,Shader::Spatial::DEPTH_DRAW_OPAQUE); + shaders.actions_scene.render_mode_values["depth_draw_always"]=Pair(&p_shader->spatial.depth_draw_mode,Shader::Spatial::DEPTH_DRAW_ALWAYS); + shaders.actions_scene.render_mode_values["depth_draw_never"]=Pair(&p_shader->spatial.depth_draw_mode,Shader::Spatial::DEPTH_DRAW_NEVER); + shaders.actions_scene.render_mode_values["depth_draw_alpha_prepass"]=Pair(&p_shader->spatial.depth_draw_mode,Shader::Spatial::DEPTH_DRAW_ALPHA_PREPASS); + + shaders.actions_scene.render_mode_values["cull_front"]=Pair(&p_shader->spatial.cull_mode,Shader::Spatial::CULL_MODE_FRONT); + shaders.actions_scene.render_mode_values["cull_back"]=Pair(&p_shader->spatial.cull_mode,Shader::Spatial::CULL_MODE_BACK); + shaders.actions_scene.render_mode_values["cull_disable"]=Pair(&p_shader->spatial.cull_mode,Shader::Spatial::CULL_MODE_DISABLED); + + shaders.actions_canvas.render_mode_flags["unshaded"]=&p_shader->spatial.unshaded; + shaders.actions_canvas.render_mode_flags["ontop"]=&p_shader->spatial.ontop; + + shaders.actions_canvas.usage_flag_pointers["ALPHA"]=&p_shader->spatial.uses_alpha; + + } + } @@ -1905,97 +1937,685 @@ void RasterizerStorageGLES3::update_dirty_materials() { RID RasterizerStorageGLES3::mesh_create(){ - return RID(); + Mesh * mesh = memnew( Mesh ); + + return mesh_owner.make_rid(mesh); } -void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes){ + +void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const AABB& p_aabb,const Vector >& p_blend_shapes,const Vector& p_bone_aabbs){ + + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + ERR_FAIL_COND(!(p_format&VS::ARRAY_FORMAT_VERTEX)); + + //must have index and bones, both. + { + uint32_t bones_weight = VS::ARRAY_FORMAT_BONES|VS::ARRAY_FORMAT_WEIGHTS; + ERR_EXPLAIN("Array must have both bones and weights in format or none."); + ERR_FAIL_COND( (p_format&bones_weight) && (p_format&bones_weight)!=bones_weight ); + } + bool has_morph = p_blend_shapes.size(); + + Surface::Attrib attribs[VS::ARRAY_MAX],morph_attribs[VS::ARRAY_MAX]; + + int stride=0; + int morph_stride=0; + + for(int i=0;i=(1<<16)) { + attribs[i].type=GL_UNSIGNED_INT; + attribs[i].stride=4; + } else { + attribs[i].type=GL_UNSIGNED_SHORT; + attribs[i].stride=2; + } + + attribs[i].normalized=GL_FALSE; + + } break; + + } + } + + for(int i=0;imorph_target_count); + + for(int i=0;iactive=true; + surface->array_len=p_vertex_count; + surface->index_array_len=p_index_count; + surface->primitive=p_primitive; + surface->mesh=mesh; + surface->format=p_format; + surface->skeleton_bone_aabb=p_bone_aabbs; + surface->skeleton_bone_used.resize(surface->skeleton_bone_aabb.size()); + surface->aabb=p_aabb; + surface->max_bone=p_bone_aabbs.size(); + + for(int i=0;iskeleton_bone_used.size();i++) { + if (surface->skeleton_bone_aabb[i].size.x<0 || surface->skeleton_bone_aabb[i].size.y<0 || surface->skeleton_bone_aabb[i].size.z<0) { + surface->skeleton_bone_used[i]=false; + } else { + surface->skeleton_bone_used[i]=true; + } + } + + for(int i=0;iattribs[i]=attribs[i]; + surface->morph_attribs[i]=morph_attribs[i]; + } + + { + + DVector::Read vr = p_array.read(); + + glGenBuffers(1,&surface->vertex_id); + glBindBuffer(GL_ARRAY_BUFFER,surface->vertex_id); + glBufferData(GL_ARRAY_BUFFER,array_size,vr.ptr(),GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + + if (p_format&VS::ARRAY_FORMAT_INDEX) { + + DVector::Read ir = p_index_array.read(); + + glGenBuffers(1,&surface->index_id); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,surface->index_id); + glBufferData(GL_ELEMENT_ARRAY_BUFFER,index_array_size,ir.ptr(),GL_STATIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,0); //unbind + } + + //generate arrays for faster state switching + + glGenVertexArrays(1,&surface->array_id); + glBindVertexArray(surface->array_id); + glBindBuffer(GL_ARRAY_BUFFER,surface->vertex_id); + + for(int i=0;iindex_id) { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,surface->index_id); + } + + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + } + + { + + //blend shapes + + for(int i=0;i::Read vr = p_blend_shapes[i].read(); + + glGenBuffers(1,&mt.vertex_id); + glBindBuffer(GL_ARRAY_BUFFER,mt.vertex_id); + glBufferData(GL_ARRAY_BUFFER,array_size,vr.ptr(),GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + glGenVertexArrays(1,&mt.array_id); + glBindVertexArray(mt.array_id); + glBindBuffer(GL_ARRAY_BUFFER,mt.vertex_id); + + for(int i=0;imorph_targets.push_back(mt); + + } + } + + mesh->surfaces.push_back(surface); + mesh->instance_change_notify(); } void RasterizerStorageGLES3::mesh_set_morph_target_count(RID p_mesh,int p_amount){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + + ERR_FAIL_COND(mesh->surfaces.size()!=0); + ERR_FAIL_COND(p_amount<0); + + mesh->morph_target_count=p_amount; } int RasterizerStorageGLES3::mesh_get_morph_target_count(RID p_mesh) const{ - return 0; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,0); + + return mesh->morph_target_count; } void RasterizerStorageGLES3::mesh_set_morph_target_mode(RID p_mesh,VS::MorphTargetMode p_mode){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + + mesh->morph_target_mode=p_mode; } VS::MorphTargetMode RasterizerStorageGLES3::mesh_get_morph_target_mode(RID p_mesh) const{ - return VS::MORPH_MODE_NORMALIZED; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,VS::MORPH_MODE_NORMALIZED); + + return mesh->morph_target_mode; } void RasterizerStorageGLES3::mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_INDEX(p_surface,mesh->surfaces.size()); + + mesh->surfaces[p_surface]->material=p_material; } RID RasterizerStorageGLES3::mesh_surface_get_material(RID p_mesh, int p_surface) const{ - return RID(); + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,RID()); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),RID()); + + return mesh->surfaces[p_surface]->material; } int RasterizerStorageGLES3::mesh_surface_get_array_len(RID p_mesh, int p_surface) const{ - return 0; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,0); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),0); + + return mesh->surfaces[p_surface]->array_len; + } int RasterizerStorageGLES3::mesh_surface_get_array_index_len(RID p_mesh, int p_surface) const{ + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,0); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),0); - return 0; + return mesh->surfaces[p_surface]->index_array_len; } DVector RasterizerStorageGLES3::mesh_surface_get_array(RID p_mesh, int p_surface) const{ - return DVector(); + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,DVector()); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),DVector()); + + Surface *surface = mesh->surfaces[p_surface]; + + glBindBuffer(GL_ARRAY_BUFFER,surface->vertex_id); + void * data = glMapBufferRange(GL_ARRAY_BUFFER,0,surface->array_len,GL_MAP_READ_BIT); + + ERR_FAIL_COND_V(!data,DVector()); + + DVector ret; + ret.resize(surface->array_len); + + { + + DVector::Write w = ret.write(); + copymem(w.ptr(),data,surface->array_len); + } + glUnmapBuffer(GL_ARRAY_BUFFER); + + + return ret; } -DVector RasterizerStorageGLES3::mesh_surface_get_index_array(RID p_mesh, int p_surface) const{ +DVector RasterizerStorageGLES3::mesh_surface_get_index_array(RID p_mesh, int p_surface) const { + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,DVector()); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),DVector()); - return DVector(); + Surface *surface = mesh->surfaces[p_surface]; + + ERR_FAIL_COND_V(surface->index_array_len==0,DVector()); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,surface->vertex_id); + void * data = glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER,0,surface->index_array_len,GL_MAP_READ_BIT); + + ERR_FAIL_COND_V(!data,DVector()); + + DVector ret; + ret.resize(surface->index_array_len); + + { + + DVector::Write w = ret.write(); + copymem(w.ptr(),data,surface->index_array_len); + } + + glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER); + + return ret; } uint32_t RasterizerStorageGLES3::mesh_surface_get_format(RID p_mesh, int p_surface) const{ - return 0; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + + ERR_FAIL_COND_V(!mesh,0); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),0); + + return mesh->surfaces[p_surface]->format; + } + VS::PrimitiveType RasterizerStorageGLES3::mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const{ - return VS::PRIMITIVE_MAX; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,VS::PRIMITIVE_MAX); + ERR_FAIL_INDEX_V(p_surface,mesh->surfaces.size(),VS::PRIMITIVE_MAX); + + return mesh->surfaces[p_surface]->primitive; } -void RasterizerStorageGLES3::mesh_remove_surface(RID p_mesh,int p_index){ +void RasterizerStorageGLES3::mesh_remove_surface(RID p_mesh, int p_surface){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + ERR_FAIL_INDEX(p_surface,mesh->surfaces.size()); + Surface *surface = mesh->surfaces[p_surface]; + + ERR_FAIL_COND(surface->index_array_len==0); + + glDeleteBuffers(1,&surface->array_id); + if (surface->index_id) { + glDeleteBuffers(1,&surface->index_id); + } + + glDeleteVertexArrays(1,&surface->array_id); + + for(int i=0;imorph_targets.size();i++) { + + glDeleteBuffers(1,&surface->morph_targets[i].vertex_id); + glDeleteVertexArrays(1,&surface->morph_targets[i].array_id); + } + + memdelete(surface); + + mesh->surfaces.remove(p_surface); + + mesh->instance_change_notify(); } int RasterizerStorageGLES3::mesh_get_surface_count(RID p_mesh) const{ - return 0; + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,0); + return mesh->surfaces.size(); + } void RasterizerStorageGLES3::mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + mesh->custom_aabb=p_aabb; } AABB RasterizerStorageGLES3::mesh_get_custom_aabb(RID p_mesh) const{ - return AABB(); + const Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND_V(!mesh,AABB()); + + return mesh->custom_aabb; + } -AABB RasterizerStorageGLES3::mesh_get_aabb(RID p_mesh) const{ +AABB RasterizerStorageGLES3::mesh_get_aabb(RID p_mesh,RID p_skeleton) const{ + + Mesh *mesh = mesh_owner.get( p_mesh ); + ERR_FAIL_COND_V(!mesh,AABB()); + + if (mesh->custom_aabb!=AABB()) + return mesh->custom_aabb; +/* + Skeleton *sk=NULL; + if (p_skeleton.is_valid()) + sk=skeleton_owner.get(p_skeleton); +*/ + AABB aabb; + /* + if (sk && sk->bones.size()!=0) { + + + for (int i=0;isurfaces.size();i++) { + + AABB laabb; + if (mesh->surfaces[i]->format&VS::ARRAY_FORMAT_BONES && mesh->surfaces[i]->skeleton_bone_aabb.size()) { + + + int bs = mesh->surfaces[i]->skeleton_bone_aabb.size(); + const AABB *skbones = mesh->surfaces[i]->skeleton_bone_aabb.ptr(); + const bool *skused = mesh->surfaces[i]->skeleton_bone_used.ptr(); + + int sbs = sk->bones.size(); + ERR_CONTINUE(bs>sbs); + Skeleton::Bone *skb = sk->bones.ptr(); + + bool first=true; + for(int j=0;jsurfaces[i]->aabb; + } + + if (i==0) + aabb=laabb; + else + aabb.merge_with(laabb); + } + } else { +*/ + for (int i=0;isurfaces.size();i++) { + + if (i==0) + aabb=mesh->surfaces[i]->aabb; + else + aabb.merge_with(mesh->surfaces[i]->aabb); + } +/* + } +*/ + return aabb; - return AABB(); } void RasterizerStorageGLES3::mesh_clear(RID p_mesh){ + Mesh *mesh = mesh_owner.getornull(p_mesh); + ERR_FAIL_COND(!mesh); + while(mesh->surfaces.size()) { + mesh_remove_surface(p_mesh,0); + } } /* MULTIMESH API */ @@ -2206,6 +2826,16 @@ void RasterizerStorageGLES3::light_directional_set_shadow_mode(RID p_light,VS::L } +VS::LightType RasterizerStorageGLES3::light_get_type(RID p_light) const { + + return VS::LIGHT_DIRECTIONAL; +} + +AABB RasterizerStorageGLES3::light_get_aabb(RID p_light) const { + + return AABB(); +} + /* PROBE API */ RID RasterizerStorageGLES3::reflection_probe_create(){ @@ -2292,6 +2922,42 @@ void RasterizerStorageGLES3::portal_set_disabled_color(RID p_portal, const Color } +void RasterizerStorageGLES3::instance_add_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance) { + + Instantiable *inst=NULL; + switch(p_instance->base_type) { + case VS::INSTANCE_MESH: { + inst = mesh_owner.getornull(p_base); + ERR_FAIL_COND(!inst); + } break; + default: { + ERR_FAIL(); + } + } + + inst->instance_list.add( &p_instance->dependency_item ); +} + +void RasterizerStorageGLES3::instance_remove_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance){ + + Instantiable *inst=NULL; + + switch(p_instance->base_type) { + case VS::INSTANCE_MESH: { + inst = mesh_owner.getornull(p_base); + ERR_FAIL_COND(!inst); + + } break; + default: { + ERR_FAIL(); + } + } + + ERR_FAIL_COND(!inst); + + inst->instance_list.remove( &p_instance->dependency_item ); +} + /* RENDER TARGET */ @@ -2773,6 +3439,15 @@ void RasterizerStorageGLES3::canvas_light_occluder_set_polylines(RID p_occluder, } +VS::InstanceType RasterizerStorageGLES3::get_base_type(RID p_rid) const { + + if (mesh_owner.owns(p_rid)) { + return VS::INSTANCE_MESH; + } + + return VS::INSTANCE_NONE; +} + bool RasterizerStorageGLES3::free(RID p_rid){ if (render_target_owner.owns(p_rid)) { @@ -2834,6 +3509,15 @@ bool RasterizerStorageGLES3::free(RID p_rid){ material_owner.free(p_rid); memdelete(material); + } else if (mesh_owner.owns(p_rid)) { + + // delete the texture + Mesh *mesh = mesh_owner.get(p_rid); + + mesh_clear(p_rid); + + mesh_owner.free(p_rid); + memdelete(mesh); } else if (canvas_occluder_owner.owns(p_rid)) { diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index b7b3e607c6a..950d65b9d06 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -10,12 +10,14 @@ #include "shader_compiler_gles3.h" class RasterizerCanvasGLES3; +class RasterizerSceneGLES3; class RasterizerStorageGLES3 : public RasterizerStorage { public: RasterizerCanvasGLES3 *canvas; + RasterizerSceneGLES3 *scene; enum FBOFormat { FBO_FORMAT_16_BITS, @@ -59,6 +61,7 @@ public: ShaderCompilerGLES3 compiler; ShaderCompilerGLES3::IdentifierActions actions_canvas; + ShaderCompilerGLES3::IdentifierActions actions_scene; } shaders; struct Resources { @@ -230,6 +233,40 @@ public: } canvas_item; + struct Spatial { + + enum BlendMode { + BLEND_MODE_MIX, + BLEND_MODE_ADD, + BLEND_MODE_SUB, + BLEND_MODE_MUL, + }; + + int blend_mode; + + enum DepthDrawMode { + DEPTH_DRAW_OPAQUE, + DEPTH_DRAW_ALWAYS, + DEPTH_DRAW_NEVER, + DEPTH_DRAW_ALPHA_PREPASS, + }; + + int depth_draw_mode; + + enum CullMode { + CULL_MODE_FRONT, + CULL_MODE_BACK, + CULL_MODE_DISABLED, + }; + + int cull_mode; + + bool uses_alpha; + bool unshaded; + bool ontop; + + } spatial; + Shader() : dirty_list(this) { shader=NULL; @@ -272,10 +309,14 @@ public: SelfList dirty_list; Vector textures; + uint32_t index; + uint64_t last_pass; + Material() : list(this), dirty_list(this) { shader=NULL; ubo_id=0; ubo_size=0; + last_pass=0; } }; @@ -300,9 +341,155 @@ public: /* MESH API */ + struct Instantiable : public RID_Data { + + enum Type { + GEOMETRY_INVALID, + GEOMETRY_SURFACE, + GEOMETRY_IMMEDIATE, + GEOMETRY_MULTISURFACE, + }; + + SelfList::List instance_list; + + _FORCE_INLINE_ void instance_change_notify() { + + SelfList *instances = instance_list.first(); + while(instances) { + + instances->self()->base_changed(); + instances=instances->next(); + } + } + + Instantiable() { } + virtual ~Instantiable() { + + while(instance_list.first()) { + instance_list.first()->self()->base_removed(); + } + } + }; + + struct Geometry : Instantiable { + + enum Type { + GEOMETRY_INVALID, + GEOMETRY_SURFACE, + GEOMETRY_IMMEDIATE, + GEOMETRY_MULTISURFACE, + }; + + Type type; + RID material; + uint64_t last_pass; + uint32_t index; + + Geometry() { + last_pass=0; + index=0; + } + + }; + + struct GeometryOwner : public Instantiable { + + virtual ~GeometryOwner() {} + }; + + struct Mesh; + struct Surface : public Geometry { + + struct Attrib { + + bool enabled; + GLuint index; + GLint size; + GLenum type; + GLboolean normalized; + GLsizei stride; + uint32_t offset; + }; + + Attrib attribs[VS::ARRAY_MAX]; + Attrib morph_attribs[VS::ARRAY_MAX]; + + + Mesh *mesh; + uint32_t format; + + GLuint array_id; + GLuint vertex_id; + GLuint index_id; + + Vector skeleton_bone_aabb; + Vector skeleton_bone_used; + + //bool packed; + + struct MorphTarget { + GLuint vertex_id; + GLuint array_id; + }; + + Vector morph_targets; + + AABB aabb; + + int array_len; + int index_array_len; + int max_bone; + + int array_bytes; + + + VS::PrimitiveType primitive; + + bool active; + + Surface() { + + array_bytes=0; + mesh=NULL; + format=0; + array_id=0; + vertex_id=0; + index_id=0; + array_len=0; + type=GEOMETRY_SURFACE; + primitive=VS::PRIMITIVE_POINTS; + index_array_len=0; + active=false; + + } + + ~Surface() { + + } + }; + + + struct Mesh : public GeometryOwner { + + bool active; + Vector surfaces; + int morph_target_count; + VS::MorphTargetMode morph_target_mode; + AABB custom_aabb; + mutable uint64_t last_pass; + Mesh() { + morph_target_mode=VS::MORPH_MODE_NORMALIZED; + morph_target_count=0; + last_pass=0; + active=false; + } + }; + + mutable RID_Owner mesh_owner; + virtual RID mesh_create(); - virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >()); + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const AABB& p_aabb,const Vector >& p_blend_shapes=Vector >(),const Vector& p_bone_aabbs=Vector()); virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount); virtual int mesh_get_morph_target_count(RID p_mesh) const; @@ -324,13 +511,13 @@ public: virtual uint32_t mesh_surface_get_format(RID p_mesh, int p_surface) const; virtual VS::PrimitiveType mesh_surface_get_primitive_type(RID p_mesh, int p_surface) const; - virtual void mesh_remove_surface(RID p_mesh,int p_index); + virtual void mesh_remove_surface(RID p_mesh, int p_surface); virtual int mesh_get_surface_count(RID p_mesh) const; virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb); virtual AABB mesh_get_custom_aabb(RID p_mesh) const; - virtual AABB mesh_get_aabb(RID p_mesh) const; + virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton) const; virtual void mesh_clear(RID p_mesh); /* MULTIMESH API */ @@ -401,6 +588,8 @@ public: virtual void light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode); + virtual VS::LightType light_get_type(RID p_light) const; + virtual AABB light_get_aabb(RID p_light) const; /* PROBE API */ virtual RID reflection_probe_create(); @@ -434,6 +623,9 @@ public: virtual void portal_set_disabled_color(RID p_portal, const Color& p_color); + virtual void instance_add_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance); + virtual void instance_remove_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance); + /* RENDER TARGET */ struct RenderTarget : public RID_Data { @@ -522,6 +714,8 @@ public: virtual RID canvas_light_occluder_create(); virtual void canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines); + virtual VS::InstanceType get_base_type(RID p_rid) const; + virtual bool free(RID p_rid); diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 35191fecf79..ebdf60cf423 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -662,8 +662,8 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co fragment_code0=code.ascii(); } else { fragment_code0=code.substr(0,cpos).ascii(); - code = code.substr(cpos+globals_tag.length(),code.length()); - + //print_line("CODE0:\n"+String(fragment_code0.get_data())); + code = code.substr(cpos+globals_tag.length(),code.length()); cpos = code.find(material_tag); if (cpos==-1) { @@ -671,14 +671,18 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co } else { fragment_code1=code.substr(0,cpos).ascii(); - String code2 = code.substr(cpos+material_tag.length(),code.length()); + //print_line("CODE1:\n"+String(fragment_code1.get_data())); + String code2 = code.substr(cpos+material_tag.length(),code.length()); cpos = code2.find(code_tag); + if (cpos==-1) { fragment_code2=code2.ascii(); } else { fragment_code2=code2.substr(0,cpos).ascii(); + //print_line("CODE2:\n"+String(fragment_code2.get_data())); + String code3 = code2.substr(cpos+code_tag.length(),code2.length()); cpos = code3.find(light_code_tag); @@ -687,7 +691,9 @@ void ShaderGLES3::setup(const char** p_conditional_defines, int p_conditional_co } else { fragment_code3=code3.substr(0,cpos).ascii(); + // print_line("CODE3:\n"+String(fragment_code3.get_data())); fragment_code4 = code3.substr(cpos+light_code_tag.length(),code3.length()).ascii(); + //print_line("CODE4:\n"+String(fragment_code4.get_data())); } } } diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index 628fa14e4e3..0fa0e3b73a7 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -4,4 +4,5 @@ if env['BUILDERS'].has_key('GLES3_GLSL'): env.GLES3_GLSL('copy.glsl'); env.GLES3_GLSL('canvas.glsl'); env.GLES3_GLSL('canvas_shadow.glsl'); + env.GLES3_GLSL('scene.glsl'); diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl new file mode 100644 index 00000000000..4183e828f55 --- /dev/null +++ b/drivers/gles3/shaders/scene.glsl @@ -0,0 +1,351 @@ +[vertex] + + + +/* +from VisualServer: + +ARRAY_VERTEX=0, +ARRAY_NORMAL=1, +ARRAY_TANGENT=2, +ARRAY_COLOR=3, +ARRAY_TEX_UV=4, +ARRAY_TEX_UV2=5, +ARRAY_BONES=6, +ARRAY_WEIGHTS=7, +ARRAY_INDEX=8, +*/ + +//hack to use uv if no uv present so it works with lightmap + + +/* INPUT ATTRIBS */ + +layout(location=0) in highp vec4 vertex_attrib; +layout(location=1) in vec3 normal_attrib; +layout(location=2) in vec4 tangent_attrib; +layout(location=3) in vec4 color_attrib; +layout(location=4) in vec2 uv_attrib; +layout(location=5) in vec2 uv2_attrib; + +uniform float normal_mult; + +#ifdef USE_SKELETON +layout(location=6) mediump ivec4 bone_indices; // attrib:6 +layout(location=7) mediump vec4 bone_weights; // attrib:7 +uniform highp sampler2D skeleton_matrices; +#endif + +#ifdef USE_ATTRIBUTE_INSTANCING + +layout(location=8) in highp vec4 instance_xform0; +layout(location=9) in highp vec4 instance_xform1; +layout(location=10) in highp vec4 instance_xform2; +layout(location=11) in lowp vec4 instance_color; + +#endif + +layout(std140) uniform SceneData { //ubo:0 + + highp mat4 projection_matrix; + highp mat4 camera_inverse_matrix; + highp mat4 camera_matrix; + highp vec4 time; + + highp vec4 ambient_light; +}; + +uniform highp mat4 world_transform; + +/* Varyings */ + +out vec3 vertex_interp; +out vec3 normal_interp; + +#if defined(ENABLE_COLOR_INTERP) +out vec4 color_interp; +#endif + +#if defined(ENABLE_UV_INTERP) +out vec2 uv_interp; +#endif + +#if defined(ENABLE_UV2_INTERP) +out vec2 uv2_interp; +#endif + +#if defined(ENABLE_VAR1_INTERP) +out vec4 var1_interp; +#endif + +#if defined(ENABLE_VAR2_INTERP) +out vec4 var2_interp; +#endif + +#if defined(ENABLE_TANGENT_INTERP) +out vec3 tangent_interp; +out vec3 binormal_interp; +#endif + + +#if !defined(USE_DEPTH_SHADOWS) && defined(USE_SHADOW_PASS) + +varying vec4 position_interp; + +#endif + +#ifdef USE_SHADOW_PASS + +uniform highp float shadow_z_offset; +uniform highp float shadow_z_slope_scale; + +#endif + + +VERTEX_SHADER_GLOBALS + + +#if defined(USE_MATERIAL) + +layout(std140) uniform UniformData { //ubo:1 + +MATERIAL_UNIFORMS + +}; + +#endif + + +void main() { + + highp vec4 vertex_in = vertex_attrib; // vec4(vertex_attrib.xyz * data_attrib.x,1.0); + highp mat4 modelview = camera_inverse_matrix * world_transform; + vec3 normal_in = normal_attrib; + normal_in*=normal_mult; +#if defined(ENABLE_TANGENT_INTERP) + vec3 tangent_in = tangent_attrib.xyz; + tangent_in*=normal_mult; + float binormalf = tangent_attrib.a; +#endif + +#ifdef USE_SKELETON + + { + //skeleton transform + highp mat4 m=mat4(texture2D(skeleton_matrices,vec2((bone_indices.x*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.x*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.x*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.x; + m+=mat4(texture2D(skeleton_matrices,vec2((bone_indices.y*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.y*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.y*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.y; + m+=mat4(texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.z; + m+=mat4(texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.w; + + vertex_in = vertex_in * m; + normal_in = (vec4(normal_in,0.0) * m).xyz; +#if defined(ENABLE_TANGENT_INTERP) + tangent_in = (vec4(tangent_in,0.0) * m).xyz; +#endif + } + +#endif + + vertex_interp = (modelview * vertex_in).xyz; + normal_interp = normalize((modelview * vec4(normal_in,0.0)).xyz); + +#if defined(ENABLE_TANGENT_INTERP) + tangent_interp=normalize((modelview * vec4(tangent_in,0.0)).xyz); + binormal_interp = normalize( cross(normal_interp,tangent_interp) * binormalf ); +#endif + +#if defined(ENABLE_COLOR_INTERP) + color_interp = color_attrib; +#endif + +#if defined(ENABLE_UV_INTERP) + uv_interp = uv_attrib; +#endif +#if defined(ENABLE_UV2_INTERP) + uv2_interp = uv2_attrib; +#endif + + +VERTEX_SHADER_CODE + + +#ifdef USE_SHADOW_PASS + + float z_ofs = shadow_z_offset; + z_ofs += (1.0-abs(normal_interp.z))*shadow_z_slope_scale; + vertex_interp.z-=z_ofs; +#endif + + +#ifdef USE_FOG + + fog_interp.a = pow( clamp( (length(vertex_interp)-fog_params.x)/(fog_params.y-fog_params.x), 0.0, 1.0 ), fog_params.z ); + fog_interp.rgb = mix( fog_color_begin, fog_color_end, fog_interp.a ); +#endif + +#ifndef VERTEX_SHADER_WRITE_POSITION +//vertex shader might write a position + gl_Position = projection_matrix * vec4(vertex_interp,1.0); +#endif + + + + +} + + +[fragment] + + +//hack to use uv if no uv present so it works with lightmap + + +/* Varyings */ + +#if defined(ENABLE_COLOR_INTERP) +in vec4 color_interp; +#endif + +#if defined(ENABLE_UV_INTERP) +in vec2 uv_interp; +#endif + +#if defined(ENABLE_UV2_INTERP) +in vec2 uv2_interp; +#endif + +#if defined(ENABLE_TANGENT_INTERP) +in vec3 tangent_interp; +in vec3 binormal_interp; +#endif + +#if defined(ENABLE_VAR1_INTERP) +in vec4 var1_interp; +#endif + +#if defined(ENABLE_VAR2_INTERP) +in vec4 var2_interp; +#endif + +in vec3 vertex_interp; +in vec3 normal_interp; + + +/* Material Uniforms */ + + +FRAGMENT_SHADER_GLOBALS + + +#if defined(USE_MATERIAL) + +layout(std140) uniform UniformData { + +MATERIAL_UNIFORMS + +}; + +#endif + + +layout(std140) uniform SceneData { + + highp mat4 projection_matrix; + highp mat4 camera_inverse_matrix; + highp mat4 camera_matrix; + highp vec4 time; + + highp vec4 ambient_light; +}; + +layout(location=0) out vec4 frag_color; + +void main() { + + //lay out everything, whathever is unused is optimized away anyway + vec3 vertex = vertex_interp; + vec3 albedo = vec3(0.9,0.9,0.9); + vec3 metal = vec3(0.0,0.0,0.0); + float rough = 0.0; + float alpha = 1.0; + +#ifdef METERIAL_DOUBLESIDED + float side=float(gl_FrontFacing)*2.0-1.0; +#else + float side=1.0; +#endif + + +#if defined(ENABLE_TANGENT_INTERP) + vec3 binormal = normalize(binormal_interp)*side; + vec3 tangent = normalize(tangent_interp)*side; +#endif + vec3 normal = normalize(normal_interp)*side; + +#if defined(ENABLE_UV_INTERP) + vec2 uv = uv_interp; +#endif + +#if defined(ENABLE_UV2_INTERP) + vec2 uv2 = uv2_interp; +#endif + +#if defined(ENABLE_COLOR_INTERP) + vec4 color = color_interp; +#endif + +#if defined(ENABLE_NORMALMAP) + + vec3 normalmap = vec3(0.0); +#endif + + float normaldepth=1.0; + + + +#if defined(ENABLE_DISCARD) + bool discard_=false; +#endif + +{ + + +FRAGMENT_SHADER_CODE + +} + +#if defined(ENABLE_NORMALMAP) + + normal = normalize( mix(normal_interp,tangent_interp * normalmap.x + binormal_interp * normalmap.y + normal_interp * normalmap.z,normaldepth) ) * side; + +#endif + +#if defined(ENABLE_DISCARD) + if (discard_) { + //easy to eliminate dead code + discard; + } +#endif + +#ifdef ENABLE_CLIP_ALPHA + if (diffuse.a<0.99) { + //used for doublepass and shadowmapping + discard; + } +#endif + + + +#if defined(USE_LIGHT_SHADER_CODE) +//light is written by the light shader +{ + +LIGHT_SHADER_CODE + +} +#endif + + frag_color=vec4(albedo,alpha); +} + + diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 865298f64ec..d3f27687caf 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -33,6 +33,75 @@ #include "servers/visual_server.h" #include "camera_matrix.h" +#include "self_list.h" + + +class RasterizerScene { +public: + + + struct InstanceBase : RID_Data { + + VS::InstanceType base_type; + RID base; + + RID skeleton; + RID material_override; + + Transform transform; + + int depth_layer; + + //RID sampled_light; + + Vector materials; + Vector light_instances; + + Vector morph_values; + + //BakedLightData *baked_light; + VS::ShadowCastingSetting cast_shadows; + //Transform *baked_light_octree_xform; + //int baked_lightmap_id; + + bool mirror :8; + bool depth_scale :8; + bool billboard :8; + bool billboard_y :8; + bool receive_shadows : 8; + + SelfList dependency_item; + + virtual void base_removed()=0; + virtual void base_changed()=0; + + InstanceBase() : dependency_item(this) { + + base_type=VS::INSTANCE_NONE; + cast_shadows=VS::SHADOW_CASTING_SETTING_ON; + receive_shadows=true; + depth_scale=false; + billboard=false; + billboard_y=false; + depth_layer=0; + + } + }; + + virtual RID light_instance_create(RID p_light)=0; + virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform)=0; + + virtual void render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment)=0; + + virtual bool free(RID p_rid)=0; + + virtual ~RasterizerScene() {} +}; + + + + + class RasterizerStorage { @@ -88,7 +157,7 @@ public: virtual RID mesh_create()=0; - virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >())=0; + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,VS::PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const AABB& p_aabb,const Vector >& p_blend_shapes=Vector >(),const Vector& p_bone_aabbs=Vector())=0; virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount)=0; virtual int mesh_get_morph_target_count(RID p_mesh) const=0; @@ -116,7 +185,7 @@ public: virtual void mesh_set_custom_aabb(RID p_mesh,const AABB& p_aabb)=0; virtual AABB mesh_get_custom_aabb(RID p_mesh) const=0; - virtual AABB mesh_get_aabb(RID p_mesh) const=0; + virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton) const=0; virtual void mesh_clear(RID p_mesh)=0; /* MULTIMESH API */ @@ -184,9 +253,11 @@ public: virtual void light_set_cull_mask(RID p_light,uint32_t p_mask)=0; virtual void light_set_shader(RID p_light,RID p_shader)=0; - virtual void light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode)=0; + virtual VS::LightType light_get_type(RID p_light) const=0; + virtual AABB light_get_aabb(RID p_light) const=0; + /* PROBE API */ virtual RID reflection_probe_create()=0; @@ -220,6 +291,10 @@ public: virtual void portal_set_disabled_color(RID p_portal, const Color& p_color)=0; + + virtual void instance_add_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance)=0; + virtual void instance_remove_dependency(RID p_base,RasterizerScene::InstanceBase *p_instance)=0; + /* RENDER TARGET */ enum RenderTargetFlags { @@ -246,6 +321,8 @@ public: virtual RID canvas_light_occluder_create()=0; virtual void canvas_light_occluder_set_polylines(RID p_occluder, const DVector& p_lines)=0; + + virtual VS::InstanceType get_base_type(RID p_rid) const=0; virtual bool free(RID p_rid)=0; @@ -257,6 +334,7 @@ public: + class RasterizerCanvas { public: @@ -563,7 +641,7 @@ public: case Item::Command::TYPE_MESH: { const Item::CommandMesh* mesh = static_cast< const Item::CommandMesh*>(c); - AABB aabb = RasterizerStorage::base_signleton->mesh_get_aabb(mesh->mesh); + AABB aabb = RasterizerStorage::base_signleton->mesh_get_aabb(mesh->mesh,mesh->skeleton); r=Rect2(aabb.pos.x,aabb.pos.y,aabb.size.x,aabb.size.y); @@ -654,17 +732,6 @@ public: }; - -class RasterizerScene { -public: - - - - virtual ~RasterizerScene() {} -}; - - - class Rasterizer { protected: static Rasterizer* (*_create_func)(); @@ -689,8 +756,6 @@ public: }; - - #if 0 /** @author Juan Linietsky diff --git a/servers/visual/shader_types.cpp b/servers/visual/shader_types.cpp index 1a01a19021b..767d11bc845 100644 --- a/servers/visual/shader_types.cpp +++ b/servers/visual/shader_types.cpp @@ -58,8 +58,8 @@ ShaderTypes::ShaderTypes() shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["NORMAL"]=ShaderLanguage::TYPE_VEC3; shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ALBEDO"]=ShaderLanguage::TYPE_VEC3; shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ALPHA"]=ShaderLanguage::TYPE_FLOAT; - shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["METAL"]=ShaderLanguage::TYPE_FLOAT; - shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ROUGH"]=ShaderLanguage::TYPE_FLOAT; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["SPECULAR"]=ShaderLanguage::TYPE_VEC3; + shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["ROUGHNESS"]=ShaderLanguage::TYPE_FLOAT; shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["EMISSION"]=ShaderLanguage::TYPE_VEC3; shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["SPECIAL"]=ShaderLanguage::TYPE_FLOAT; shader_modes[VS::SHADER_SPATIAL].functions["fragment"]["DISCARD"]=ShaderLanguage::TYPE_BOOL; @@ -77,10 +77,6 @@ ShaderTypes::ShaderTypes() shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_sub"); shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mul"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("special_glow"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("special_subsurf"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("special_specular"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_opaque"); shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_always"); shader_modes[VS::SHADER_SPATIAL].modes.insert("depth_draw_never"); @@ -90,12 +86,10 @@ ShaderTypes::ShaderTypes() shader_modes[VS::SHADER_SPATIAL].modes.insert("cull_back"); shader_modes[VS::SHADER_SPATIAL].modes.insert("cull_disable"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("lightmap_on_uv2"); shader_modes[VS::SHADER_SPATIAL].modes.insert("unshaded"); shader_modes[VS::SHADER_SPATIAL].modes.insert("ontop"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_model_space"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("vertex_camera_space"); + shader_modes[VS::SHADER_SPATIAL].modes.insert("skip_transform"); /************ CANVAS ITEM **************************/ @@ -158,4 +152,5 @@ ShaderTypes::ShaderTypes() + } diff --git a/servers/visual/visual_server_raster.cpp b/servers/visual/visual_server_raster.cpp index d6e057bb520..70abfe2361a 100644 --- a/servers/visual/visual_server_raster.cpp +++ b/servers/visual/visual_server_raster.cpp @@ -34,183 +34,12 @@ #include "io/marshalls.h" #include "visual_server_canvas.h" #include "visual_server_global.h" +#include "visual_server_scene.h" // careful, these may run in different threads than the visual server -/* CAMERA API */ - -RID VisualServerRaster::camera_create() { - - return RID(); -} -void VisualServerRaster::camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far) { - -} -void VisualServerRaster::camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far){ - -} -void VisualServerRaster::camera_set_transform(RID p_camera,const Transform& p_transform) { - -} -void VisualServerRaster::camera_set_cull_mask(RID p_camera,uint32_t p_layers){ - -} -void VisualServerRaster::camera_set_environment(RID p_camera,RID p_env){ - -} -void VisualServerRaster::camera_set_use_vertical_aspect(RID p_camera,bool p_enable){ - -} - - -/* ENVIRONMENT API */ - -RID VisualServerRaster::environment_create(){ - - return RID(); -} - -void VisualServerRaster::environment_set_background(RID p_env,EnvironmentBG p_bg){ - -} -void VisualServerRaster::environment_set_skybox(RID p_env,RID p_skybox,float p_energy){ - -} -void VisualServerRaster::environment_set_bg_color(RID p_env,const Color& p_color){ - -} -void VisualServerRaster::environment_set_canvas_max_layer(RID p_env,int p_max_layer){ - -} -void VisualServerRaster::environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy){ - -} - -void VisualServerRaster::environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,EnvironmentGlowBlendMode p_blend_mode){ - -} -void VisualServerRaster::environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture){ - -} - -void VisualServerRaster::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper){ - -} -void VisualServerRaster::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ - -} -void VisualServerRaster::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ - -} -void VisualServerRaster::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ - -} -void VisualServerRaster::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ - -} - - -/* SCENARIO API */ - - -RID VisualServerRaster::scenario_create() { - - return RID(); -} - -void VisualServerRaster::scenario_set_debug(RID p_scenario,ScenarioDebugMode p_debug_mode){ - -} -void VisualServerRaster::scenario_set_environment(RID p_scenario, RID p_environment){ - -} -RID VisualServerRaster::scenario_get_environment(RID p_scenario, RID p_environment) const{ - - return RID(); -} -void VisualServerRaster::scenario_set_fallback_environment(RID p_scenario, RID p_environment){ - -} - - -/* INSTANCING API */ -// from can be mesh, light, area and portal so far. -RID VisualServerRaster::instance_create(){ - - return RID(); -} - -void VisualServerRaster::instance_set_base(RID p_instance, RID p_base){ - -} -void VisualServerRaster::instance_set_scenario(RID p_instance, RID p_scenario){ - -} -void VisualServerRaster::instance_set_layer_mask(RID p_instance, uint32_t p_mask){ - -} -void VisualServerRaster::instance_set_transform(RID p_instance, const Transform& p_transform){ - -} -void VisualServerRaster::instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID){ - -} -void VisualServerRaster::instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight){ - -} -void VisualServerRaster::instance_set_surface_material(RID p_instance,int p_surface, RID p_material){ - -} - -void VisualServerRaster::instance_attach_skeleton(RID p_instance,RID p_skeleton){ - -} -void VisualServerRaster::instance_set_exterior( RID p_instance, bool p_enabled ){ - -} -void VisualServerRaster::instance_set_room( RID p_instance, RID p_room ){ - -} - -void VisualServerRaster::instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ){ - -} - -// don't use these in a game! -Vector VisualServerRaster::instances_cull_aabb(const AABB& p_aabb, RID p_scenario) const{ - - return Vector(); -} - -Vector VisualServerRaster::instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario) const{ - - return Vector(); -} -Vector VisualServerRaster::instances_cull_convex(const Vector& p_convex, RID p_scenario) const { - - return Vector(); -} - - -void VisualServerRaster::instance_geometry_set_flag(RID p_instance,InstanceFlags p_flags,bool p_enabled){ - -} -void VisualServerRaster::instance_geometry_set_cast_shadows_setting(RID p_instance, ShadowCastingSetting p_shadow_casting_setting) { - -} -void VisualServerRaster::instance_geometry_set_material_override(RID p_instance, RID p_material){ - -} - - -void VisualServerRaster::instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin){ - -} -void VisualServerRaster::instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance){ - -} /* CURSOR */ void VisualServerRaster::cursor_set_rotation(float p_rotation, int p_cursor ){ @@ -247,6 +76,8 @@ void VisualServerRaster::free( RID p_rid ){ return; if (VSG::viewport->free(p_rid)) return; + if (VSG::scene->free(p_rid)) + return; } @@ -258,6 +89,9 @@ void VisualServerRaster::draw(){ // print_line("changes: "+itos(changes)); changes=0; + + VSG::scene->update_dirty_instances(); //update scene stuff + VSG::rasterizer->begin_frame(); VSG::viewport->draw_viewports(); //_draw_cursors_and_margins(); @@ -322,6 +156,7 @@ VisualServerRaster::VisualServerRaster() { VSG::canvas = memnew( VisualServerCanvas); VSG::viewport = memnew( VisualServerViewport); + VSG::scene = memnew( VisualServerScene ); VSG::rasterizer = Rasterizer::create(); VSG::storage=VSG::rasterizer->get_storage(); VSG::canvas_render=VSG::rasterizer->get_canvas(); diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 2171998f3fe..62d45f8206d 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -37,6 +37,7 @@ #include "visual_server_global.h" #include "visual_server_viewport.h" #include "visual_server_canvas.h" +#include "visual_server_scene.h" /** @author Juan Linietsky */ @@ -600,6 +601,7 @@ public: #define BIND6(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6); } #define BIND7(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7); } #define BIND8(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7,m_type8) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7,m_type8 arg8) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8); } +#define BIND9(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7,m_type8,m_type9) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7,m_type8 arg8,m_type9 arg9) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9); } #define BIND10(m_name,m_type1,m_type2,m_type3,m_type4,m_type5,m_type6,m_type7,m_type8,m_type9,m_type10) void m_name(m_type1 arg1,m_type2 arg2,m_type3 arg3,m_type4 arg4,m_type5 arg5,m_type6 arg6,m_type7 arg7,m_type8 arg8,m_type9 arg9,m_type10 arg10) { DISPLAY_CHANGED BINDBASE->m_name(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); } //from now on, calls forwarded to this singleton @@ -659,7 +661,7 @@ public: BIND0R(RID,mesh_create) - BIND8(mesh_add_surface,RID,uint32_t,PrimitiveType,const DVector&,int ,const DVector& ,int ,const Vector >& ) + BIND10(mesh_add_surface,RID,uint32_t,PrimitiveType,const DVector&,int ,const DVector& ,int ,const AABB&,const Vector >&,const Vector& ) BIND2(mesh_set_morph_target_count,RID,int) BIND1RC(int,mesh_get_morph_target_count,RID) @@ -788,13 +790,18 @@ public: /* CAMERA API */ - virtual RID camera_create(); - virtual void camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far); - virtual void camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far); - virtual void camera_set_transform(RID p_camera,const Transform& p_transform); - virtual void camera_set_cull_mask(RID p_camera,uint32_t p_layers); - virtual void camera_set_environment(RID p_camera,RID p_env); - virtual void camera_set_use_vertical_aspect(RID p_camera,bool p_enable); +#undef BINDBASE +//from now on, calls forwarded to this singleton +#define BINDBASE VSG::scene + + + BIND0R(RID, camera_create) + BIND4(camera_set_perspective,RID,float, float , float ) + BIND4(camera_set_orthogonal,RID,float , float , float ) + BIND2(camera_set_transform,RID,const Transform&) + BIND2(camera_set_cull_mask,RID,uint32_t ) + BIND2(camera_set_environment,RID ,RID ) + BIND2(camera_set_use_vertical_aspect,RID,bool) #undef BINDBASE //from now on, calls forwarded to this singleton @@ -839,66 +846,69 @@ public: /* ENVIRONMENT API */ - virtual RID environment_create(); +#undef BINDBASE +//from now on, calls forwarded to this singleton +#define BINDBASE VSG::scene - virtual void environment_set_background(RID p_env,EnvironmentBG p_bg); - virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0); - virtual void environment_set_bg_color(RID p_env,const Color& p_color); - virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0); + BIND0R(RID,environment_create) - virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,EnvironmentGlowBlendMode p_blend_mode); - virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); + BIND2(environment_set_background,RID ,EnvironmentBG ) + BIND3(environment_set_skybox,RID,RID ,float ) + BIND2(environment_set_bg_color,RID,const Color& ) + BIND2(environment_set_canvas_max_layer,RID,int ) + BIND3(environment_set_ambient_light,RID,const Color& ,float ) - virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper); - virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); - virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); - virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); - virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); + BIND7(environment_set_glow,RID,bool ,int ,float ,float ,float ,EnvironmentGlowBlendMode ) + BIND5(environment_set_fog,RID,bool ,float ,float ,RID ) + + BIND8(environment_set_tonemap,RID,bool ,float ,float ,float ,float ,float ,EnvironmentToneMapper ) + BIND3(environment_set_brightness,RID,bool ,float ) + BIND3(environment_set_contrast,RID,bool ,float ) + BIND3(environment_set_saturation,RID,bool ,float ) + BIND3(environment_set_color_correction,RID,bool ,RID ) /* SCENARIO API */ - virtual RID scenario_create(); + BIND0R(RID,scenario_create) - virtual void scenario_set_debug(RID p_scenario,ScenarioDebugMode p_debug_mode); - virtual void scenario_set_environment(RID p_scenario, RID p_environment); - virtual RID scenario_get_environment(RID p_scenario, RID p_environment) const; - virtual void scenario_set_fallback_environment(RID p_scenario, RID p_environment); + BIND2(scenario_set_debug,RID,ScenarioDebugMode ) + BIND2(scenario_set_environment,RID, RID ) + BIND2(scenario_set_fallback_environment,RID, RID ) /* INSTANCING API */ // from can be mesh, light, area and portal so far. - virtual RID instance_create(); // from can be mesh, light, poly, area and portal so far. + BIND0R(RID,instance_create) - virtual void instance_set_base(RID p_instance, RID p_base); // from can be mesh, light, poly, area and portal so far. - virtual void instance_set_scenario(RID p_instance, RID p_scenario); // from can be mesh, light, poly, area and portal so far. - virtual void instance_set_layer_mask(RID p_instance, uint32_t p_mask); - virtual void instance_set_transform(RID p_instance, const Transform& p_transform); - virtual void instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID); - virtual void instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight); - virtual void instance_set_surface_material(RID p_instance,int p_surface, RID p_material); + BIND2(instance_set_base,RID, RID ) // from can be mesh, light, poly, area and portal so far. + BIND2(instance_set_scenario,RID, RID ) // from can be mesh, light, poly, area and portal so far. + BIND2(instance_set_layer_mask,RID, uint32_t ) + BIND2(instance_set_transform,RID, const Transform& ) + BIND2(instance_attach_object_instance_ID,RID,ObjectID ) + BIND3(instance_set_morph_target_weight,RID,int , float ) + BIND3(instance_set_surface_material,RID,int , RID ) - virtual void instance_attach_skeleton(RID p_instance,RID p_skeleton); - virtual void instance_set_exterior( RID p_instance, bool p_enabled ); - virtual void instance_set_room( RID p_instance, RID p_room ); + BIND2(instance_attach_skeleton,RID,RID ) + BIND2(instance_set_exterior, RID, bool ) + BIND2(instance_set_room, RID, RID ) - virtual void instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ); + BIND2(instance_set_extra_visibility_margin, RID, real_t ) // don't use these in a game! - virtual Vector instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const; - virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const; - virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const; + BIND2RC(Vector,instances_cull_aabb,const AABB& , RID) + BIND3RC(Vector,instances_cull_ray,const Vector3& , const Vector3& , RID ) + BIND2RC(Vector,instances_cull_convex,const Vector& , RID) - virtual void instance_geometry_set_flag(RID p_instance,InstanceFlags p_flags,bool p_enabled); - virtual void instance_geometry_set_cast_shadows_setting(RID p_instance, ShadowCastingSetting p_shadow_casting_setting); - virtual void instance_geometry_set_material_override(RID p_instance, RID p_material); + BIND3(instance_geometry_set_flag,RID,InstanceFlags ,bool ) + BIND2(instance_geometry_set_cast_shadows_setting,RID, ShadowCastingSetting ) + BIND2(instance_geometry_set_material_override,RID, RID ) - virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin); - virtual void instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance); + BIND5(instance_geometry_set_draw_range,RID,float ,float ,float ,float ) + BIND2(instance_geometry_set_as_instance_lod,RID,RID ) #undef BINDBASE @@ -1055,6 +1065,7 @@ public: #undef BIND6 #undef BIND7 #undef BIND8 +#undef BIND9 #undef BIND10 }; diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp new file mode 100644 index 00000000000..a3b1d76fb8e --- /dev/null +++ b/servers/visual/visual_server_scene.cpp @@ -0,0 +1,1517 @@ +#include "visual_server_scene.h" +#include "visual_server_global.h" + +/* CAMERA API */ + + +RID VisualServerScene::camera_create() { + + Camera * camera = memnew( Camera ); + return camera_owner.make_rid( camera ); + +} + +void VisualServerScene::camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far) { + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + camera->type=Camera::PERSPECTIVE; + camera->fov=p_fovy_degrees; + camera->znear=p_z_near; + camera->zfar=p_z_far; + +} + +void VisualServerScene::camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far) { + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + camera->type=Camera::ORTHOGONAL; + camera->size=p_size; + camera->znear=p_z_near; + camera->zfar=p_z_far; +} + +void VisualServerScene::camera_set_transform(RID p_camera,const Transform& p_transform) { + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + camera->transform=p_transform.orthonormalized(); + + +} + +void VisualServerScene::camera_set_cull_mask(RID p_camera,uint32_t p_layers) { + + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + + camera->visible_layers=p_layers; + +} + +void VisualServerScene::camera_set_environment(RID p_camera,RID p_env) { + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + camera->env=p_env; + +} + + +void VisualServerScene::camera_set_use_vertical_aspect(RID p_camera,bool p_enable) { + + Camera *camera = camera_owner.get( p_camera ); + ERR_FAIL_COND(!camera); + camera->vaspect=p_enable; + +} + + + +/* ENVIRONMENT API */ + +RID VisualServerScene::environment_create(){ + + return RID(); +} + +void VisualServerScene::environment_set_background(RID p_env,VS::EnvironmentBG p_bg){ + +} +void VisualServerScene::environment_set_skybox(RID p_env,RID p_skybox,float p_energy){ + +} +void VisualServerScene::environment_set_bg_color(RID p_env,const Color& p_color){ + +} +void VisualServerScene::environment_set_canvas_max_layer(RID p_env,int p_max_layer){ + +} +void VisualServerScene::environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy){ + +} + +void VisualServerScene::environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode){ + +} +void VisualServerScene::environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture){ + +} + +void VisualServerScene::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper){ + +} +void VisualServerScene::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ + +} +void VisualServerScene::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ + +} +void VisualServerScene::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ + +} +void VisualServerScene::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ + +} + + +/* SCENARIO API */ + + + +void* VisualServerScene::_instance_pair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int) { + +// VisualServerScene *self = (VisualServerScene*)p_self; + Instance *A = p_A; + Instance *B = p_B; + + //instance indices are designed so greater always contains lesser + if (A->base_type > B->base_type) { + SWAP(A,B); //lesser always first + } + + if (B->base_type==VS::INSTANCE_LIGHT && (1<base_type)&VS::INSTANCE_GEOMETRY_MASK) { + + InstanceLightData * light = static_cast(B->base_data); + InstanceGeometryData * geom = static_cast(A->base_data); + + + InstanceLightData::PairInfo pinfo; + pinfo.geometry=A; + pinfo.L = geom->lighting.push_back(B); + + List::Element *E = light->geometries.push_back(pinfo); + + light->shadow_sirty=true; + geom->lighting_dirty=true; + + return E; //this element should make freeing faster + } + +#if 0 + if (A->base_type==INSTANCE_PORTAL) { + + ERR_FAIL_COND_V( B->base_type!=INSTANCE_PORTAL,NULL ); + + A->portal_info->candidate_set.insert(B); + B->portal_info->candidate_set.insert(A); + + self->_portal_attempt_connect(A); + //attempt to conncet portal A (will go through B anyway) + //this is a little hackish, but works fine in practice + + } else if (A->base_type==INSTANCE_BAKED_LIGHT || B->base_type==INSTANCE_BAKED_LIGHT) { + + if (B->base_type==INSTANCE_BAKED_LIGHT) { + SWAP(A,B); + } + + ERR_FAIL_COND_V(B->base_type!=INSTANCE_BAKED_LIGHT_SAMPLER,NULL); + B->baked_light_sampler_info->baked_lights.insert(A); + + } else if (A->base_type==INSTANCE_ROOM || B->base_type==INSTANCE_ROOM) { + + if (B->base_type==INSTANCE_ROOM) + SWAP(A,B); + + ERR_FAIL_COND_V(! ((1<base_type)&INSTANCE_GEOMETRY_MASK ),NULL); + + B->auto_rooms.insert(A); + A->room_info->owned_autoroom_geometry.insert(B); + + self->_instance_validate_autorooms(B); + + + } else { + + if (B->base_type==INSTANCE_LIGHT) { + + SWAP(A,B); + } else if (A->base_type!=INSTANCE_LIGHT) { + return NULL; + } + + + A->light_info->affected.insert(B); + B->lights.insert(A); + B->light_cache_dirty=true; + + + } +#endif + + return NULL; + +} +void VisualServerScene::_instance_unpair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int,void* udata) { + +// VisualServerScene *self = (VisualServerScene*)p_self; + Instance *A = p_A; + Instance *B = p_B; + + //instance indices are designed so greater always contains lesser + if (A->base_type > B->base_type) { + SWAP(A,B); //lesser always first + } + + + + if (B->base_type==VS::INSTANCE_LIGHT && (1<base_type)&VS::INSTANCE_GEOMETRY_MASK) { + + InstanceLightData * light = static_cast(B->base_data); + InstanceGeometryData * geom = static_cast(A->base_data); + + List::Element *E = reinterpret_cast::Element*>(udata); + + geom->lighting.erase(E->get().L); + light->geometries.erase(E); + + light->shadow_sirty=true; + geom->lighting_dirty=true; + + + } +#if 0 + if (A->base_type==INSTANCE_PORTAL) { + + ERR_FAIL_COND( B->base_type!=INSTANCE_PORTAL ); + + + A->portal_info->candidate_set.erase(B); + B->portal_info->candidate_set.erase(A); + + //after disconnecting them, see if they can connect again + self->_portal_attempt_connect(A); + self->_portal_attempt_connect(B); + + } else if (A->base_type==INSTANCE_BAKED_LIGHT || B->base_type==INSTANCE_BAKED_LIGHT) { + + if (B->base_type==INSTANCE_BAKED_LIGHT) { + SWAP(A,B); + } + + ERR_FAIL_COND(B->base_type!=INSTANCE_BAKED_LIGHT_SAMPLER); + B->baked_light_sampler_info->baked_lights.erase(A); + + } else if (A->base_type==INSTANCE_ROOM || B->base_type==INSTANCE_ROOM) { + + if (B->base_type==INSTANCE_ROOM) + SWAP(A,B); + + ERR_FAIL_COND(! ((1<base_type)&INSTANCE_GEOMETRY_MASK )); + + B->auto_rooms.erase(A); + B->valid_auto_rooms.erase(A); + A->room_info->owned_autoroom_geometry.erase(B); + + }else { + + + + if (B->base_type==INSTANCE_LIGHT) { + + SWAP(A,B); + } else if (A->base_type!=INSTANCE_LIGHT) { + return; + } + + + A->light_info->affected.erase(B); + B->lights.erase(A); + B->light_cache_dirty=true; + + } +#endif +} + +RID VisualServerScene::scenario_create() { + + Scenario *scenario = memnew( Scenario ); + ERR_FAIL_COND_V(!scenario,RID()); + RID scenario_rid = scenario_owner.make_rid( scenario ); + scenario->self=scenario_rid; + + scenario->octree.set_pair_callback(_instance_pair,this); + scenario->octree.set_unpair_callback(_instance_unpair,this); + + return scenario_rid; +} + +void VisualServerScene::scenario_set_debug(RID p_scenario,VS::ScenarioDebugMode p_debug_mode) { + + Scenario *scenario = scenario_owner.get(p_scenario); + ERR_FAIL_COND(!scenario); + scenario->debug=p_debug_mode; +} + +void VisualServerScene::scenario_set_environment(RID p_scenario, RID p_environment) { + + Scenario *scenario = scenario_owner.get(p_scenario); + ERR_FAIL_COND(!scenario); + scenario->environment=p_environment; + +} + +void VisualServerScene::scenario_set_fallback_environment(RID p_scenario, RID p_environment) { + + + Scenario *scenario = scenario_owner.get(p_scenario); + ERR_FAIL_COND(!scenario); + scenario->fallback_environment=p_environment; + + +} + + + +/* INSTANCING API */ + +void VisualServerScene::_instance_queue_update(Instance *p_instance,bool p_update_aabb,bool p_update_materials) { + + if (p_update_aabb) + p_instance->update_aabb=true; + if (p_update_materials) + p_instance->update_materials=true; + + if (p_instance->update_item.in_list()) + return; + + _instance_update_list.add(&p_instance->update_item); + + +} + +// from can be mesh, light, area and portal so far. +RID VisualServerScene::instance_create(){ + + Instance *instance = memnew( Instance ); + ERR_FAIL_COND_V(!instance,RID()); + + RID instance_rid = instance_owner.make_rid(instance); + instance->self=instance_rid; + + + return instance_rid; + + +} + +void VisualServerScene::instance_set_base(RID p_instance, RID p_base){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + Scenario *scenario = instance->scenario; + + if (instance->base_type!=VS::INSTANCE_NONE) { + //free anything related to that base + + VSG::storage->instance_remove_dependency(instance->base,instance); + + if (scenario && instance->octree_id) { + scenario->octree.erase(instance->octree_id); //make dependencies generated by the octree go away + instance->octree_id=0; + } + + switch(instance->base_type) { + case VS::INSTANCE_LIGHT: { + + InstanceLightData *light = static_cast(instance->base_data); + + if (instance->scenario && light->D) { + instance->scenario->directional_lights.erase( light->D ); + light->D=NULL; + } + VSG::scene_render->free(light->instance); + + } + } + + if (instance->base_data) { + memdelete( instance->base_data ); + instance->base_data=NULL; + } + + instance->morph_values.clear(); + instance->materials.clear(); + +#if 0 + if (instance->light_info) { + + if (instance->scenario && instance->light_info->D) + instance->scenario->directional_lights.erase( instance->light_info->D ); + rasterizer->free(instance->light_info->instance); + memdelete(instance->light_info); + instance->light_info=NULL; + } + + + + if ( instance->room ) { + + instance_set_room(p_instance,RID()); + /* + if((1<base_type)&INSTANCE_GEOMETRY_MASK) + instance->room->room_info->owned_geometry_instances.erase(instance->RE); + else if (instance->base_type==INSTANCE_PORTAL) { + print_line("freeing portal, is it there? "+itos(instance->room->room_info->owned_portal_instances.(instance->RE))); + instance->room->room_info->owned_portal_instances.erase(instance->RE); + } else if (instance->base_type==INSTANCE_ROOM) + instance->room->room_info->owned_room_instances.erase(instance->RE); + else if (instance->base_type==INSTANCE_LIGHT) + instance->room->room_info->owned_light_instances.erase(instance->RE); + + instance->RE=NULL;*/ + } + + + + + + + if (instance->portal_info) { + + _portal_disconnect(instance,true); + memdelete(instance->portal_info); + instance->portal_info=NULL; + + } + + if (instance->baked_light_info) { + + while(instance->baked_light_info->owned_instances.size()) { + + Instance *owned=instance->baked_light_info->owned_instances.front()->get(); + owned->baked_light=NULL; + owned->data.baked_light=NULL; + owned->data.baked_light_octree_xform=NULL; + owned->BLE=NULL; + instance->baked_light_info->owned_instances.pop_front(); + } + + memdelete(instance->baked_light_info); + instance->baked_light_info=NULL; + + } + + if (instance->scenario && instance->octree_id) { + instance->scenario->octree.erase( instance->octree_id ); + instance->octree_id=0; + } + + + if (instance->room_info) { + + for(List::Element *E=instance->room_info->owned_geometry_instances.front();E;E=E->next()) { + + Instance *owned = E->get(); + owned->room=NULL; + owned->RE=NULL; + } + for(List::Element *E=instance->room_info->owned_portal_instances.front();E;E=E->next()) { + + _portal_disconnect(E->get(),true); + Instance *owned = E->get(); + owned->room=NULL; + owned->RE=NULL; + } + + for(List::Element *E=instance->room_info->owned_room_instances.front();E;E=E->next()) { + + Instance *owned = E->get(); + owned->room=NULL; + owned->RE=NULL; + } + + if (instance->room_info->disconnected_child_portals.size()) { + ERR_PRINT("BUG: Disconnected portals remain!"); + } + memdelete(instance->room_info); + instance->room_info=NULL; + + } + + if (instance->particles_info) { + + rasterizer->free( instance->particles_info->instance ); + memdelete(instance->particles_info); + instance->particles_info=NULL; + + } + + if (instance->baked_light_sampler_info) { + + while (instance->baked_light_sampler_info->owned_instances.size()) { + + instance_geometry_set_baked_light_sampler(instance->baked_light_sampler_info->owned_instances.front()->get()->self,RID()); + } + + if (instance->baked_light_sampler_info->sampled_light.is_valid()) { + rasterizer->free(instance->baked_light_sampler_info->sampled_light); + } + memdelete( instance->baked_light_sampler_info ); + instance->baked_light_sampler_info=NULL; + } +#endif + + } + + + instance->base_type=VS::INSTANCE_NONE; + instance->base=RID(); + + + if (p_base.is_valid()) { + + instance->base_type=VSG::storage->get_base_type(p_base); + ERR_FAIL_COND(instance->base_type==VS::INSTANCE_NONE); + + switch(instance->base_type) { + case VS::INSTANCE_LIGHT: { + + InstanceLightData *light = memnew( InstanceLightData ); + + if (scenario && VSG::storage->light_get_type(p_base)==VS::LIGHT_DIRECTIONAL) { + light->D = scenario->directional_lights.push_back(instance); + } + + light->instance = VSG::scene_render->light_instance_create(p_base); + + instance->base_data=light; + } + case VS::INSTANCE_MESH: { + + InstanceGeometryData *geom = memnew( InstanceGeometryData ); + instance->base_data=geom; + } + + } + + VSG::storage->instance_add_dependency(p_base,instance); + + instance->base=p_base; + + if (scenario) + _instance_queue_update(instance,true,true); + + +#if 0 + if (rasterizer->is_mesh(p_base)) { + instance->base_type=INSTANCE_MESH; + instance->data.morph_values.resize( rasterizer->mesh_get_morph_target_count(p_base)); + instance->data.materials.resize( rasterizer->mesh_get_surface_count(p_base)); + } else if (rasterizer->is_multimesh(p_base)) { + instance->base_type=INSTANCE_MULTIMESH; + } else if (rasterizer->is_immediate(p_base)) { + instance->base_type=INSTANCE_IMMEDIATE; + } else if (rasterizer->is_particles(p_base)) { + instance->base_type=INSTANCE_PARTICLES; + instance->particles_info=memnew( Instance::ParticlesInfo ); + instance->particles_info->instance = rasterizer->particles_instance_create( p_base ); + } else if (rasterizer->is_light(p_base)) { + + instance->base_type=INSTANCE_LIGHT; + instance->light_info = memnew( Instance::LightInfo ); + instance->light_info->instance = rasterizer->light_instance_create(p_base); + if (instance->scenario && rasterizer->light_get_type(p_base)==LIGHT_DIRECTIONAL) { + + instance->light_info->D = instance->scenario->directional_lights.push_back(instance->self); + } + + } else if (room_owner.owns(p_base)) { + instance->base_type=INSTANCE_ROOM; + instance->room_info = memnew( Instance::RoomInfo ); + instance->room_info->room=room_owner.get(p_base); + } else if (portal_owner.owns(p_base)) { + + instance->base_type=INSTANCE_PORTAL; + instance->portal_info = memnew(Instance::PortalInfo); + instance->portal_info->portal=portal_owner.get(p_base); + } else if (baked_light_owner.owns(p_base)) { + + instance->base_type=INSTANCE_BAKED_LIGHT; + instance->baked_light_info=memnew(Instance::BakedLightInfo); + instance->baked_light_info->baked_light=baked_light_owner.get(p_base); + + //instance->portal_info = memnew(Instance::PortalInfo); + //instance->portal_info->portal=portal_owner.get(p_base); + } else if (baked_light_sampler_owner.owns(p_base)) { + + + instance->base_type=INSTANCE_BAKED_LIGHT_SAMPLER; + instance->baked_light_sampler_info=memnew( Instance::BakedLightSamplerInfo); + instance->baked_light_sampler_info->sampler=baked_light_sampler_owner.get(p_base); + + //instance->portal_info = memnew(Instance::PortalInfo); + //instance->portal_info->portal=portal_owner.get(p_base); + + } else { + ERR_EXPLAIN("Invalid base RID for instance!") + ERR_FAIL(); + } + + instance_dependency_map[ p_base ].insert( instance->self ); +#endif + + + } +} +void VisualServerScene::instance_set_scenario(RID p_instance, RID p_scenario){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + if (instance->scenario) { + + instance->scenario->instances.remove( &instance->scenario_item ); + + if (instance->octree_id) { + instance->scenario->octree.erase(instance->octree_id); //make dependencies generated by the octree go away + instance->octree_id=0; + } + + + switch(instance->base_type) { + + case VS::INSTANCE_LIGHT: { + + + InstanceLightData *light = static_cast(instance->base_data); + + if (light->D) { + instance->scenario->directional_lights.erase( light->D ); + light->D=NULL; + } + } + } + + instance->scenario=NULL; + } + + + if (p_scenario.is_valid()) { + + Scenario *scenario = scenario_owner.get( p_scenario ); + ERR_FAIL_COND(!scenario); + + instance->scenario=scenario; + + scenario->instances.add( &instance->scenario_item ); + + + switch(instance->base_type) { + + case VS::INSTANCE_LIGHT: { + + + InstanceLightData *light = static_cast(instance->base_data); + + if (VSG::storage->light_get_type(instance->base)==VS::LIGHT_DIRECTIONAL) { + light->D = scenario->directional_lights.push_back(instance); + } + } + } + + _instance_queue_update(instance,true,true); + } +} +void VisualServerScene::instance_set_layer_mask(RID p_instance, uint32_t p_mask){ + + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + instance->layer_mask=p_mask; +} +void VisualServerScene::instance_set_transform(RID p_instance, const Transform& p_transform){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + if (instance->transform==p_transform) + return; //must be checked to avoid worst evil + + instance->transform=p_transform; + _instance_queue_update(instance,true); +} +void VisualServerScene::instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + instance->object_ID=p_ID; + +} +void VisualServerScene::instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight){ + +} +void VisualServerScene::instance_set_surface_material(RID p_instance,int p_surface, RID p_material){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + _update_dirty_instance(instance); + + ERR_FAIL_INDEX(p_surface,instance->materials.size()); + + instance->materials[p_surface]=p_material; + +} + +void VisualServerScene::instance_attach_skeleton(RID p_instance,RID p_skeleton){ + + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + instance->skeleton=p_skeleton; + + _instance_queue_update(instance,true); +} + +void VisualServerScene::instance_set_exterior( RID p_instance, bool p_enabled ){ + +} +void VisualServerScene::instance_set_room( RID p_instance, RID p_room ){ + +} + +void VisualServerScene::instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ){ + +} + +// don't use these in a game! +Vector VisualServerScene::instances_cull_aabb(const AABB& p_aabb, RID p_scenario) const{ + + return Vector(); +} + +Vector VisualServerScene::instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario) const{ + + return Vector(); +} +Vector VisualServerScene::instances_cull_convex(const Vector& p_convex, RID p_scenario) const { + + return Vector(); +} + + +void VisualServerScene::instance_geometry_set_flag(RID p_instance,VS::InstanceFlags p_flags,bool p_enabled){ + +} +void VisualServerScene::instance_geometry_set_cast_shadows_setting(RID p_instance, VS::ShadowCastingSetting p_shadow_casting_setting) { + +} +void VisualServerScene::instance_geometry_set_material_override(RID p_instance, RID p_material){ + +} + + +void VisualServerScene::instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin){ + +} +void VisualServerScene::instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance){ + +} + + +void VisualServerScene::_update_instance(Instance *p_instance) { + + p_instance->version++; + + if (p_instance->base_type == VS::INSTANCE_LIGHT) { + + InstanceLightData *light = static_cast(p_instance->base_data); + + VSG::scene_render->light_instance_set_transform( light->instance, p_instance->transform ); + + } + + + if (p_instance->aabb.has_no_surface()) + return; + +#if 0 + if (p_instance->base_type == VS::INSTANCE_PARTICLES) { + + rasterizer->particles_instance_set_transform( p_instance->particles_info->instance, p_instance->data.transform ); + } + +#endif + if ((1<base_type)&VS::INSTANCE_GEOMETRY_MASK) { + + InstanceGeometryData *geom = static_cast(p_instance->base_data); + //make sure lights are updated + + for (List::Element *E=geom->lighting.front();E;E=E->next()) { + InstanceLightData *light = static_cast(E->get()->base_data); + light->shadow_sirty=true; + } + + } +#if 0 + else if (p_instance->base_type == INSTANCE_ROOM) { + + p_instance->room_info->affine_inverse=p_instance->data.transform.affine_inverse(); + } else if (p_instance->base_type == INSTANCE_BAKED_LIGHT) { + + Transform scale; + scale.basis.scale(p_instance->baked_light_info->baked_light->octree_aabb.size); + scale.origin=p_instance->baked_light_info->baked_light->octree_aabb.pos; + //print_line("scale: "+scale); + p_instance->baked_light_info->affine_inverse=(p_instance->data.transform*scale).affine_inverse(); + } + + +#endif + + p_instance->mirror = p_instance->transform.basis.determinant() < 0.0; + + AABB new_aabb; +#if 0 + if (p_instance->base_type==INSTANCE_PORTAL) { + + //portals need to be transformed in a special way, so they don't become too wide if they have scale.. + Transform portal_xform = p_instance->data.transform; + portal_xform.basis.set_axis(2,portal_xform.basis.get_axis(2).normalized()); + + p_instance->portal_info->plane_cache=Plane( p_instance->data.transform.origin, portal_xform.basis.get_axis(2)); + int point_count=p_instance->portal_info->portal->shape.size(); + p_instance->portal_info->transformed_point_cache.resize(point_count); + + AABB portal_aabb; + + for(int i=0;iportal_info->portal->shape[i]; + Vector3 point = portal_xform.xform(Vector3(src.x,src.y,0)); + p_instance->portal_info->transformed_point_cache[i]=point; + if (i==0) + portal_aabb.pos=point; + else + portal_aabb.expand_to(point); + } + + portal_aabb.grow_by(p_instance->portal_info->portal->connect_range); + + new_aabb = portal_aabb; + + } else { +#endif + new_aabb = p_instance->transform.xform(p_instance->aabb); +#if 0 + } +#endif + + + p_instance->transformed_aabb=new_aabb; + + if (!p_instance->scenario) { + + return; + } + + + + if (p_instance->octree_id==0) { + + uint32_t base_type = 1<base_type; + uint32_t pairable_mask=0; + bool pairable=false; + + if (p_instance->base_type == VS::INSTANCE_LIGHT) { + + pairable_mask=p_instance->visible?VS::INSTANCE_GEOMETRY_MASK:0; + pairable=true; + } +#if 0 + + if (p_instance->base_type == VS::INSTANCE_PORTAL) { + + pairable_mask=(1<base_type == VS::INSTANCE_BAKED_LIGHT_SAMPLER) { + + pairable_mask=(1<room && (1<base_type)&VS::INSTANCE_GEOMETRY_MASK) { + + base_type|=VS::INSTANCE_ROOMLESS_MASK; + } + + if (p_instance->base_type == VS::INSTANCE_ROOM) { + + pairable_mask=INSTANCE_ROOMLESS_MASK; + pairable=true; + } +#endif + + // not inside octree + p_instance->octree_id = p_instance->scenario->octree.create(p_instance,new_aabb,0,pairable,base_type,pairable_mask); + + } else { + + // if (new_aabb==p_instance->data.transformed_aabb) + // return; + + p_instance->scenario->octree.move(p_instance->octree_id,new_aabb); + } +#if 0 + if (p_instance->base_type==INSTANCE_PORTAL) { + + _portal_attempt_connect(p_instance); + } + + if (!p_instance->room && (1<base_type)&INSTANCE_GEOMETRY_MASK) { + + _instance_validate_autorooms(p_instance); + } + + if (p_instance->base_type == INSTANCE_ROOM) { + + for(Set::Element *E=p_instance->room_info->owned_autoroom_geometry.front();E;E=E->next()) + _instance_validate_autorooms(E->get()); + } +#endif + +} + +void VisualServerScene::_update_instance_aabb(Instance *p_instance) { + + AABB new_aabb; + + ERR_FAIL_COND(p_instance->base_type!=VS::INSTANCE_NONE && !p_instance->base.is_valid()); + + switch(p_instance->base_type) { + case VisualServer::INSTANCE_NONE: { + + // do nothing + } break; + case VisualServer::INSTANCE_MESH: { + + new_aabb = VSG::storage->mesh_get_aabb(p_instance->base,p_instance->skeleton); + + } break; +#if 0 + case VisualServer::INSTANCE_MULTIMESH: { + + new_aabb = rasterizer->multimesh_get_aabb(p_instance->base); + + } break; + case VisualServer::INSTANCE_IMMEDIATE: { + + new_aabb = rasterizer->immediate_get_aabb(p_instance->base); + + + } break; + case VisualServer::INSTANCE_PARTICLES: { + + new_aabb = rasterizer->particles_get_aabb(p_instance->base); + + + } break; +#endif + case VisualServer::INSTANCE_LIGHT: { + + new_aabb = VSG::storage->light_get_aabb(p_instance->base); + + } break; +#if 0 + case VisualServer::INSTANCE_ROOM: { + + Room *room = room_owner.get( p_instance->base ); + ERR_FAIL_COND(!room); + new_aabb=room->bounds.get_aabb(); + + } break; + case VisualServer::INSTANCE_PORTAL: { + + Portal *portal = portal_owner.get( p_instance->base ); + ERR_FAIL_COND(!portal); + for (int i=0;ishape.size();i++) { + + Vector3 point( portal->shape[i].x, portal->shape[i].y, 0 ); + if (i==0) { + + new_aabb.pos=point; + new_aabb.size.z=0.01; // make it not flat for octree + } else { + + new_aabb.expand_to(point); + } + } + + } break; + case VisualServer::INSTANCE_BAKED_LIGHT: { + + BakedLight *baked_light = baked_light_owner.get( p_instance->base ); + ERR_FAIL_COND(!baked_light); + new_aabb=baked_light->octree_aabb; + + } break; + case VisualServer::INSTANCE_BAKED_LIGHT_SAMPLER: { + + BakedLightSampler *baked_light_sampler = baked_light_sampler_owner.get( p_instance->base ); + ERR_FAIL_COND(!baked_light_sampler); + float radius = baked_light_sampler->params[VS::BAKED_LIGHT_SAMPLER_RADIUS]; + + new_aabb=AABB(Vector3(-radius,-radius,-radius),Vector3(radius*2,radius*2,radius*2)); + + } break; +#endif + default: {} + } + + if (p_instance->extra_margin) + new_aabb.grow_by(p_instance->extra_margin); + + p_instance->aabb=new_aabb; + +} + + + +void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewport_size) { + + + + Camera *camera = camera_owner.getornull(p_camera); + ERR_FAIL_COND(!camera); + + Scenario *scenario = scenario_owner.getornull(p_scenario); + + render_pass++; + uint32_t camera_layer_mask=camera->visible_layers; + + + /* STEP 1 - SETUP CAMERA */ + CameraMatrix camera_matrix; + bool ortho=false; + + switch(camera->type) { + case Camera::ORTHOGONAL: { + + camera_matrix.set_orthogonal( + camera->size, + p_viewport_size.width / (float)p_viewport_size.height, + camera->znear, + camera->zfar, + camera->vaspect + + ); + ortho=true; + } break; + case Camera::PERSPECTIVE: { + + camera_matrix.set_perspective( + camera->fov, + p_viewport_size.width / (float)p_viewport_size.height, + camera->znear, + camera->zfar, + camera->vaspect + + ); + ortho=false; + + } break; + } + + +// rasterizer->set_camera(camera->transform, camera_matrix,ortho); + + Vector planes = camera_matrix.get_projection_planes(camera->transform); + + Plane near_plane(camera->transform.origin,-camera->transform.basis.get_axis(2).normalized()); + + /* STEP 2 - CULL */ + int cull_count = scenario->octree.cull_convex(planes,instance_cull_result,MAX_INSTANCE_CULL); + light_cull_count=0; +// light_samplers_culled=0; + +/* print_line("OT: "+rtos( (OS::get_singleton()->get_ticks_usec()-t)/1000.0)); + print_line("OTO: "+itos(p_scenario->octree.get_octant_count())); +// print_line("OTE: "+itos(p_scenario->octree.get_elem_count())); + print_line("OTP: "+itos(p_scenario->octree.get_pair_count())); +*/ + + /* STEP 3 - PROCESS PORTALS, VALIDATE ROOMS */ + + + // compute portals +#if 0 + exterior_visited=false; + exterior_portal_cull_count=0; + + if (room_cull_enabled) { + for(int i=0;ilast_render_pass=render_pass; + + if (ins->base_type!=INSTANCE_PORTAL) + continue; + + if (ins->room) + continue; + + ERR_CONTINUE(exterior_portal_cull_count>=MAX_EXTERIOR_PORTALS); + exterior_portal_cull_result[exterior_portal_cull_count++]=ins; + + } + + room_cull_count = p_scenario->octree.cull_point(camera->transform.origin,room_cull_result,MAX_ROOM_CULL,NULL,(1< current_rooms; + Set portal_rooms; + //add to set + for(int i=0;ibase_type==INSTANCE_ROOM) { + current_rooms.insert(room_cull_result[i]); + } + if (room_cull_result[i]->base_type==INSTANCE_PORTAL) { + //assume inside that room if also inside the portal.. + if (room_cull_result[i]->room) { + portal_rooms.insert(room_cull_result[i]->room); + } + + SWAP(room_cull_result[i],room_cull_result[room_cull_count-1]); + room_cull_count--; + i--; + } + } + + //remove from set if it has a parent room or BSP doesn't contain + for(int i=0;iroom_info->affine_inverse.xform( camera->transform.origin ); + + if (!portal_rooms.has(r) && !r->room_info->room->bounds.point_is_inside(room_local_point)) { + + current_rooms.erase(r); + continue; + } + + //check parent + while (r->room) {// has parent room + + current_rooms.erase(r); + r=r->room; + } + + } + + if (current_rooms.size()) { + //camera is inside a room + // go through rooms + for(Set::Element *E=current_rooms.front();E;E=E->next()) { + _cull_room(camera,E->get()); + } + + } else { + //start from exterior + _cull_room(camera,NULL); + + } + } + +#endif + /* STEP 4 - REMOVE FURTHER CULLED OBJECTS, ADD LIGHTS */ + + for(int i=0;ilayer_mask)==0) { + + //failure + } else if (ins->base_type==VS::INSTANCE_LIGHT && ins->visible) { + + if (light_cull_count(ins->base_data); + + if (!light->geometries.empty()) { + //do not add this light if no geometry is affected by it.. + light_cull_result[light_cull_count]=ins; + light_instance_cull_result[light_cull_count]=light->instance; + + light_cull_count++; + } + +// rasterizer->light_instance_set_active_hint(ins->light_info->instance); + } + + } else if ((1<base_type)&VS::INSTANCE_GEOMETRY_MASK && ins->visible && ins->cast_shadows!=VS::SHADOW_CASTING_SETTING_SHADOWS_ONLY) { + + keep=true; +#if 0 + bool discarded=false; + + if (ins->draw_range_end>0) { + + float d = cull_range.nearp.distance_to(ins->data.transform.origin); + if (d<0) + d=0; + discarded=(ddraw_range_begin || d>=ins->draw_range_end); + + + } + + if (!discarded) { + + // test if this geometry should be visible + + if (room_cull_enabled) { + + + if (ins->visible_in_all_rooms) { + keep=true; + } else if (ins->room) { + + if (ins->room->room_info->last_visited_pass==render_pass) + keep=true; + } else if (ins->auto_rooms.size()) { + + + for(Set::Element *E=ins->auto_rooms.front();E;E=E->next()) { + + if (E->get()->room_info->last_visited_pass==render_pass) { + keep=true; + break; + } + } + } else if(exterior_visited) + keep=true; + } else { + + keep=true; + } + + + } + + + if (keep) { + // update cull range + float min,max; + ins->transformed_aabb.project_range_in_plane(cull_range.nearp,min,max); + + if (mincull_range.max) + cull_range.max=max; + + if (ins->sampled_light && ins->sampled_light->baked_light_sampler_info->last_pass!=render_pass) { + if (light_samplers_culledsampled_light; + ins->sampled_light->baked_light_sampler_info->last_pass=render_pass; + } + } + } +#endif + + + InstanceGeometryData * geom = static_cast(ins->base_data); + + if (geom->lighting_dirty) { + int l=0; + //only called when lights AABB enter/exit this geometry + ins->light_instances.resize(geom->lighting.size()); + + for (List::Element *E=geom->lighting.front();E;E=E->next()) { + + InstanceLightData * light = static_cast(E->get()->base_data); + + ins->light_instances[l++]=light->instance; + } + + geom->lighting_dirty=false; + } + + } + + if (!keep) { + // remove, no reason to keep + cull_count--; + SWAP( instance_cull_result[i], instance_cull_result[ cull_count ] ); + i--; + ins->last_render_pass=0; // make invalid + } else { + + ins->last_render_pass=render_pass; + } + } + + /* STEP 5 - PROCESS LIGHTS */ + + RID *directional_light_ptr=&light_instance_cull_result[light_cull_count]; + int directional_light_count=0; + + // directional lights + { + for (List::Element *E=scenario->directional_lights.front();E;E=E->next()) { + + if (light_cull_count+directional_light_count>=MAX_LIGHTS_CULLED) { + break; + } + + if (!E->get()->visible) + continue; + + InstanceLightData * light = static_cast(E->get()->base_data); + + + //check shadow.. + + +/* if (light && light->light_info->enabled && rasterizer->light_has_shadow(light->base_rid)) { + //rasterizer->light_instance_set_active_hint(light->light_info->instance); + _light_instance_update_shadow(light,p_scenario,camera,cull_range); + } +*/ + + //add to list + + + directional_light_ptr[directional_light_count++]=light->instance; + + } + } + +#if 0 + { //this should eventually change to + //assign shadows by distance to camera + SortArray sorter; + sorter.sort(light_cull_result,light_cull_count); + for (int i=0;ilight_has_shadow(ins->base_rid) || !shadows_enabled) + continue; + + /* for far shadows? + if (ins->version == ins->light_info->last_version && rasterizer->light_instance_has_far_shadow(ins->light_info->instance)) + continue; // didn't change + */ + + _light_instance_update_shadow(ins,p_scenario,camera,cull_range); + ins->light_info->last_version=ins->version; + } + } +#endif + /* ENVIRONMENT */ + + RID environment; + if (camera->env.is_valid()) //camera has more environment priority + environment=camera->env; + else if (scenario->environment.is_valid()) + environment=scenario->environment; + else + environment=scenario->fallback_environment; + +#if 0 + /* STEP 6 - SAMPLE BAKED LIGHT */ + + bool islinear =false; + if (environment.is_valid()) { + islinear = rasterizer->environment_is_fx_enabled(environment,VS::ENV_FX_SRGB); + } + + for(int i=0;itransform,light_sampler_cull_result[i],islinear); + } +#endif + /* STEP 7 - PROCESS GEOMETRY AND DRAW SCENE*/ + +#if 0 + // add lights + + { + List::Element *E=p_scenario->directional_lights.front(); + + + for(;E;E=E->next()) { + Instance *light = E->get().is_valid()?instance_owner.get(E->get()):NULL; + + ERR_CONTINUE(!light); + if (!light->light_info->enabled) + continue; + + rasterizer->add_light(light->light_info->instance); + light->light_info->last_add_pass=render_pass; + } + + for (int i=0;iadd_light(ins->light_info->instance); + ins->light_info->last_add_pass=render_pass; + } + } + // add geometry +#endif + + + VSG::scene_render->render_scene(camera->transform, camera_matrix,ortho,(RasterizerScene::InstanceBase**)instance_cull_result,cull_count,light_instance_cull_result,light_cull_count,directional_light_ptr,directional_light_count,environment); + +} + + + +void VisualServerScene::_update_dirty_instance(Instance *p_instance) { + + + if (p_instance->update_aabb) + _update_instance_aabb(p_instance); + + if (p_instance->update_materials) { + if (p_instance->base_type==VS::INSTANCE_MESH) { + p_instance->materials.resize(VSG::storage->mesh_get_surface_count(p_instance->base)); + } + } + + _update_instance(p_instance); + + p_instance->update_aabb=false; + p_instance->update_materials=false; + + _instance_update_list.remove( &p_instance->update_item ); +} + + +void VisualServerScene::update_dirty_instances() { + + while(_instance_update_list.first()) { + + _update_dirty_instance( _instance_update_list.first()->self() ); + } +} + +bool VisualServerScene::free(RID p_rid) { + + if (camera_owner.owns(p_rid)) { + + Camera *camera = camera_owner.get( p_rid ); + + camera_owner.free(p_rid); + memdelete(camera); + + } else if (scenario_owner.owns(p_rid)) { + + Scenario *scenario = scenario_owner.get( p_rid ); + + while(scenario->instances.first()) { + instance_set_scenario(scenario->instances.first()->self()->self,RID()); + } + + scenario_owner.free(p_rid); + memdelete(scenario); + + } else if (instance_owner.owns(p_rid)) { + // delete the instance + + update_dirty_instances(); + + Instance *instance = instance_owner.get(p_rid); + + instance_set_room(p_rid,RID()); + instance_set_scenario(p_rid,RID()); + instance_set_base(p_rid,RID()); + + if (instance->skeleton.is_valid()) + instance_attach_skeleton(p_rid,RID()); + + instance_owner.free(p_rid); + memdelete(instance); + } else { + return false; + } + + + return true; +} + +VisualServerScene *VisualServerScene::singleton=NULL; + +VisualServerScene::VisualServerScene() { + + + render_pass=1; + singleton=this; + +} diff --git a/servers/visual/visual_server_scene.h b/servers/visual/visual_server_scene.h new file mode 100644 index 00000000000..ceec4af5989 --- /dev/null +++ b/servers/visual/visual_server_scene.h @@ -0,0 +1,390 @@ +#ifndef VISUALSERVERSCENE_H +#define VISUALSERVERSCENE_H + +#include "servers/visual/rasterizer.h" + +#include "geometry.h" +#include "allocators.h" +#include "octree.h" +#include "self_list.h" + +class VisualServerScene { +public: + + + enum { + + MAX_INSTANCE_CULL=65536, + MAX_LIGHTS_CULLED=4096, + MAX_ROOM_CULL=32, + MAX_EXTERIOR_PORTALS=128, + }; + + uint64_t render_pass; + + + static VisualServerScene *singleton; +#if 0 + struct Portal { + + bool enabled; + float disable_distance; + Color disable_color; + float connect_range; + Vector shape; + Rect2 bounds; + + + Portal() { enabled=true; disable_distance=50; disable_color=Color(); connect_range=0.8; } + }; + + struct BakedLight { + + Rasterizer::BakedLightData data; + DVector sampler; + AABB octree_aabb; + Size2i octree_tex_size; + Size2i light_tex_size; + + }; + + struct BakedLightSampler { + + float params[BAKED_LIGHT_SAMPLER_MAX]; + int resolution; + Vector dp_cache; + + BakedLightSampler() { + params[BAKED_LIGHT_SAMPLER_STRENGTH]=1.0; + params[BAKED_LIGHT_SAMPLER_ATTENUATION]=1.0; + params[BAKED_LIGHT_SAMPLER_RADIUS]=1.0; + params[BAKED_LIGHT_SAMPLER_DETAIL_RATIO]=0.1; + resolution=16; + } + }; + + void _update_baked_light_sampler_dp_cache(BakedLightSampler * blsamp); + +#endif + + + struct Camera : public RID_Data { + + enum Type { + PERSPECTIVE, + ORTHOGONAL + }; + Type type; + float fov; + float znear,zfar; + float size; + uint32_t visible_layers; + bool vaspect; + RID env; + + Transform transform; + + Camera() { + + visible_layers=0xFFFFFFFF; + fov=60; + type=PERSPECTIVE; + znear=0.1; zfar=100; + size=1.0; + vaspect=false; + + } + }; + + mutable RID_Owner camera_owner; + + virtual RID camera_create(); + virtual void camera_set_perspective(RID p_camera,float p_fovy_degrees, float p_z_near, float p_z_far); + virtual void camera_set_orthogonal(RID p_camera,float p_size, float p_z_near, float p_z_far); + virtual void camera_set_transform(RID p_camera,const Transform& p_transform); + virtual void camera_set_cull_mask(RID p_camera,uint32_t p_layers); + virtual void camera_set_environment(RID p_camera,RID p_env); + virtual void camera_set_use_vertical_aspect(RID p_camera,bool p_enable); + + + /* + + struct RoomInfo { + + Transform affine_inverse; + Room *room; + List owned_geometry_instances; + List owned_portal_instances; + List owned_room_instances; + List owned_light_instances; //not used, but just for the sake of it + Set disconnected_child_portals; + Set owned_autoroom_geometry; + uint64_t last_visited_pass; + RoomInfo() { last_visited_pass=0; } + + }; + + struct InstancePortal { + + Portal *portal; + Set candidate_set; + Instance *connected; + uint64_t last_visited_pass; + + Plane plane_cache; + Vector transformed_point_cache; + + + PortalInfo() { connected=NULL; last_visited_pass=0;} + }; +*/ + + /* ENVIRONMENT API */ + + virtual RID environment_create(); + + virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg); + virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0); + virtual void environment_set_bg_color(RID p_env,const Color& p_color); + virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0); + + virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode); + virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); + + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper); + virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); + virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); + virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); + virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); + + + /* SCENARIO API */ + + struct Instance; + + struct Scenario : RID_Data { + + + VS::ScenarioDebugMode debug; + RID self; + // well wtf, balloon allocator is slower? + + Octree octree; + + List directional_lights; + RID environment; + RID fallback_environment; + + SelfList::List instances; + + Scenario() { debug=VS::SCENARIO_DEBUG_DISABLED; } + }; + + RID_Owner scenario_owner; + + static void* _instance_pair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int); + static void _instance_unpair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int,void*); + + virtual RID scenario_create(); + + virtual void scenario_set_debug(RID p_scenario,VS::ScenarioDebugMode p_debug_mode); + virtual void scenario_set_environment(RID p_scenario, RID p_environment); + virtual void scenario_set_fallback_environment(RID p_scenario, RID p_environment); + + + /* INSTANCING API */ + + struct InstanceBaseData { + + + virtual ~InstanceBaseData() {} + }; + + + + struct Instance : RasterizerScene::InstanceBase { + + RID self; + //scenario stuff + OctreeElementID octree_id; + Scenario *scenario; + SelfList scenario_item; + + //aabb stuff + bool update_aabb; + bool update_materials; + SelfList update_item; + + + AABB aabb; + AABB transformed_aabb; + float extra_margin; + uint32_t object_ID; + bool visible; + uint32_t layer_mask; + + float lod_begin; + float lod_end; + float lod_begin_hysteresis; + float lod_end_hysteresis; + RID lod_instance; + + Instance *room; + SelfList room_item; + bool visible_in_all_rooms; + + uint64_t last_render_pass; + uint64_t last_frame_pass; + + uint64_t version; // changes to this, and changes to base increase version + + InstanceBaseData *base_data; + + virtual void base_removed() { + + singleton->instance_set_base(self,RID()); + } + + virtual void base_changed() { + + singleton->_instance_queue_update(this,true,true); + } + + + Instance() : scenario_item(this), update_item(this), room_item(this) { + + octree_id=0; + scenario=NULL; + + + update_aabb=false; + update_materials=false; + + extra_margin=0; + + + object_ID=0; + visible=true; + layer_mask=1; + + lod_begin=0; + lod_end=0; + lod_begin_hysteresis=0; + lod_end_hysteresis=0; + + room=NULL; + visible_in_all_rooms=false; + + + + last_render_pass=0; + last_frame_pass=0; + version=1; + base_data=NULL; + + } + + ~Instance() { + + if (base_data) + memdelete(base_data); + + } + }; + + SelfList::List _instance_update_list; + void _instance_queue_update(Instance *p_instance,bool p_update_aabb,bool p_update_materials=false); + + + struct InstanceGeometryData : public InstanceBaseData { + + List lighting; + bool lighting_dirty; + + InstanceGeometryData() { + + lighting_dirty=false; + } + }; + + + struct InstanceLightData : public InstanceBaseData { + + struct PairInfo { + List::Element *L; //light iterator in geometry + Instance *geometry; + }; + + RID instance; + uint64_t last_hash; + List::Element *D; // directional light in scenario + + bool shadow_sirty; + + List geometries; + + InstanceLightData() { + + shadow_sirty=true; + D=NULL; + last_hash=0; + } + }; + + + Instance *instance_cull_result[MAX_INSTANCE_CULL]; + Instance *instance_shadow_cull_result[MAX_INSTANCE_CULL]; //used for generating shadowmaps + Instance *light_cull_result[MAX_LIGHTS_CULLED]; + RID light_instance_cull_result[MAX_LIGHTS_CULLED]; + int light_cull_count; + + + RID_Owner instance_owner; + + // from can be mesh, light, area and portal so far. + virtual RID instance_create(); // from can be mesh, light, poly, area and portal so far. + + virtual void instance_set_base(RID p_instance, RID p_base); // from can be mesh, light, poly, area and portal so far. + virtual void instance_set_scenario(RID p_instance, RID p_scenario); // from can be mesh, light, poly, area and portal so far. + virtual void instance_set_layer_mask(RID p_instance, uint32_t p_mask); + virtual void instance_set_transform(RID p_instance, const Transform& p_transform); + virtual void instance_attach_object_instance_ID(RID p_instance,ObjectID p_ID); + virtual void instance_set_morph_target_weight(RID p_instance,int p_shape, float p_weight); + virtual void instance_set_surface_material(RID p_instance,int p_surface, RID p_material); + + virtual void instance_attach_skeleton(RID p_instance,RID p_skeleton); + virtual void instance_set_exterior( RID p_instance, bool p_enabled ); + virtual void instance_set_room( RID p_instance, RID p_room ); + + virtual void instance_set_extra_visibility_margin( RID p_instance, real_t p_margin ); + + + // don't use these in a game! + virtual Vector instances_cull_aabb(const AABB& p_aabb, RID p_scenario=RID()) const; + virtual Vector instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario=RID()) const; + virtual Vector instances_cull_convex(const Vector& p_convex, RID p_scenario=RID()) const; + + + virtual void instance_geometry_set_flag(RID p_instance,VS::InstanceFlags p_flags,bool p_enabled); + virtual void instance_geometry_set_cast_shadows_setting(RID p_instance, VS::ShadowCastingSetting p_shadow_casting_setting); + virtual void instance_geometry_set_material_override(RID p_instance, RID p_material); + + + virtual void instance_geometry_set_draw_range(RID p_instance,float p_min,float p_max,float p_min_margin,float p_max_margin); + virtual void instance_geometry_set_as_instance_lod(RID p_instance,RID p_as_lod_of_instance); + + + _FORCE_INLINE_ void _update_instance(Instance *p_instance); + _FORCE_INLINE_ void _update_instance_aabb(Instance *p_instance); + _FORCE_INLINE_ void _update_dirty_instance(Instance *p_instance); + + + void render_camera(RID p_camera, RID p_scenario, Size2 p_viewport_size); + void update_dirty_instances(); + bool free(RID p_rid); + + VisualServerScene(); +}; + +#endif // VISUALSERVERSCENE_H diff --git a/servers/visual/visual_server_viewport.cpp b/servers/visual/visual_server_viewport.cpp index baf18b7a562..583b42bfc16 100644 --- a/servers/visual/visual_server_viewport.cpp +++ b/servers/visual/visual_server_viewport.cpp @@ -1,8 +1,11 @@ #include "visual_server_viewport.h" #include "visual_server_global.h" #include "visual_server_canvas.h" +#include "visual_server_scene.h" #include "globals.h" + + void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { /* Camera should always be BEFORE any other 3D */ @@ -58,6 +61,12 @@ void VisualServerViewport::_draw_viewport(Viewport *p_viewport) { } } + + if (!p_viewport->disable_3d && p_viewport->camera.is_valid()) { + + VSG::scene->render_camera(p_viewport->camera,p_viewport->scenario,p_viewport->size); + } + if (!p_viewport->hide_canvas) { int i=0; @@ -248,6 +257,11 @@ void VisualServerViewport::draw_viewports() { ERR_CONTINUE( !vp->render_target.is_valid() ); + bool visible = vp->viewport_to_screen_rect!=Rect2() || vp->update_mode==VS::VIEWPORT_UPDATE_ALWAYS || vp->update_mode==VS::VIEWPORT_UPDATE_ONCE; + + if (!visible) + continue; + VSG::rasterizer->set_current_render_target(vp->render_target); _draw_viewport(vp); diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 175dfd47d1d..97825c172b5 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -345,8 +345,754 @@ RID VisualServer::get_white_texture() { } +Error VisualServer::_surface_set_data(Array p_arrays,uint32_t p_format,uint32_t *p_offsets,uint32_t p_stride,DVector &r_vertex_array,int p_vertex_array_len,DVector &r_index_array,int p_index_array_len,AABB &r_aabb,Vector r_bone_aabb) { + + DVector::Write vw = r_vertex_array.write(); + + DVector::Write iw; + if (r_index_array.size()) { + iw=r_index_array.write(); + } + + int max_bone=0; + + + for(int ai=0;ai array = p_arrays[ai]; + ERR_FAIL_COND_V( array.size() != p_vertex_array_len, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + const Vector2* src=read.ptr(); + + // setting vertices means regenerating the AABB + Rect2 aabb; + + + if (p_format&ARRAY_COMPRESS_VERTEX) { + + for (int i=0;i array = p_arrays[ai]; + ERR_FAIL_COND_V( array.size() != p_vertex_array_len, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + const Vector3* src=read.ptr(); + + // setting vertices means regenerating the AABB + AABB aabb; + + + if (p_format&ARRAY_COMPRESS_VERTEX) { + + for (int i=0;i array = p_arrays[ai]; + ERR_FAIL_COND_V( array.size() != p_vertex_array_len, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + const Vector3* src=read.ptr(); + + // setting vertices means regenerating the AABB + + if (p_format&ARRAY_COMPRESS_NORMAL) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len*4, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + const real_t* src = read.ptr(); + + if (p_format&ARRAY_COMPRESS_TANGENT) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + const Color* src = read.ptr(); + + if (p_format&ARRAY_COMPRESS_COLOR) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len , ERR_INVALID_PARAMETER); + + DVector::Read read = array.read(); + + const Vector2 * src=read.ptr(); + + + + if (p_format&ARRAY_COMPRESS_TEX_UV) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len , ERR_INVALID_PARAMETER); + + DVector::Read read = array.read(); + + const Vector2 * src=read.ptr(); + + + + if (p_format&ARRAY_COMPRESS_TEX_UV2) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len*VS::ARRAY_WEIGHTS_SIZE, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + + const real_t * src = read.ptr(); + + if (p_format&ARRAY_COMPRESS_WEIGHTS) { + + for (int i=0;i array = p_arrays[ai]; + + ERR_FAIL_COND_V( array.size() != p_vertex_array_len*VS::ARRAY_WEIGHTS_SIZE, ERR_INVALID_PARAMETER ); + + + DVector::Read read = array.read(); + + const int * src = read.ptr(); + + + if (!(p_format&ARRAY_FLAG_USE_16_BIT_BONES)) { + + for (int i=0;i indices = p_arrays[ai]; + ERR_FAIL_COND_V( indices.size() == 0, ERR_INVALID_PARAMETER ); + ERR_FAIL_COND_V( indices.size() != p_index_array_len, ERR_INVALID_PARAMETER ); + + /* determine wether using 16 or 32 bits indices */ + + DVector::Read read = indices.read(); + const int *src=read.ptr(); + + for (int i=0;i vertices = p_arrays[VS::ARRAY_VERTEX]; + DVector bones = p_arrays[VS::ARRAY_BONES]; + DVector weights = p_arrays[VS::ARRAY_WEIGHTS]; + + bool any_valid=false; + + if (vertices.size() && bones.size()==vertices.size()*4 && weights.size()==bones.size()) { + + int vs = vertices.size(); + DVector::Read rv =vertices.read(); + DVector::Read rb=bones.read(); + DVector::Read rw=weights.read(); + + AABB *bptr = r_bone_aabb.ptr(); + + for(int i=0;isize.x<0) { + //first + bptr[idx]=AABB(); + bptr[idx].pos=v; + any_valid=true; + } else { + bptr[idx].expand_to(v); + } + } + } + } + + if (!any_valid && first) { + + r_bone_aabb.clear(); + } + } + return OK; +} + + void VisualServer::mesh_add_surface_from_arrays(RID p_mesh,PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes,uint32_t p_compress_format) { + ERR_FAIL_INDEX( p_primitive, VS::PRIMITIVE_MAX ); + ERR_FAIL_COND(p_arrays.size()!=VS::ARRAY_MAX); + + uint32_t format=0; + + // validation + int index_array_len=0; + int array_len=0; + + for(int i=0;i v2 = var; + array_len=v2.size(); + } break; + case Variant::VECTOR3_ARRAY: { + DVector v3 = var; + array_len=v3.size(); + } break; + default: { + Array v = var; + array_len=v.size(); + } break; + } + + array_len=Vector3Array(p_arrays[i]).size(); + ERR_FAIL_COND(array_len==0); + } else if (i==VS::ARRAY_INDEX) { + + index_array_len=IntArray(p_arrays[i]).size(); + } + } + + ERR_FAIL_COND((format&VS::ARRAY_FORMAT_VERTEX)==0); // mandatory + + + if (p_blend_shapes.size()) { + //validate format for morphs + for(int i=0;i(1<<16)) { + + elem_size=4; + + } else { + elem_size=2; + } + offsets[i]=elem_size; + continue; + } break; + default: { + ERR_FAIL( ); + } + } + + print_line("type "+itos(i)+" size: "+itos(elem_size)+" offset "+itos(total_elem_size)); + offsets[i]=total_elem_size; + total_elem_size+=elem_size; + + + } + + print_line("total elemn size: "+itos(total_elem_size)); + + uint32_t mask = (1< vertex_array; + vertex_array.resize(array_size); + + int index_array_size = offsets[VS::ARRAY_INDEX]*index_array_len; + + print_line("index array size: "+itos(index_array_size)); + + DVector index_array; + index_array.resize(index_array_size); + + AABB aabb; + Vector bone_aabb; + + Error err = _surface_set_data(p_arrays,format,offsets,total_elem_size,vertex_array,array_len,index_array,index_array_len,aabb,bone_aabb); + + if (err) { + ERR_EXPLAIN("Invalid array format for surface"); + ERR_FAIL_COND(err!=OK); + } + + Vector > blend_shape_data; + + for(int i=0;i vertex_array_shape; + vertex_array_shape.resize(array_size); + DVector noindex; + + AABB laabb; + Error err = _surface_set_data(p_blend_shapes[i],format&~ARRAY_FORMAT_INDEX,offsets,total_elem_size,vertex_array,array_len,noindex,0,laabb,bone_aabb); + aabb.merge_with(laabb); + if (err) { + ERR_EXPLAIN("Invalid blend shape array format for surface"); + ERR_FAIL_COND(err!=OK); + } + + blend_shape_data.push_back(vertex_array_shape); + } + + mesh_add_surface(p_mesh,format,p_primitive,vertex_array,array_len,index_array,index_array_len,aabb,blend_shape_data,bone_aabb); } diff --git a/servers/visual_server.h b/servers/visual_server.h index 90e99710fd8..eb20355efba 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -59,6 +59,9 @@ protected: RID test_material; RID material_2d[16]; + + Error _surface_set_data(Array p_arrays,uint32_t p_format,uint32_t *p_offsets,uint32_t p_stride,DVector &r_vertex_array,int p_vertex_array_len,DVector &r_index_array,int p_index_array_len,AABB &r_aabb,Vector r_bone_aabb); + static VisualServer* (*create_func)(); static void _bind_methods(); public: @@ -207,6 +210,7 @@ public: ARRAY_COMPRESS_INDEX=1<<(ARRAY_INDEX+ARRAY_COMPRESS_BASE), ARRAY_FLAG_USE_2D_VERTICES=ARRAY_COMPRESS_INDEX<<1, + ARRAY_FLAG_USE_16_BIT_BONES=ARRAY_COMPRESS_INDEX<<2, ARRAY_COMPRESS_DEFAULT=ARRAY_COMPRESS_VERTEX|ARRAY_COMPRESS_NORMAL|ARRAY_COMPRESS_TANGENT|ARRAY_COMPRESS_COLOR|ARRAY_COMPRESS_TEX_UV|ARRAY_COMPRESS_TEX_UV2|ARRAY_COMPRESS_BONES|ARRAY_COMPRESS_WEIGHTS|ARRAY_COMPRESS_INDEX @@ -228,7 +232,7 @@ public: virtual void mesh_add_surface_from_arrays(RID p_mesh,PrimitiveType p_primitive,const Array& p_arrays,const Array& p_blend_shapes=Array(),uint32_t p_compress_format=ARRAY_COMPRESS_DEFAULT); - virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const Vector >& p_blend_shapes=Vector >())=0; + virtual void mesh_add_surface(RID p_mesh,uint32_t p_format,PrimitiveType p_primitive,const DVector& p_array,int p_vertex_count,const DVector& p_index_array,int p_index_count,const AABB& p_aabb,const Vector >& p_blend_shapes=Vector >(),const Vector& p_bone_aabbs=Vector())=0; virtual void mesh_set_morph_target_count(RID p_mesh,int p_amount)=0; virtual int mesh_get_morph_target_count(RID p_mesh) const=0; @@ -530,7 +534,6 @@ public: virtual void scenario_set_debug(RID p_scenario,ScenarioDebugMode p_debug_mode)=0; virtual void scenario_set_environment(RID p_scenario, RID p_environment)=0; - virtual RID scenario_get_environment(RID p_scenario, RID p_environment) const=0; virtual void scenario_set_fallback_environment(RID p_scenario, RID p_environment)=0; @@ -546,6 +549,7 @@ public: INSTANCE_REFLECTION_PROBE, INSTANCE_ROOM, INSTANCE_PORTAL, + INSTANCE_MAX, /*INSTANCE_BAKED_LIGHT, INSTANCE_BAKED_LIGHT_SAMPLER,*/ diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp index 55a4c6c9347..38a7706acc1 100644 --- a/tools/editor/plugins/spatial_editor_plugin.cpp +++ b/tools/editor/plugins/spatial_editor_plugin.cpp @@ -2349,6 +2349,7 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed spatial_editor=p_spatial_editor; ViewportContainer *c=memnew(ViewportContainer); + c->set_stretch(true); add_child(c); c->set_area_as_parent_rect(); viewport = memnew( Viewport ); From 17d7e6a142500a80ba6628a32eca792c44bbbdb7 Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Mon, 17 Oct 2016 03:57:32 +0200 Subject: [PATCH 011/223] Fix Keyboard Input Hangs when using modifiers Main input parsing loop only update actions for keyboard if the state has changed. `InputMap::event_is_action` now ignores keyboard modifiers if the event is not pressed. Clarify difference between `InputMap::action_has_event` and `InputMap::event_is_action` in docs. Fixes #6388. --- core/input_map.cpp | 6 +++--- core/input_map.h | 2 +- doc/base/classes.xml | 2 +- main/input_default.cpp | 14 +++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/input_map.cpp b/core/input_map.cpp index 09cb7ce426e..ad1403a64be 100644 --- a/core/input_map.cpp +++ b/core/input_map.cpp @@ -106,7 +106,7 @@ List InputMap::get_actions() const { return actions; } -List::Element *InputMap::_find_event(List &p_list,const InputEvent& p_event) const { +List::Element *InputMap::_find_event(List &p_list,const InputEvent& p_event, bool p_mod_ignore=false) const { for (List::Element *E=p_list.front();E;E=E->next()) { @@ -122,7 +122,7 @@ List::Element *InputMap::_find_event(List &p_list,const case InputEvent::KEY: { - same=(e.key.scancode==p_event.key.scancode && e.key.mod == p_event.key.mod); + same=(e.key.scancode==p_event.key.scancode && (p_mod_ignore || e.key.mod == p_event.key.mod)); } break; case InputEvent::JOYSTICK_BUTTON: { @@ -229,7 +229,7 @@ bool InputMap::event_is_action(const InputEvent& p_event, const StringName& p_ac return p_event.action.action==E->get().id; } - return _find_event(E->get().inputs,p_event)!=NULL; + return _find_event(E->get().inputs,p_event,!p_event.is_pressed())!=NULL; } const Map& InputMap::get_action_map() const { diff --git a/core/input_map.h b/core/input_map.h index 21c479588d1..a974f6f1036 100644 --- a/core/input_map.h +++ b/core/input_map.h @@ -46,7 +46,7 @@ private: mutable Map input_map; mutable Map input_id_map; - List::Element *_find_event(List &p_list,const InputEvent& p_event) const; + List::Element *_find_event(List &p_list,const InputEvent& p_event, bool p_mod_ignore) const; Array _get_action_list(const StringName& p_action); Array _get_actions(); diff --git a/doc/base/classes.xml b/doc/base/classes.xml index 4e0ba53f482..a711f591c66 100644 --- a/doc/base/classes.xml +++ b/doc/base/classes.xml @@ -18019,7 +18019,7 @@ - Return whether the given event is part of an existing action. + Return whether the given event is part of an existing action. This method ignores keyboard modifiers if the given [InputEvent] is not pressed (for proper release detection). See [method action_has_event] if you don't want this behavior. diff --git a/main/input_default.cpp b/main/input_default.cpp index c60fcd22434..6c2c6ebbb3a 100644 --- a/main/input_default.cpp +++ b/main/input_default.cpp @@ -377,13 +377,13 @@ void InputDefault::parse_input_event(const InputEvent& p_event) { if (InputMap::get_singleton()->event_is_action(p_event,E->key())) { - Action action; - action.fixed_frame=OS::get_singleton()->get_fixed_frames(); - action.idle_frame=OS::get_singleton()->get_idle_frames(); - action.pressed=p_event.is_pressed(); - - action_state[E->key()]=action; - + if(is_action_pressed(E->key()) != p_event.is_pressed()) { + Action action; + action.fixed_frame=OS::get_singleton()->get_fixed_frames(); + action.idle_frame=OS::get_singleton()->get_idle_frames(); + action.pressed=p_event.is_pressed(); + action_state[E->key()]=action; + } } } } From fa219e02d74f9b249270b6610a88319925908f89 Mon Sep 17 00:00:00 2001 From: eska Date: Fri, 21 Oct 2016 00:02:44 +0200 Subject: [PATCH 012/223] Remove JavaScript compilation option `compress`. This functionality has been removed from Emscripten in version 1.36.13, server-side compression is recommended instead. --- platform/javascript/detect.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index aeff5a1a340..7144476302f 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -18,8 +18,7 @@ def can_build(): def get_opts(): return [ - ['compress','Compress JS Executable','no'], - ['javascript_eval','Enable JavaScript eval interface','yes'] + ['javascript_eval','Enable JavaScript eval interface','yes'], ] def get_flags(): @@ -94,12 +93,6 @@ def configure(env): if env['javascript_eval'] == 'yes': env.Append(CPPFLAGS=['-DJAVASCRIPT_EVAL_ENABLED']) - if (env["compress"]=="yes"): - lzma_binpath = em_path+"/third_party/lzma.js/lzma-native" - lzma_decoder = em_path+"/third_party/lzma.js/lzma-decoder.js" - lzma_dec = "LZMA.decompress" - env.Append(LINKFLAGS=['--compression',lzma_binpath+","+lzma_decoder+","+lzma_dec]) - env.Append(LINKFLAGS=['-s','ASM_JS=1']) env.Append(LINKFLAGS=['-O2']) #env.Append(LINKFLAGS=['-g4']) From cb34b70df13ad9f7942b0c363edc71cfd417bb21 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Fri, 21 Oct 2016 07:27:13 -0300 Subject: [PATCH 013/223] More scene work, can display a skybox --- drivers/gles3/rasterizer_canvas_gles3.cpp | 1 - drivers/gles3/rasterizer_scene_gles3.cpp | 225 ++++++++++++++++++++- drivers/gles3/rasterizer_scene_gles3.h | 66 +++++- drivers/gles3/rasterizer_storage_gles3.cpp | 137 +++++++++++++ drivers/gles3/rasterizer_storage_gles3.h | 8 + drivers/gles3/shaders/SCsub | 1 + drivers/gles3/shaders/copy.glsl | 17 +- drivers/gles3/shaders/cubemap_filter.glsl | 143 +++++++++++++ scene/resources/environment.cpp | 175 +++++++++++++++- scene/resources/environment.h | 56 ++++- servers/visual/rasterizer.h | 21 ++ servers/visual/visual_server_raster.h | 12 +- servers/visual/visual_server_scene.cpp | 48 ----- servers/visual/visual_server_scene.h | 19 -- servers/visual_server.h | 16 +- 15 files changed, 861 insertions(+), 84 deletions(-) create mode 100644 drivers/gles3/shaders/cubemap_filter.glsl diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 237b3ec3fc6..b2228a6cfa0 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -117,7 +117,6 @@ void RasterizerCanvasGLES3::canvas_begin(){ glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); glClear(GL_COLOR_BUFFER_BIT); storage->frame.clear_request=false; - print_line("canvas clear?"); } diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 121620594d0..4ce2bd2f37e 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1,5 +1,10 @@ #include "rasterizer_scene_gles3.h" #include "globals.h" + + + + + static _FORCE_INLINE_ void store_matrix32(const Matrix32& p_mtx, float* p_array) { p_array[ 0]=p_mtx.elements[0][0]; @@ -52,6 +57,121 @@ static _FORCE_INLINE_ void store_camera(const CameraMatrix& p_mtx, float* p_arra + +/* ENVIRONMENT API */ + +RID RasterizerSceneGLES3::environment_create(){ + + + Environment *env = memnew( Environment ); + + return environment_owner.make_rid(env); +} + +void RasterizerSceneGLES3::environment_set_background(RID p_env,VS::EnvironmentBG p_bg){ + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + env->bg_mode=p_bg; +} + +void RasterizerSceneGLES3::environment_set_skybox(RID p_env, RID p_skybox, int p_radiance_size, int p_irradiance_size){ + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + if (env->skybox_color.is_valid()) { + env->skybox_color=RID(); + } + if (env->skybox_radiance.is_valid()) { + storage->free(env->skybox_radiance); + env->skybox_radiance=RID(); + } + if (env->skybox_irradiance.is_valid()) { + storage->free(env->skybox_irradiance); + env->skybox_irradiance=RID(); + } + + if (p_skybox.is_valid()) { + + env->skybox_color=p_skybox; + // env->skybox_radiance=storage->texture_create_pbr_cubemap(p_skybox,VS::PBR_CUBEMAP_RADIANCE,p_radiance_size); + //env->skybox_irradiance=storage->texture_create_pbr_cubemap(p_skybox,VS::PBR_CUBEMAP_IRRADIANCE,p_irradiance_size); + } + +} + +void RasterizerSceneGLES3::environment_set_skybox_scale(RID p_env,float p_scale) { + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->skybox_scale=p_scale; + +} + +void RasterizerSceneGLES3::environment_set_bg_color(RID p_env,const Color& p_color){ + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->bg_color=p_color; + +} +void RasterizerSceneGLES3::environment_set_bg_energy(RID p_env,float p_energy) { + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->energy=p_energy; + +} + +void RasterizerSceneGLES3::environment_set_canvas_max_layer(RID p_env,int p_max_layer){ + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->canvas_max_layer=p_max_layer; + +} +void RasterizerSceneGLES3::environment_set_ambient_light(RID p_env, const Color& p_color, float p_energy, float p_skybox_energy){ + + Environment *env=environment_owner.getornull(p_env); + ERR_FAIL_COND(!env); + + env->ambient_color=p_color; + env->ambient_anergy=p_energy; + env->skybox_ambient=p_skybox_energy; + +} + +void RasterizerSceneGLES3::environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode){ + +} +void RasterizerSceneGLES3::environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture){ + +} + +void RasterizerSceneGLES3::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper){ + +} +void RasterizerSceneGLES3::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ + +} +void RasterizerSceneGLES3::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ + +} +void RasterizerSceneGLES3::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ + +} +void RasterizerSceneGLES3::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ + +} + + + + RID RasterizerSceneGLES3::light_instance_create(RID p_light) { @@ -493,6 +613,70 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g #endif } +void RasterizerSceneGLES3::_draw_skybox(RID p_skybox,CameraMatrix& p_projection,const Transform& p_transform,bool p_vflip,float p_scale) { + + RasterizerStorageGLES3::Texture *tex = storage->texture_owner.getornull(p_skybox); + + ERR_FAIL_COND(!tex); + glActiveTexture(GL_TEXTURE0); + glBindTexture(tex->target,tex->tex_id); + + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glColorMask(1,1,1,1); + + float flip_sign = p_vflip?-1:1; + + Vector3 vertices[8]={ + Vector3(-1,-1*flip_sign,0.1), + Vector3( 0, 1, 0), + Vector3( 1,-1*flip_sign,0.1), + Vector3( 1, 1, 0), + Vector3( 1, 1*flip_sign,0.1), + Vector3( 1, 0, 0), + Vector3(-1, 1*flip_sign,0.1), + Vector3( 0, 0, 0), + + }; + + + + //skybox uv vectors + float vw,vh,zn; + p_projection.get_viewport_size(vw,vh); + zn=p_projection.get_z_near(); + + float scale=p_scale; + + for(int i=0;i<4;i++) { + + Vector3 uv=vertices[i*2+1]; + uv.x=(uv.x*2.0-1.0)*vw*scale; + uv.y=-(uv.y*2.0-1.0)*vh*scale; + uv.z=-zn; + vertices[i*2+1] = p_transform.basis.xform(uv).normalized(); + vertices[i*2+1].z = -vertices[i*2+1].z; + } + + glBindBuffer(GL_ARRAY_BUFFER,state.skybox_verts); + glBufferSubData(GL_ARRAY_BUFFER,0,sizeof(Vector3)*8,vertices); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + glBindVertexArray(state.skybox_array); + + storage->shaders.copy.set_conditional(CopyShaderGLES3::USE_CUBEMAP,true); + storage->shaders.copy.bind(); + + glDrawArrays(GL_TRIANGLE_FAN,0,4); + + glBindVertexArray(0); + + storage->shaders.copy.set_conditional(CopyShaderGLES3::USE_CUBEMAP,false); + +} + void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment){ @@ -561,7 +745,9 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); - if (true) { + Environment *env = environment_owner.getornull(p_environment); + + if (!env || env->bg_mode==VS::ENV_BG_CLEAR_COLOR) { if (storage->frame.clear_request) { @@ -570,6 +756,16 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM storage->frame.clear_request=false; } + } else if (env->bg_mode==VS::ENV_BG_COLOR) { + + + glClearColor( env->bg_color.r, env->bg_color.g, env->bg_color.b, env->bg_color.a ); + glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); + storage->frame.clear_request=false; + } else { + glClear(GL_DEPTH_BUFFER_BIT); + storage->frame.clear_request=false; + } state.current_depth_test=true; @@ -616,6 +812,12 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM _render_list(render_list.elements,render_list.element_count,p_cam_transform,p_cam_projection,false,false); + + if (env && env->bg_mode==VS::ENV_BG_SKYBOX) { + + _draw_skybox(env->skybox_color,p_cam_projection,p_cam_transform,storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP],env->skybox_scale); + } + //_render_list_forward(&alpha_render_list,camera_transform,camera_transform_inverse,camera_projection,false,fragment_lighting,true); //glColorMask(1,1,1,1); @@ -793,6 +995,27 @@ void RasterizerSceneGLES3::initialize() { if (render_list.max_elements<1024) render_list.max_elements=1024; + + + { + //quad buffers + + glGenBuffers(1,&state.skybox_verts); + glBindBuffer(GL_ARRAY_BUFFER,state.skybox_verts); + glBufferData(GL_ARRAY_BUFFER,sizeof(Vector3)*8,NULL,GL_DYNAMIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + + glGenVertexArrays(1,&state.skybox_array); + glBindVertexArray(state.skybox_array); + glBindBuffer(GL_ARRAY_BUFFER,state.skybox_verts); + glVertexAttribPointer(VS::ARRAY_VERTEX,3,GL_FLOAT,GL_FALSE,sizeof(Vector3)*2,0); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_TEX_UV,3,GL_FLOAT,GL_FALSE,sizeof(Vector3)*2,((uint8_t*)NULL)+sizeof(Vector3)); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + } render_list.init(); } diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 6fba777fc89..53088deb0d7 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -16,8 +16,6 @@ public: RasterizerStorageGLES3 *storage; - - struct State { bool current_depth_test; @@ -40,10 +38,70 @@ public: GLuint scene_ubo; - + GLuint skybox_verts; + GLuint skybox_array; } state; + + + + /* ENVIRONMENT API */ + + struct Environment : public RID_Data { + + VS::EnvironmentBG bg_mode; + + RID skybox_color; + RID skybox_radiance; + RID skybox_irradiance; + float skybox_scale; + + Color bg_color; + float energy; + float skybox_ambient; + + Color ambient_color; + float ambient_anergy; + float ambient_skybox_energy; + + int canvas_max_layer; + + + Environment() { + bg_mode=VS::ENV_BG_CLEAR_COLOR; + skybox_scale=1.0; + energy=1.0; + skybox_ambient=0; + ambient_anergy=1.0; + ambient_skybox_energy=0.0; + canvas_max_layer=0; + } + }; + + RID_Owner environment_owner; + + virtual RID environment_create(); + + virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg); + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size); + virtual void environment_set_skybox_scale(RID p_env,float p_scale); + virtual void environment_set_bg_color(RID p_env,const Color& p_color); + virtual void environment_set_bg_energy(RID p_env,float p_energy); + virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0); + + virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode); + virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); + + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper); + virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); + virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); + virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); + virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); + + /* RENDER LIST */ + struct RenderList { enum { @@ -166,6 +224,8 @@ public: _FORCE_INLINE_ void _add_geometry( RasterizerStorageGLES3::Geometry* p_geometry, InstanceBase *p_instance, RasterizerStorageGLES3::GeometryOwner *p_owner,int p_material); + void _draw_skybox(RID p_skybox, CameraMatrix& p_projection, const Transform& p_transform, bool p_vflip, float p_scale); + virtual void render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment); virtual bool free(RID p_rid); diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index f633ef21cc8..94ad2afabee 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -995,6 +995,104 @@ void RasterizerStorageGLES3::texture_set_shrink_all_x2_on_set_data(bool p_enable config.shrink_textures_x2=p_enable; } +RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution) const { + + Texture * texture = texture_owner.get(p_source); + ERR_FAIL_COND_V(!texture,RID()); + ERR_FAIL_COND_V(!(texture->flags&VS::TEXTURE_FLAG_CUBEMAP),RID()); + + bool use_float=true; + + if (p_resolution<0) { + p_resolution=texture->width; + } + + + glBindVertexArray(0); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); +#ifdef GLEW_ENABLED + glDisable(GL_POINT_SPRITE); + glDisable(GL_VERTEX_PROGRAM_POINT_SIZE); +#endif + glDisable(GL_BLEND); + + + glActiveTexture(GL_TEXTURE1); + glBindTexture(texture->target, texture->tex_id); + + glActiveTexture(GL_TEXTURE0); + GLuint new_cubemap; + glGenTextures(1, &new_cubemap); + + + GLuint tmp_fb; + + glGenFramebuffers(1, &tmp_fb); + glBindFramebuffer(GL_FRAMEBUFFER, tmp_fb); + + + int w = texture->width; + int h = texture->height; + + int lod=0; + + shaders.cubemap_filter.bind(); + + int mipmaps=6; + + int mm_level=mipmaps; + + while(mm_level) { + + for(int i=0;i<6;i++) { + glTexImage2D(_cube_side_enum[i], lod, use_float?GL_RGBA16F:GL_RGB10_A2, w, h, 0, GL_RGBA, use_float?GL_HALF_FLOAT:GL_UNSIGNED_INT_2_10_10_10_REV, NULL); + glTexParameteri(_cube_side_enum[i], GL_TEXTURE_BASE_LEVEL, lod); + glTexParameteri(_cube_side_enum[i], GL_TEXTURE_MAX_LEVEL, lod); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, _cube_side_enum[i], new_cubemap, 0); + + glViewport(0,0,w,h); + glBindVertexArray(resources.quadie_array); + + shaders.cubemap_filter.set_uniform(CubemapFilterShaderGLES3::FACE_ID,i); + shaders.cubemap_filter.set_uniform(CubemapFilterShaderGLES3::ROUGHNESS,lod/float(mipmaps)); + + + glDrawArrays(GL_TRIANGLE_FAN,0,4); + glBindVertexArray(0); +#ifdef DEBUG_ENABLED + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + ERR_CONTINUE(status!=GL_FRAMEBUFFER_COMPLETE); +#endif + } + + + + if (w>1) + w>>=1; + if (h>1) + h>>=1; + + lod++; + mm_level--; + + } + + + for(int i=0;i<6;i++) { + //restore ranges + glTexParameteri(_cube_side_enum[i], GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(_cube_side_enum[i], GL_TEXTURE_MAX_LEVEL, lod); + + } + + glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); + glDeleteFramebuffers(1, &tmp_fb); + + + return RID(); +} /* SHADER API */ @@ -3649,6 +3747,45 @@ void RasterizerStorageGLES3::initialize() { #else config.use_rgba_2d_shadows=true; #endif + + + //generic quadie for copying + + { + //quad buffers + + glGenBuffers(1,&resources.quadie); + glBindBuffer(GL_ARRAY_BUFFER,resources.quadie); + { + const float qv[16]={ + -1,-1, + 0, 0, + -1, 1, + 0, 1, + 1, 1, + 1, 1, + 1,-1, + 1, 0, + }; + + glBufferData(GL_ARRAY_BUFFER,sizeof(float)*16,qv,GL_STATIC_DRAW); + } + + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + + + glGenVertexArrays(1,&resources.quadie_array); + glBindVertexArray(resources.quadie_array); + glBindBuffer(GL_ARRAY_BUFFER,resources.quadie); + glVertexAttribPointer(0,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,0); + glEnableVertexAttribArray(0); + glVertexAttribPointer(1,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,((uint8_t*)NULL)+8); + glEnableVertexAttribArray(1); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER,0); //unbind + } + + shaders.cubemap_filter.init(); } void RasterizerStorageGLES3::finalize() { diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index 950d65b9d06..c3022b3ac07 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -6,6 +6,7 @@ #include "shader_gles3.h" #include "shaders/copy.glsl.h" #include "shaders/canvas.glsl.h" +#include "shaders/cubemap_filter.glsl.h" #include "self_list.h" #include "shader_compiler_gles3.h" @@ -60,6 +61,8 @@ public: ShaderCompilerGLES3 compiler; + CubemapFilterShaderGLES3 cubemap_filter; + ShaderCompilerGLES3::IdentifierActions actions_canvas; ShaderCompilerGLES3::IdentifierActions actions_scene; } shaders; @@ -70,6 +73,9 @@ public: GLuint black_tex; GLuint normal_tex; + GLuint quadie; + GLuint quadie_array; + } resources; struct Info { @@ -178,6 +184,8 @@ public: virtual void texture_debug_usage(List *r_info); + virtual RID texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution=-1) const; + /* SHADER API */ diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index 0fa0e3b73a7..afffe103168 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -5,4 +5,5 @@ if env['BUILDERS'].has_key('GLES3_GLSL'): env.GLES3_GLSL('canvas.glsl'); env.GLES3_GLSL('canvas_shadow.glsl'); env.GLES3_GLSL('scene.glsl'); + env.GLES3_GLSL('cubemap_filter.glsl'); diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/copy.glsl index aba280186ac..eb58d664315 100644 --- a/drivers/gles3/shaders/copy.glsl +++ b/drivers/gles3/shaders/copy.glsl @@ -33,20 +33,31 @@ void main() { #ifdef USE_CUBEMAP in vec3 cube_interp; -uniform samplerCube source_cube; +uniform samplerCube source_cube; //texunit:0 #else in vec2 uv_interp; -uniform sampler2D source; +uniform sampler2D source; //texunit:0 #endif + +uniform float stuff; + in vec2 uv2_interp; -layout(location = 0) vec4 frag_color; //color:0 +layout(location = 0) out vec4 frag_color; void main() { //vec4 color = color_interp; +#ifdef USE_CUBEMAP + vec4 color = texture( source_cube, normalize(cube_interp) ); + +#else + vec4 color = texture( source, uv_interp ); +#endif + + frag_color = color; } diff --git a/drivers/gles3/shaders/cubemap_filter.glsl b/drivers/gles3/shaders/cubemap_filter.glsl new file mode 100644 index 00000000000..f450f341133 --- /dev/null +++ b/drivers/gles3/shaders/cubemap_filter.glsl @@ -0,0 +1,143 @@ +[vertex] + + +layout(location=0) in highp vec2 vertex; + +layout(location=1) in highp vec2 uv; + +out highp vec2 uv_interp; + +void main() { + + uv_interp=uv; + gl_Position=vec4(vertex,0,1); +} + +[fragment] + + +uniform samplerCube source_cube; //texunit:1 +uniform int face_id; +uniform float roughness; +in highp vec2 uv_interp; + + +layout(location = 0) vec4 frag_color; + + +vec3 texelCoordToVec(vec2 uv, int faceID) +{ + mat3 faceUvVectors[6]; + + // -x + faceUvVectors[1][0] = vec3(0.0, 0.0, 1.0); // u -> +z + faceUvVectors[1][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[1][2] = vec3(-1.0, 0.0, 0.0); // -x face + + // +x + faceUvVectors[0][0] = vec3(0.0, 0.0, -1.0); // u -> -z + faceUvVectors[0][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[0][2] = vec3(1.0, 0.0, 0.0); // +x face + + // -y + faceUvVectors[3][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[3][1] = vec3(0.0, 0.0, -1.0); // v -> -z + faceUvVectors[3][2] = vec3(0.0, -1.0, 0.0); // -y face + + // +y + faceUvVectors[2][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[2][1] = vec3(0.0, 0.0, 1.0); // v -> +z + faceUvVectors[2][2] = vec3(0.0, 1.0, 0.0); // +y face + + // -z + faceUvVectors[5][0] = vec3(-1.0, 0.0, 0.0); // u -> -x + faceUvVectors[5][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[5][2] = vec3(0.0, 0.0, -1.0); // -z face + + // +z + faceUvVectors[4][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[4][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[4][2] = vec3(0.0, 0.0, 1.0); // +z face + + // out = u * s_faceUv[0] + v * s_faceUv[1] + s_faceUv[2]. + vec3 result = (faceUvVectors[faceID][0] * uv.x) + (faceUvVectors[faceID][1] * uv.y) + faceUvVectors[faceID][2]; + return normalize(result); +} + +vec3 ImportanceSampleGGX(vec2 Xi, float Roughness, vec3 N) +{ + float a = Roughness * Roughness; // DISNEY'S ROUGHNESS [see Burley'12 siggraph] + + // Compute distribution direction + float Phi = 2.0 * M_PI * Xi.x; + float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (a*a - 1.0) * Xi.y)); + float SinTheta = sqrt(1.0 - CosTheta * CosTheta); + + // Convert to spherical direction + vec3 H; + H.x = SinTheta * cos(Phi); + H.y = SinTheta * sin(Phi); + H.z = CosTheta; + + vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 TangentX = normalize(cross(UpVector, N)); + vec3 TangentY = cross(N, TangentX); + + // Tangent to world space + return TangentX * H.x + TangentY * H.y + N * H.z; +} + +// http://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html +float GGX(float NdotV, float a) +{ + float k = a / 2.0; + return NdotV / (NdotV * (1.0 - k) + k); +} + +// http://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html +float G_Smith(float a, float nDotV, float nDotL) +{ + return GGX(nDotL, a * a) * GGX(nDotV, a * a); +} + +float radicalInverse_VdC(uint bits) { + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10; // / 0x100000000 +} + +vec2 Hammersley(uint i, uint N) { + return vec2(float(i)/float(N), radicalInverse_VdC(i)); +} + +#define SAMPLE_COUNT 1024 + +void main() { + + vec2 uv = (uv_interp * 2.0) - 1.0; + vec3 N = texelCoordToVec(uv, face_id); + + //vec4 color = color_interp; + vec4 sum = vec4(0.0, 0.0, 0.0, 0.0); + + for(int sampleNum = 0; sampleNum < SAMPLE_COUNT; sampleNum++) { + vec2 xi = Hammersley(sampleNum, SAMPLE_COUNT); + vec2 xi = texture2DLod(Texture0, vec2(float(sampleNum) / float(SAMPLE_COUNT), 0.5), 0.0).xy; + + vec3 H = ImportanceSampleGGX( xi, roughness, N ); + vec3 V = N; + vec3 L = normalize(2.0 * dot( V, H ) * H - V); + + float ndotl = max(0.0, dot(N, L)); + vec3 s = textureCubeLod(u_skyCube, H, 0.0).rgb * ndotl; + + sum += vec4(s, 1.0); + } + sum /= sum.w; + + frag_color = vec4(sum.rgb, 1.0); +} + diff --git a/scene/resources/environment.cpp b/scene/resources/environment.cpp index 6fecdef59d1..85d16da9151 100644 --- a/scene/resources/environment.cpp +++ b/scene/resources/environment.cpp @@ -28,20 +28,193 @@ /*************************************************************************/ #include "environment.h" #include "texture.h" - +#include "globals.h" +#include "servers/visual_server.h" RID Environment::get_rid() const { return environment; } + +void Environment::set_background(BGMode p_bg) { + + bg_mode=p_bg; + VS::get_singleton()->environment_set_background(environment,VS::EnvironmentBG(p_bg)); + _change_notify(); +} + +void Environment::set_skybox(const Ref& p_skybox){ + + bg_skybox=p_skybox; + + RID sb_rid; + if (bg_skybox.is_valid()) + sb_rid=bg_skybox->get_rid(); + print_line("skybox valid: "+itos(sb_rid.is_valid())); + + VS::get_singleton()->environment_set_skybox(environment,sb_rid,Globals::get_singleton()->get("rendering/skybox/radiance_cube_resolution"),Globals::get_singleton()->get("rendering/skybox/iradiance_cube_resolution")); +} + +void Environment::set_skybox_scale(float p_scale) { + + bg_skybox_scale=p_scale; + VS::get_singleton()->environment_set_skybox_scale(environment,p_scale); +} + +void Environment::set_bg_color(const Color& p_color){ + + bg_color=p_color; + VS::get_singleton()->environment_set_bg_color(environment,p_color); +} +void Environment::set_bg_energy(float p_energy){ + + bg_energy=p_energy; + VS::get_singleton()->environment_set_bg_energy(environment,p_energy); +} +void Environment::set_canvas_max_layer(int p_max_layer){ + + bg_canvas_max_layer=p_max_layer; + VS::get_singleton()->environment_set_canvas_max_layer(environment,p_max_layer); +} +void Environment::set_ambient_light_color(const Color& p_color){ + + ambient_color=p_color; + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); +} +void Environment::set_ambient_light_energy(float p_energy){ + + ambient_energy=p_energy; + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); +} +void Environment::set_ambient_light_skybox_energy(float p_energy){ + + ambient_skybox_energy=p_energy; + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); +} + +Environment::BGMode Environment::get_background() const{ + + return bg_mode; +} +Ref Environment::get_skybox() const{ + + return bg_skybox; +} + +float Environment::get_skybox_scale() const { + + return bg_skybox_scale; +} + +Color Environment::get_bg_color() const{ + + return bg_color; +} +float Environment::get_bg_energy() const{ + + return bg_energy; +} +int Environment::get_canvas_max_layer() const{ + + return bg_canvas_max_layer; +} +Color Environment::get_ambient_light_color() const{ + + return ambient_color; +} +float Environment::get_ambient_light_energy() const{ + + return ambient_energy; +} +float Environment::get_ambient_light_skybox_energy() const{ + + return ambient_skybox_energy; +} + + + +void Environment::_validate_property(PropertyInfo& property) const { + + if (property.name=="background/skybox" || property.name=="ambient_light/skybox_energy") { + if (bg_mode!=BG_SKYBOX) { + property.usage=PROPERTY_USAGE_NOEDITOR; + } + } + + if (property.name=="background/color") { + if (bg_mode!=BG_COLOR) { + property.usage=PROPERTY_USAGE_NOEDITOR; + } + } + + if (property.name=="background/canvas_max_layer") { + if (bg_mode!=BG_CANVAS) { + property.usage=PROPERTY_USAGE_NOEDITOR; + } + } + +} + void Environment::_bind_methods() { + ObjectTypeDB::bind_method(_MD("set_background","mode"),&Environment::set_background); + ObjectTypeDB::bind_method(_MD("set_skybox","skybox:CubeMap"),&Environment::set_skybox); + ObjectTypeDB::bind_method(_MD("set_skybox_scale","scale"),&Environment::set_skybox_scale); + ObjectTypeDB::bind_method(_MD("set_bg_color","color"),&Environment::set_bg_color); + ObjectTypeDB::bind_method(_MD("set_bg_energy","energy"),&Environment::set_bg_energy); + ObjectTypeDB::bind_method(_MD("set_canvas_max_layer","layer"),&Environment::set_canvas_max_layer); + ObjectTypeDB::bind_method(_MD("set_ambient_light_color","color"),&Environment::set_ambient_light_color); + ObjectTypeDB::bind_method(_MD("set_ambient_light_energy","energy"),&Environment::set_ambient_light_energy); + ObjectTypeDB::bind_method(_MD("set_ambient_light_skybox_energy","energy"),&Environment::set_ambient_light_skybox_energy); + + + ObjectTypeDB::bind_method(_MD("get_background"),&Environment::get_background); + ObjectTypeDB::bind_method(_MD("get_skybox:CubeMap"),&Environment::get_skybox); + ObjectTypeDB::bind_method(_MD("get_skybox_scale"),&Environment::get_skybox_scale); + ObjectTypeDB::bind_method(_MD("get_bg_color"),&Environment::get_bg_color); + ObjectTypeDB::bind_method(_MD("get_bg_energy"),&Environment::get_bg_energy); + ObjectTypeDB::bind_method(_MD("get_canvas_max_layer"),&Environment::get_canvas_max_layer); + ObjectTypeDB::bind_method(_MD("get_ambient_light_color"),&Environment::get_ambient_light_color); + ObjectTypeDB::bind_method(_MD("get_ambient_light_energy"),&Environment::get_ambient_light_energy); + ObjectTypeDB::bind_method(_MD("get_ambient_light_skybox_energy"),&Environment::get_ambient_light_skybox_energy); + + + ADD_PROPERTY(PropertyInfo(Variant::INT,"background/mode",PROPERTY_HINT_ENUM,"Clear Color,Custom Color,Skybox,Canvas,Keep"),_SCS("set_background"),_SCS("get_background") ); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT,"background/skybox",PROPERTY_HINT_RESOURCE_TYPE,"CubeMap"),_SCS("set_skybox"),_SCS("get_skybox") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"background/skybox_scale",PROPERTY_HINT_RANGE,"0,32,0.01"),_SCS("set_skybox_scale"),_SCS("get_skybox_scale") ); + ADD_PROPERTY(PropertyInfo(Variant::COLOR,"background/color"),_SCS("set_bg_color"),_SCS("get_bg_color") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"background/energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_bg_energy"),_SCS("get_bg_energy") ); + ADD_PROPERTY(PropertyInfo(Variant::INT,"background/canvas_max_layer",PROPERTY_HINT_RANGE,"-1000,1000,1"),_SCS("set_canvas_max_layer"),_SCS("get_canvas_max_layer") ); + ADD_PROPERTY(PropertyInfo(Variant::COLOR,"ambient_light/color"),_SCS("set_ambient_light_color"),_SCS("get_ambient_light_color") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_ambient_light_energy"),_SCS("get_ambient_light_energy") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/skybox_energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_ambient_light_skybox_energy"),_SCS("get_ambient_light_skybox_energy") ); + + GLOBAL_DEF("rendering/skybox/irradiance_cube_resolution",256); + GLOBAL_DEF("rendering/skybox/radiance_cube_resolution",64); + + BIND_CONSTANT(BG_KEEP); + BIND_CONSTANT(BG_CLEAR_COLOR); + BIND_CONSTANT(BG_COLOR); + BIND_CONSTANT(BG_SKYBOX); + BIND_CONSTANT(BG_CANVAS); + BIND_CONSTANT(BG_MAX); + BIND_CONSTANT(GLOW_BLEND_MODE_ADDITIVE); + BIND_CONSTANT(GLOW_BLEND_MODE_SCREEN); + BIND_CONSTANT(GLOW_BLEND_MODE_SOFTLIGHT); + BIND_CONSTANT(GLOW_BLEND_MODE_DISABLED); } Environment::Environment() { + bg_mode=BG_CLEAR_COLOR; + bg_energy=1.0; + bg_canvas_max_layer=0; + ambient_energy=1.0; + ambient_skybox_energy=0; + + environment = VS::get_singleton()->environment_create(); } diff --git a/scene/resources/environment.h b/scene/resources/environment.h index f8ea0cb36f0..0435ffc6cb8 100644 --- a/scene/resources/environment.h +++ b/scene/resources/environment.h @@ -31,28 +31,80 @@ #include "resource.h" #include "servers/visual_server.h" +#include "scene/resources/texture.h" class Environment : public Resource { OBJ_TYPE(Environment,Resource); public: + enum BGMode { + + BG_CLEAR_COLOR, + BG_COLOR, + BG_SKYBOX, + BG_CANVAS, + BG_KEEP, + BG_MAX + }; + + enum GlowBlendMode { + GLOW_BLEND_MODE_ADDITIVE, + GLOW_BLEND_MODE_SCREEN, + GLOW_BLEND_MODE_SOFTLIGHT, + GLOW_BLEND_MODE_DISABLED, + }; private: - - RID environment; + BGMode bg_mode; + Ref bg_skybox; + float bg_skybox_scale; + Color bg_color; + float bg_energy; + int bg_canvas_max_layer; + Color ambient_color; + float ambient_energy; + float ambient_skybox_energy; + protected: static void _bind_methods(); + virtual void _validate_property(PropertyInfo& property) const; + public: + + void set_background(BGMode p_bg); + void set_skybox(const Ref& p_skybox); + void set_skybox_scale(float p_scale); + void set_bg_color(const Color& p_color); + void set_bg_energy(float p_energy); + void set_canvas_max_layer(int p_max_layer); + void set_ambient_light_color(const Color& p_color); + void set_ambient_light_energy(float p_energy); + void set_ambient_light_skybox_energy(float p_energy); + + BGMode get_background() const; + Ref get_skybox() const; + float get_skybox_scale() const; + Color get_bg_color() const; + float get_bg_energy() const; + int get_canvas_max_layer() const; + Color get_ambient_light_color() const; + float get_ambient_light_energy() const; + float get_ambient_light_skybox_energy() const; + + virtual RID get_rid() const; Environment(); ~Environment(); }; +VARIANT_ENUM_CAST(Environment::BGMode) +VARIANT_ENUM_CAST(Environment::GlowBlendMode) + #endif // ENVIRONMENT_H diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index d3f27687caf..81cdcf6a25e 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -39,6 +39,26 @@ class RasterizerScene { public: + /* ENVIRONMENT API */ + + virtual RID environment_create()=0; + + virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg)=0; + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size)=0; + virtual void environment_set_skybox_scale(RID p_env,float p_scale)=0; + virtual void environment_set_bg_color(RID p_env,const Color& p_color)=0; + virtual void environment_set_bg_energy(RID p_env,float p_energy)=0; + virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer)=0; + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0)=0; + + virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode)=0; + virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture)=0; + + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper)=0; + virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness)=0; + virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast)=0; + virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation)=0; + virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp)=0; struct InstanceBase : RID_Data { @@ -126,6 +146,7 @@ public: virtual void texture_debug_usage(List *r_info)=0; + virtual RID texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution=-1) const=0; /* SHADER API */ diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 62d45f8206d..a2502e89f9d 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -620,6 +620,7 @@ public: BIND1RC(uint32_t,texture_get_width,RID) BIND1RC(uint32_t,texture_get_height,RID) BIND3(texture_set_size_override,RID,int,int) + BIND3RC(RID,texture_create_pbr_cubemap,RID,PBRCubeMapMode,int) @@ -848,15 +849,17 @@ public: #undef BINDBASE //from now on, calls forwarded to this singleton -#define BINDBASE VSG::scene +#define BINDBASE VSG::scene_render BIND0R(RID,environment_create) BIND2(environment_set_background,RID ,EnvironmentBG ) - BIND3(environment_set_skybox,RID,RID ,float ) + BIND4(environment_set_skybox,RID,RID ,int,int ) + BIND2(environment_set_skybox_scale,RID,float) BIND2(environment_set_bg_color,RID,const Color& ) + BIND2(environment_set_bg_energy,RID,float ) BIND2(environment_set_canvas_max_layer,RID,int ) - BIND3(environment_set_ambient_light,RID,const Color& ,float ) + BIND4(environment_set_ambient_light,RID,const Color& ,float,float ) BIND7(environment_set_glow,RID,bool ,int ,float ,float ,float ,EnvironmentGlowBlendMode ) BIND5(environment_set_fog,RID,bool ,float ,float ,RID ) @@ -871,6 +874,9 @@ public: /* SCENARIO API */ +#undef BINDBASE +#define BINDBASE VSG::scene + BIND0R(RID,scenario_create) BIND2(scenario_set_debug,RID,ScenarioDebugMode ) diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index a3b1d76fb8e..8fdf3d160d0 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -69,54 +69,6 @@ void VisualServerScene::camera_set_use_vertical_aspect(RID p_camera,bool p_enabl } - -/* ENVIRONMENT API */ - -RID VisualServerScene::environment_create(){ - - return RID(); -} - -void VisualServerScene::environment_set_background(RID p_env,VS::EnvironmentBG p_bg){ - -} -void VisualServerScene::environment_set_skybox(RID p_env,RID p_skybox,float p_energy){ - -} -void VisualServerScene::environment_set_bg_color(RID p_env,const Color& p_color){ - -} -void VisualServerScene::environment_set_canvas_max_layer(RID p_env,int p_max_layer){ - -} -void VisualServerScene::environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy){ - -} - -void VisualServerScene::environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode){ - -} -void VisualServerScene::environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture){ - -} - -void VisualServerScene::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper){ - -} -void VisualServerScene::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ - -} -void VisualServerScene::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ - -} -void VisualServerScene::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ - -} -void VisualServerScene::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ - -} - - /* SCENARIO API */ diff --git a/servers/visual/visual_server_scene.h b/servers/visual/visual_server_scene.h index ceec4af5989..515d52db7bc 100644 --- a/servers/visual/visual_server_scene.h +++ b/servers/visual/visual_server_scene.h @@ -139,25 +139,6 @@ public: }; */ - /* ENVIRONMENT API */ - - virtual RID environment_create(); - - virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg); - virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0); - virtual void environment_set_bg_color(RID p_env,const Color& p_color); - virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0); - - virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode); - virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); - - virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper); - virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); - virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); - virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); - virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); - /* SCENARIO API */ diff --git a/servers/visual_server.h b/servers/visual_server.h index eb20355efba..11828c7d265 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -125,6 +125,13 @@ public: virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable)=0; + enum PBRCubeMapMode { + PBR_CUBEMAP_RADIANCE, + PBR_CUBEMAP_IRRADIANCE, + }; + + virtual RID texture_create_pbr_cubemap(RID p_source,PBRCubeMapMode p_mode,int p_resolution=-1) const=0; + struct TextureInfo { RID texture; Size2 size; @@ -478,18 +485,21 @@ public: enum EnvironmentBG { - ENV_BG_KEEP, + ENV_BG_CLEAR_COLOR, ENV_BG_COLOR, ENV_BG_SKYBOX, ENV_BG_CANVAS, + ENV_BG_KEEP, ENV_BG_MAX }; virtual void environment_set_background(RID p_env,EnvironmentBG p_bg)=0; - virtual void environment_set_skybox(RID p_env,RID p_skybox,float p_energy=1.0)=0; + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size)=0; + virtual void environment_set_skybox_scale(RID p_env,float p_scale)=0; virtual void environment_set_bg_color(RID p_env,const Color& p_color)=0; + virtual void environment_set_bg_energy(RID p_env,float p_energy)=0; virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer)=0; - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0)=0; + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0)=0; //set default SSAO options //set default SSR options From 6a4b4c7db45281ecbaeee3f144f77d72858f53d4 Mon Sep 17 00:00:00 2001 From: "Henrique L. Alves" Date: Wed, 12 Oct 2016 18:43:59 -0300 Subject: [PATCH 014/223] Added small modification on parser for '+' --- core/variant.h | 1 + core/variant_op.cpp | 54 ++++++++++++++++++++++++++++++++-- modules/gdscript/gd_parser.cpp | 7 +++-- modules/gdscript/gd_parser.h | 1 + 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/core/variant.h b/core/variant.h index 90be593bd97..ccb8c5e3dfd 100644 --- a/core/variant.h +++ b/core/variant.h @@ -336,6 +336,7 @@ public: OP_MULTIPLY, OP_DIVIDE, OP_NEGATE, + OP_POSITIVE, OP_MODULE, OP_STRING_CONCAT, //bitwise diff --git a/core/variant_op.cpp b/core/variant_op.cpp index fd64b58bd53..73ed85db127 100644 --- a/core/variant_op.cpp +++ b/core/variant_op.cpp @@ -97,6 +97,12 @@ case m_name: {\ _RETURN( -p_a._data.m_type);\ }; +#define DEFAULT_OP_NUM_POS(m_name,m_type)\ +case m_name: {\ +\ + _RETURN( p_a._data.m_type);\ +}; + #define DEFAULT_OP_NUM_VEC(m_op,m_name,m_type)\ case m_name: {\ switch(p_b.type) {\ @@ -136,6 +142,10 @@ case m_name: {\ _RETURN( -*reinterpret_cast(p_a._data._mem));\ } +#define DEFAULT_OP_LOCALMEM_POS(m_name,m_type)\ +case m_name: {\ + _RETURN( *reinterpret_cast(p_a._data._mem));\ +} #define DEFAULT_OP_LOCALMEM_NUM(m_op,m_name,m_type)\ case m_name: {switch(p_b.type) {\ @@ -740,6 +750,48 @@ void Variant::evaluate(const Operator& p_op, const Variant& p_a, const Variant& } } break; + case OP_POSITIVE: { + // Simple case when user defines variable as +value. + switch(p_a.type) { + + DEFAULT_OP_FAIL(NIL); + DEFAULT_OP_FAIL(STRING); + DEFAULT_OP_FAIL(RECT2); + DEFAULT_OP_FAIL(MATRIX32); + DEFAULT_OP_FAIL(_AABB); + DEFAULT_OP_FAIL(MATRIX3); + DEFAULT_OP_FAIL(TRANSFORM); + DEFAULT_OP_NUM_POS(BOOL,_bool); + DEFAULT_OP_NUM_POS(INT,_int); + DEFAULT_OP_NUM_POS(REAL,_real); + DEFAULT_OP_LOCALMEM_POS(VECTOR3,Vector3); + DEFAULT_OP_LOCALMEM_POS(PLANE,Plane); + DEFAULT_OP_LOCALMEM_POS(QUAT,Quat); + DEFAULT_OP_LOCALMEM_POS(VECTOR2,Vector2); + + DEFAULT_OP_FAIL(COLOR); + DEFAULT_OP_FAIL(IMAGE); + DEFAULT_OP_FAIL(NODE_PATH); + DEFAULT_OP_FAIL(_RID); + DEFAULT_OP_FAIL(OBJECT); + DEFAULT_OP_FAIL(INPUT_EVENT); + DEFAULT_OP_FAIL(DICTIONARY); + DEFAULT_OP_FAIL(ARRAY); + DEFAULT_OP_FAIL(RAW_ARRAY); + DEFAULT_OP_FAIL(INT_ARRAY); + DEFAULT_OP_FAIL(REAL_ARRAY); + DEFAULT_OP_FAIL(STRING_ARRAY); + DEFAULT_OP_FAIL(VECTOR2_ARRAY); + DEFAULT_OP_FAIL(VECTOR3_ARRAY); + DEFAULT_OP_FAIL(COLOR_ARRAY); + case VARIANT_MAX: { + r_valid=false; + return; + + } break; + + } + } break; case OP_NEGATE: { switch(p_a.type) { @@ -778,9 +830,7 @@ void Variant::evaluate(const Operator& p_op, const Variant& p_a, const Variant& return; } break; - } - } break; case OP_MODULE: { if (p_a.type==INT && p_b.type==INT) { diff --git a/modules/gdscript/gd_parser.cpp b/modules/gdscript/gd_parser.cpp index e2d284ae022..80d77b6f1b6 100644 --- a/modules/gdscript/gd_parser.cpp +++ b/modules/gdscript/gd_parser.cpp @@ -544,14 +544,15 @@ GDParser::Node* GDParser::_parse_expression(Node *p_parent,bool p_static,bool p_ expr = id; } - } else if (/*tokenizer->get_token()==GDTokenizer::TK_OP_ADD ||*/ tokenizer->get_token()==GDTokenizer::TK_OP_SUB || tokenizer->get_token()==GDTokenizer::TK_OP_NOT || tokenizer->get_token()==GDTokenizer::TK_OP_BIT_INVERT) { + } else if (tokenizer->get_token()==GDTokenizer::TK_OP_ADD || tokenizer->get_token()==GDTokenizer::TK_OP_SUB || tokenizer->get_token()==GDTokenizer::TK_OP_NOT || tokenizer->get_token()==GDTokenizer::TK_OP_BIT_INVERT) { - //single prefix operators like !expr -expr ++expr --expr + //single prefix operators like !expr +expr -expr ++expr --expr alloc_node(); Expression e; e.is_op=true; switch(tokenizer->get_token()) { + case GDTokenizer::TK_OP_ADD: e.op=OperatorNode::OP_POS; break; case GDTokenizer::TK_OP_SUB: e.op=OperatorNode::OP_NEG; break; case GDTokenizer::TK_OP_NOT: e.op=OperatorNode::OP_NOT; break; case GDTokenizer::TK_OP_BIT_INVERT: e.op=OperatorNode::OP_BIT_INVERT;; break; @@ -999,6 +1000,7 @@ GDParser::Node* GDParser::_parse_expression(Node *p_parent,bool p_static,bool p_ case OperatorNode::OP_BIT_INVERT: priority=0; unary=true; break; case OperatorNode::OP_NEG: priority=1; unary=true; break; + case OperatorNode::OP_POS: priority=1; unary=true; break; case OperatorNode::OP_MUL: priority=2; break; case OperatorNode::OP_DIV: priority=2; break; @@ -1516,6 +1518,7 @@ GDParser::Node* GDParser::_reduce_expression(Node *p_node,bool p_to_const) { //unary operators case OperatorNode::OP_NEG: { _REDUCE_UNARY(Variant::OP_NEGATE); } break; + case OperatorNode::OP_POS: { _REDUCE_UNARY(Variant::OP_POSITIVE); } break; case OperatorNode::OP_NOT: { _REDUCE_UNARY(Variant::OP_NOT); } break; case OperatorNode::OP_BIT_INVERT: { _REDUCE_UNARY(Variant::OP_BIT_NEGATE); } break; //binary operators (in precedence order) diff --git a/modules/gdscript/gd_parser.h b/modules/gdscript/gd_parser.h index 75653e09160..000fb702959 100644 --- a/modules/gdscript/gd_parser.h +++ b/modules/gdscript/gd_parser.h @@ -209,6 +209,7 @@ public: OP_INDEX_NAMED, //unary operators OP_NEG, + OP_POS, OP_NOT, OP_BIT_INVERT, OP_PREINC, From d710b265f8c9c94f3315d2d2ae2267c7437eb179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= Date: Mon, 17 Oct 2016 17:14:07 +0200 Subject: [PATCH 015/223] Add WebM module Use already existing libraries: libvorbis and libopus. Also use newly added libraries: libvpx, libwebm, libsimplewebm. --- SConstruct | 1 + modules/theora/yuv2rgb.h | 6 +- modules/webm/SCsub | 34 +++ modules/webm/config.py | 6 + modules/webm/libvpx/SCsub | 390 ++++++++++++++++++++++++ modules/webm/libvpx/yasm_osx_fat.py | 38 +++ modules/webm/register_types.cpp | 45 +++ modules/webm/register_types.h | 30 ++ modules/webm/video_stream_webm.cpp | 446 ++++++++++++++++++++++++++++ modules/webm/video_stream_webm.h | 128 ++++++++ platform/x11/detect.py | 3 + 11 files changed, 1124 insertions(+), 3 deletions(-) create mode 100644 modules/webm/SCsub create mode 100644 modules/webm/config.py create mode 100644 modules/webm/libvpx/SCsub create mode 100644 modules/webm/libvpx/yasm_osx_fat.py create mode 100644 modules/webm/register_types.cpp create mode 100644 modules/webm/register_types.h create mode 100644 modules/webm/video_stream_webm.cpp create mode 100644 modules/webm/video_stream_webm.h diff --git a/SConstruct b/SConstruct index 14aba28818c..32ef884c49e 100644 --- a/SConstruct +++ b/SConstruct @@ -121,6 +121,7 @@ opts.Add('gdscript','Build GDSCript support: (yes/no)','yes') opts.Add('libogg','Ogg library for ogg container support (system/builtin)','builtin') opts.Add('libvorbis','Ogg Vorbis library for vorbis support (system/builtin)','builtin') opts.Add('libtheora','Theora library for theora module (system/builtin)','builtin') +opts.Add('libvpx','VPX library for webm module (system/builtin)','builtin') opts.Add('opus','Opus and opusfile library for Opus format support: (system/builtin)','builtin') opts.Add('minizip','Build Minizip Archive Support: (yes/no)','yes') opts.Add('squish','Squish library for BC Texture Compression in editor (system/builtin)','builtin') diff --git a/modules/theora/yuv2rgb.h b/modules/theora/yuv2rgb.h index 59101bd0570..431b47e651c 100644 --- a/modules/theora/yuv2rgb.h +++ b/modules/theora/yuv2rgb.h @@ -801,7 +801,7 @@ do { \ *(DSTPTR)++ = 255; \ } while (0 == 1) -void yuv422_2_rgb8888(uint8_t *dst_ptr, +static void yuv422_2_rgb8888(uint8_t *dst_ptr, const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr, @@ -912,7 +912,7 @@ do { \ (DSTPTR) = 0xFF000000 | (Y & 0xFF) | (0xFF00 & (Y>>14)) | (0xFF0000 & (Y<<5));\ } while (0 == 1) -void yuv420_2_rgb8888(uint8_t *dst_ptr_, +static void yuv420_2_rgb8888(uint8_t *dst_ptr_, const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr, @@ -1034,7 +1034,7 @@ do { \ *(DSTPTR)++ = 255; \ } while (0 == 1) -void yuv444_2_rgb8888(uint8_t *dst_ptr, +static void yuv444_2_rgb8888(uint8_t *dst_ptr, const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr, diff --git a/modules/webm/SCsub b/modules/webm/SCsub new file mode 100644 index 00000000000..fbd936a2c39 --- /dev/null +++ b/modules/webm/SCsub @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +Import('env') +Import('env_modules') + +env_webm = env_modules.Clone() + +# Thirdparty source files +thirdparty_libsimplewebm_dir = "#thirdparty/libsimplewebm/" +thirdparty_libsimplewebm_sources = [ + "libwebm/mkvparser/mkvparser.cc", + "OpusVorbisDecoder.cpp", + "VPXDecoder.cpp", + "WebMDemuxer.cpp", +] +thirdparty_libsimplewebm_sources = [thirdparty_libsimplewebm_dir + file for file in thirdparty_libsimplewebm_sources] + +env_webm.add_source_files(env.modules_sources, thirdparty_libsimplewebm_sources) +env_webm.Append(CPPPATH = [thirdparty_libsimplewebm_dir, thirdparty_libsimplewebm_dir + "libwebm/"]) + +# also requires libogg, libvorbis and libopus +if (env["libogg"] != "system"): # builtin + env_webm.Append(CPPPATH = ["#thirdparty/libogg"]) +if (env["libvorbis"] != "system"): # builtin + env_webm.Append(CPPPATH = ["#thirdparty/libvorbis"]) +if (env["opus"] != "system"): # builtin + env_webm.Append(CPPPATH = ["#thirdparty"]) + +if (env["libvpx"] != "system"): # builtin + Export('env_webm') + SConscript("libvpx/SCsub") + +# Godot source files +env_webm.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/webm/config.py b/modules/webm/config.py new file mode 100644 index 00000000000..368e97e152c --- /dev/null +++ b/modules/webm/config.py @@ -0,0 +1,6 @@ + +def can_build(platform): + return True + +def configure(env): + pass diff --git a/modules/webm/libvpx/SCsub b/modules/webm/libvpx/SCsub new file mode 100644 index 00000000000..706db972065 --- /dev/null +++ b/modules/webm/libvpx/SCsub @@ -0,0 +1,390 @@ +#!/usr/bin/env python + +libvpx_dir = "#thirdparty/libvpx/" + +libvpx_sources = [ + "vp8/vp8_dx_iface.c", + + "vp8/common/generic/systemdependent.c", + + "vp8/common/alloccommon.c", + "vp8/common/blockd.c", + "vp8/common/copy_c.c", + "vp8/common/debugmodes.c", + "vp8/common/dequantize.c", + "vp8/common/entropy.c", + "vp8/common/entropymode.c", + "vp8/common/entropymv.c", + "vp8/common/extend.c", + "vp8/common/filter.c", + "vp8/common/findnearmv.c", + "vp8/common/idct_blk.c", + "vp8/common/idctllm.c", + "vp8/common/loopfilter_filters.c", + "vp8/common/mbpitch.c", + "vp8/common/modecont.c", + "vp8/common/quant_common.c", + "vp8/common/reconinter.c", + "vp8/common/reconintra.c", + "vp8/common/reconintra4x4.c", + "vp8/common/rtcd.c", + "vp8/common/setupintrarecon.c", + "vp8/common/swapyv12buffer.c", + "vp8/common/treecoder.c", + "vp8/common/vp8_loopfilter.c", + + "vp8/decoder/dboolhuff.c", + "vp8/decoder/decodeframe.c", + "vp8/decoder/decodemv.c", + "vp8/decoder/detokenize.c", + "vp8/decoder/onyxd_if.c", + "vp8/decoder/threading.c", + + + "vp9/vp9_dx_iface.c", + + "vp9/common/vp9_alloccommon.c", + "vp9/common/vp9_blockd.c", + "vp9/common/vp9_common_data.c", + "vp9/common/vp9_debugmodes.c", + "vp9/common/vp9_entropy.c", + "vp9/common/vp9_entropymode.c", + "vp9/common/vp9_entropymv.c", + "vp9/common/vp9_filter.c", + "vp9/common/vp9_frame_buffers.c", + "vp9/common/vp9_idct.c", + "vp9/common/vp9_loopfilter.c", + "vp9/common/vp9_mvref_common.c", + "vp9/common/vp9_pred_common.c", + "vp9/common/vp9_quant_common.c", + "vp9/common/vp9_reconinter.c", + "vp9/common/vp9_reconintra.c", + "vp9/common/vp9_rtcd.c", + "vp9/common/vp9_scale.c", + "vp9/common/vp9_scan.c", + "vp9/common/vp9_seg_common.c", + "vp9/common/vp9_thread_common.c", + "vp9/common/vp9_tile_common.c", + + "vp9/decoder/vp9_decodeframe.c", + "vp9/decoder/vp9_decodemv.c", + "vp9/decoder/vp9_decoder.c", + "vp9/decoder/vp9_detokenize.c", + "vp9/decoder/vp9_dsubexp.c", + "vp9/decoder/vp9_dthread.c", + + + "vpx/src/vpx_codec.c", + "vpx/src/vpx_decoder.c", + "vpx/src/vpx_image.c", + "vpx/src/vpx_psnr.c", + + + "vpx_dsp/bitreader.c", + "vpx_dsp/bitreader_buffer.c", + "vpx_dsp/intrapred.c", + "vpx_dsp/inv_txfm.c", + "vpx_dsp/loopfilter.c", + "vpx_dsp/prob.c", + "vpx_dsp/vpx_convolve.c", + "vpx_dsp/vpx_dsp_rtcd.c", + + + "vpx_mem/vpx_mem.c", + + + "vpx_scale/vpx_scale_rtcd.c", + + "vpx_scale/generic/yv12config.c", + "vpx_scale/generic/yv12extend.c", + + + "vpx_util/vpx_thread.c" +] + +libvpx_sources_intrin_x86 = [ + "vp8/common/x86/filter_x86.c", + "vp8/common/x86/loopfilter_x86.c", + "vp8/common/x86/vp8_asm_stubs.c", + + + "vpx_dsp/x86/vpx_asm_stubs.c" +] +libvpx_sources_intrin_x86_mmx = [ + "vp8/common/x86/idct_blk_mmx.c", +] +libvpx_sources_intrin_x86_sse2 = [ + "vp8/common/x86/idct_blk_sse2.c", + + + "vp9/common/x86/vp9_idct_intrin_sse2.c", + + + "vpx_dsp/x86/inv_txfm_sse2.c", + "vpx_dsp/x86/loopfilter_sse2.c", +] +libvpx_sources_intrin_x86_ssse3 = [ + "vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c" +] +libvpx_sources_intrin_x86_avx2 = [ + "vpx_dsp/x86/loopfilter_avx2.c", + "vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c" +] +libvpx_sources_x86asm = [ + "vp8/common/x86/copy_sse2.asm", + "vp8/common/x86/copy_sse3.asm", + "vp8/common/x86/dequantize_mmx.asm", + "vp8/common/x86/idctllm_mmx.asm", + "vp8/common/x86/idctllm_sse2.asm", + "vp8/common/x86/iwalsh_mmx.asm", + "vp8/common/x86/iwalsh_sse2.asm", + "vp8/common/x86/loopfilter_sse2.asm", + "vp8/common/x86/recon_mmx.asm", + "vp8/common/x86/recon_sse2.asm", + "vp8/common/x86/subpixel_mmx.asm", + "vp8/common/x86/subpixel_sse2.asm", + "vp8/common/x86/subpixel_ssse3.asm", + "vp8/common/x86/vp8_loopfilter_mmx.asm", + + + "vpx_dsp/x86/intrapred_sse2.asm", + "vpx_dsp/x86/intrapred_ssse3.asm", + "vpx_dsp/x86/inv_wht_sse2.asm", + "vpx_dsp/x86/vpx_convolve_copy_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_8t_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm", + "vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm", + + + "vpx_ports/emms.asm" +] +libvpx_sources_x86_64asm = [ + "vp8/common/x86/loopfilter_block_sse2_x86_64.asm", + + + "vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm" +] + +libvpx_sources_arm = [ + "vpx_ports/arm_cpudetect.c", + + + "vp8/common/arm/loopfilter_arm.c", +] +libvpx_sources_arm_neon = [ + "vp8/common/arm/neon/bilinearpredict_neon.c", + "vp8/common/arm/neon/copymem_neon.c", + "vp8/common/arm/neon/dc_only_idct_add_neon.c", + "vp8/common/arm/neon/dequant_idct_neon.c", + "vp8/common/arm/neon/dequantizeb_neon.c", + "vp8/common/arm/neon/idct_blk_neon.c", + "vp8/common/arm/neon/idct_dequant_0_2x_neon.c", + "vp8/common/arm/neon/idct_dequant_full_2x_neon.c", + "vp8/common/arm/neon/iwalsh_neon.c", + "vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c", + "vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c", + "vp8/common/arm/neon/mbloopfilter_neon.c", + "vp8/common/arm/neon/shortidct4x4llm_neon.c", + "vp8/common/arm/neon/sixtappredict_neon.c", + "vp8/common/arm/neon/vp8_loopfilter_neon.c", + + + "vp9/common/arm/neon/vp9_iht4x4_add_neon.c", + "vp9/common/arm/neon/vp9_iht8x8_add_neon.c", + + + "vpx_dsp/arm/idct16x16_1_add_neon.c", + "vpx_dsp/arm/idct16x16_add_neon.c", + "vpx_dsp/arm/idct16x16_neon.c", + "vpx_dsp/arm/idct32x32_1_add_neon.c", + "vpx_dsp/arm/idct32x32_add_neon.c", + "vpx_dsp/arm/idct4x4_1_add_neon.c", + "vpx_dsp/arm/idct4x4_add_neon.c", + "vpx_dsp/arm/idct8x8_1_add_neon.c", + "vpx_dsp/arm/idct8x8_add_neon.c", + "vpx_dsp/arm/intrapred_neon.c", + "vpx_dsp/arm/loopfilter_16_neon.c", + "vpx_dsp/arm/loopfilter_4_neon.c", + "vpx_dsp/arm/loopfilter_8_neon.c", + "vpx_dsp/arm/loopfilter_neon.c", + "vpx_dsp/arm/vpx_convolve8_avg_neon.c", + "vpx_dsp/arm/vpx_convolve8_neon.c", + "vpx_dsp/arm/vpx_convolve_avg_neon.c", + "vpx_dsp/arm/vpx_convolve_copy_neon.c", + "vpx_dsp/arm/vpx_convolve_neon.c" +] +libvpx_sources_arm_neon_gas = [ + "vpx_dsp/arm/gas/intrapred_neon_asm.s", + "vpx_dsp/arm/gas/loopfilter_mb_neon.s", + "vpx_dsp/arm/gas/save_reg_neon.s" +] +libvpx_sources_arm_neon_armasm_ms = [ + "vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm", + "vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm", + "vpx_dsp/arm/armasm_ms/save_reg_neon.asm" +] +libvpx_sources_arm_neon_gas_apple = [ + "vpx_dsp/arm/gas_apple/intrapred_neon_asm.s", + "vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s", + "vpx_dsp/arm/gas_apple/save_reg_neon.s" +] + +libvpx_sources = [libvpx_dir + file for file in libvpx_sources] +libvpx_sources_intrin_x86 = [libvpx_dir + file for file in libvpx_sources_intrin_x86] +libvpx_sources_intrin_x86_mmx = [libvpx_dir + file for file in libvpx_sources_intrin_x86_mmx] +libvpx_sources_intrin_x86_sse2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_sse2] +libvpx_sources_intrin_x86_ssse3 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_ssse3] +libvpx_sources_intrin_x86_avx2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_avx2] +libvpx_sources_x86asm = [libvpx_dir + file for file in libvpx_sources_x86asm] +libvpx_sources_x86_64asm = [libvpx_dir + file for file in libvpx_sources_x86_64asm] +libvpx_sources_arm = [libvpx_dir + file for file in libvpx_sources_arm] +libvpx_sources_arm_neon = [libvpx_dir + file for file in libvpx_sources_arm_neon] +libvpx_sources_arm_neon_gas = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas] +libvpx_sources_arm_neon_armasm_ms = [libvpx_dir + file for file in libvpx_sources_arm_neon_armasm_ms] +libvpx_sources_arm_neon_gas_apple = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas_apple] + + +Import('env') +Import('env_webm') + +env_webm.Append(CPPPATH = [libvpx_dir]) + +env_libvpx = env.Clone() +env_libvpx.Append(CPPPATH = [libvpx_dir]) + +cpu_bits = env["bits"] +osx_fat = (env["platform"] == 'osx' and cpu_bits == 'fat') +webm_cpu_x86 = False +webm_cpu_arm = False +if env["platform"] == 'winrt': + if 'arm' in env["PROGSUFFIX"]: + webm_cpu_arm = True + else: + webm_cpu_x86 = True +else: + is_android_x86 = (env["platform"] == 'android' and env["android_arch"] == 'x86') + if is_android_x86: + cpu_bits = '32' + if osx_fat: + webm_cpu_x86 = True + else: + webm_cpu_x86 = (cpu_bits == '32' or cpu_bits == '64') and (env["platform"] == 'windows' or env["platform"] == 'x11' or env["platform"] == 'osx' or env["platform"] == 'haiku' or is_android_x86) + webm_cpu_arm = env["platform"] == 'iphone' or env["platform"] == 'bb10' or (env["platform"] == 'android' and env["android_arch"] != 'x86') + +if webm_cpu_x86: + import subprocess + import os + + yasm_paths = [ + "yasm", + "../../../yasm", + ] + + yasm_found = False + + devnull = open(os.devnull) + for yasm_path in yasm_paths: + try: + yasm_found = True + subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate() + except: + yasm_found = False + if yasm_found: + break + + if not yasm_found: + webm_cpu_x86 = False + print "YASM is necessary for WebM SIMD optimizations." + +webm_simd_optimizations = False + +if webm_cpu_x86: + if osx_fat: + #'osx' platform only: run python script which will compile using 'yasm' command and then merge 32-bit and 64-bit using 'lipo' command + env_libvpx["AS"] = 'python modules/webm/libvpx/yasm_osx_fat.py' + env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + env_libvpx["ASCOM"] = '$AS $ASFLAGS $TARGET $SOURCES' + else: + if env["platform"] == 'windows' or env["platform"] == 'winrt': + env_libvpx["ASFORMAT"] = 'win' + elif env["platform"] == 'osx': + env_libvpx["ASFORMAT"] = 'macho' + else: + env_libvpx["ASFORMAT"] = 'elf' + env_libvpx["ASFORMAT"] += cpu_bits + + env_libvpx["AS"] = 'yasm' + env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + ' -f $ASFORMAT -D $ASCPU' + env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' + + if cpu_bits == '32': + env_libvpx["ASCPU"] = 'X86_32' + elif cpu_bits == '64': + env_libvpx["ASCPU"] = 'X86_64' + + env_libvpx.Append(CCFLAGS=['-DWEBM_X86ASM']) + + webm_simd_optimizations = True + +if webm_cpu_arm: + if env["platform"] == 'iphone': + env_libvpx["ASFLAGS"] = '-arch armv7' + elif env["platform"] == 'android': + env_libvpx["ASFLAGS"] = '-mfpu=neon' + elif env["platform"] == 'winrt': + env_libvpx["AS"] = 'armasm' + env_libvpx["ASFLAGS"] = '' + env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' + + env_libvpx.Append(CCFLAGS=['-DWEBM_ARMASM']) + + webm_simd_optimizations = True + +if webm_simd_optimizations == False: + print "WebM SIMD optimizations are disabled. Check if your CPU architecture, CPU bits or platform are supported!" + + +env_libvpx.add_source_files(env.modules_sources, libvpx_sources) +if webm_cpu_x86: + is_clang_or_gcc = ('gcc' in env["CC"]) or ('clang' in env["CC"]) + + env_libvpx_mmx = env_libvpx.Clone() + if cpu_bits == '32' and is_clang_or_gcc: + env_libvpx_mmx.Append(CCFLAGS=['-mmmx']) + env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx) + + env_libvpx_sse2 = env_libvpx.Clone() + if cpu_bits == '32' and is_clang_or_gcc: + env_libvpx_sse2.Append(CCFLAGS=['-msse2']) + env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2) + + env_libvpx_ssse3 = env_libvpx.Clone() + if is_clang_or_gcc: + env_libvpx_ssse3.Append(CCFLAGS=['-mssse3']) + env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3) + + env_libvpx_avx2 = env_libvpx.Clone() + if is_clang_or_gcc: + env_libvpx_avx2.Append(CCFLAGS=['-mavx2']) + env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2) + + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86) + + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm) + if cpu_bits == '64' or osx_fat: + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm) +elif webm_cpu_arm: + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm) + + env_libvpx_neon = env_libvpx.Clone() + if env["platform"] == 'android' and env["android_arch"] == 'armv6': + env_libvpx_neon.Append(CCFLAGS=['-mfpu=neon']) + env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon) + + if env["platform"] == 'winrt': + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_armasm_ms) + elif env["platform"] == 'iphone': + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple) + else: + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas) diff --git a/modules/webm/libvpx/yasm_osx_fat.py b/modules/webm/libvpx/yasm_osx_fat.py new file mode 100644 index 00000000000..bcee50b3891 --- /dev/null +++ b/modules/webm/libvpx/yasm_osx_fat.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +import sys +import os + +includes = sys.argv[1] +output_file = sys.argv[2] +input_file = sys.argv[3] + +can_remove = {} + +lipo_command = '' + +exit_code = 1 + +for arch in ['32', '64']: + if arch == '32' and input_file.endswith('x86_64.asm'): + can_remove[arch] = False + else: + command = 'yasm ' + includes + ' -f macho' + arch + ' -D X86_' + arch + ' -o ' + output_file + '.' + arch + ' ' + input_file + print(command) + if os.system(command) == 0: + lipo_command += output_file + '.' + arch + ' ' + can_remove[arch] = True + else: + can_remove[arch] = False + +if lipo_command != '': + lipo_command = 'lipo -create ' + lipo_command + '-output ' + output_file + print(lipo_command) + if os.system(lipo_command) == 0: + exit_code = 0 + +for arch in ['32', '64']: + if can_remove[arch]: + os.remove(output_file + '.' + arch) + +sys.exit(exit_code) diff --git a/modules/webm/register_types.cpp b/modules/webm/register_types.cpp new file mode 100644 index 00000000000..07031caad1d --- /dev/null +++ b/modules/webm/register_types.cpp @@ -0,0 +1,45 @@ +/*************************************************************************/ +/* register_types.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "register_types.h" + +#include "video_stream_webm.h" + +static ResourceFormatLoaderVideoStreamWebm *webm_stream_loader = NULL; + +void register_webm_types() { + + webm_stream_loader = memnew(ResourceFormatLoaderVideoStreamWebm); + ResourceLoader::add_resource_format_loader(webm_stream_loader); + ObjectTypeDB::register_type(); +} + +void unregister_webm_types() { + + memdelete(webm_stream_loader); +} diff --git a/modules/webm/register_types.h b/modules/webm/register_types.h new file mode 100644 index 00000000000..62f08646a42 --- /dev/null +++ b/modules/webm/register_types.h @@ -0,0 +1,30 @@ +/*************************************************************************/ +/* register_types.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +void register_webm_types(); +void unregister_webm_types(); diff --git a/modules/webm/video_stream_webm.cpp b/modules/webm/video_stream_webm.cpp new file mode 100644 index 00000000000..dc2266804da --- /dev/null +++ b/modules/webm/video_stream_webm.cpp @@ -0,0 +1,446 @@ +/*************************************************************************/ +/* av_stream_webm.cpp.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "video_stream_webm.h" + +#include "VPXDecoder.hpp" +#include "OpusVorbisDecoder.hpp" + +#include "mkvparser/mkvparser.h" +#include "../theora/yuv2rgb.h" + +#include "os/file_access.h" +#include "globals.h" + +#include + +class MkvReader : public mkvparser::IMkvReader { + +public: + MkvReader(const String &p_file) { + + file = FileAccess::open(p_file, FileAccess::READ); + ERR_FAIL_COND(!file); + } + ~MkvReader() { + + if (file) + memdelete(file); + } + + virtual int Read(long long pos, long len, unsigned char *buf) { + + if (file) { + + if (file->get_pos() != (size_t)pos) + file->seek(pos); + if (file->get_buffer(buf, len) == len) + return 0; + } + return -1; + } + + virtual int Length(long long *total, long long *available) { + + if (file) { + + const size_t len = file->get_len(); + if (total) + *total = len; + if (available) + *available = len; + return 0; + } + return -1; + } + +private: + FileAccess *file; +}; + +/**/ + +VideoStreamPlaybackWebm::VideoStreamPlaybackWebm() : + audio_track(0), + webm(NULL), + video(NULL), + audio(NULL), + video_frames(NULL), audio_frame(NULL), + video_frames_pos(0), video_frames_capacity(0), + num_decoded_samples(0), samples_offset(-1), + mix_callback(NULL), + mix_udata(NULL), + playing(false), paused(false), + delay_compensation(0.0), + time(0.0), video_frame_delay(0.0), video_pos(0.0), + texture(memnew(ImageTexture)), + pcm(NULL) +{} +VideoStreamPlaybackWebm::~VideoStreamPlaybackWebm() { + + delete_pointers(); +} + +bool VideoStreamPlaybackWebm::open_file(const String &p_file) { + + file_name = p_file; + webm = memnew(WebMDemuxer(new MkvReader(file_name), 0, audio_track)); + if (webm->isOpen()) { + + video = memnew(VPXDecoder(*webm, 8)); //TODO: Detect CPU threads + if (video->isOpen()) { + + audio = memnew(OpusVorbisDecoder(*webm)); + if (audio->isOpen()) { + + audio_frame = memnew(WebMFrame); + pcm = (int16_t *)memalloc(sizeof(int16_t) * audio->getBufferSamples() * webm->getChannels()); + } else { + + memdelete(audio); + audio = NULL; + } + + frame_data.resize((webm->getWidth() * webm->getHeight()) << 2); + texture->create(webm->getWidth(), webm->getHeight(), Image::FORMAT_RGBA, Texture::FLAG_FILTER | Texture::FLAG_VIDEO_SURFACE); + + return true; + } + memdelete(video); + video = NULL; + } + memdelete(webm); + webm = NULL; + return false; +} + +void VideoStreamPlaybackWebm::stop() { + + if (playing) { + + delete_pointers(); + + pcm = NULL; + + audio_frame = NULL; + video_frames = NULL; + + video = NULL; + audio = NULL; + + open_file(file_name); //Should not fail here... + + video_frames_capacity = video_frames_pos = 0; + num_decoded_samples = 0; + samples_offset = -1; + video_frame_delay = video_pos = 0.0; + } + time = 0.0; + playing = false; +} +void VideoStreamPlaybackWebm::play() { + + stop(); + + delay_compensation = Globals::get_singleton()->get("audio/video_delay_compensation_ms"); + delay_compensation /= 1000.0; + + playing = true; +} + +bool VideoStreamPlaybackWebm::is_playing() const { + + return playing; +} + +void VideoStreamPlaybackWebm::set_paused(bool p_paused) { + + paused = p_paused; +} +bool VideoStreamPlaybackWebm::is_paused(bool p_paused) const { + + return paused; +} + +void VideoStreamPlaybackWebm::set_loop(bool p_enable) { + + //Empty +} +bool VideoStreamPlaybackWebm::has_loop() const { + + return false; +} + +float VideoStreamPlaybackWebm::get_length() const { + + if (webm) + return webm->getLength(); + return 0.0f; +} + +float VideoStreamPlaybackWebm::get_pos() const { + + return video_pos; +} +void VideoStreamPlaybackWebm::seek_pos(float p_time) { + + //Not implemented +} + +void VideoStreamPlaybackWebm::set_audio_track(int p_idx) { + + audio_track = p_idx; +} + +Ref VideoStreamPlaybackWebm::get_texture() { + + return texture; +} +void VideoStreamPlaybackWebm::update(float p_delta) { + + if ((!playing || paused) || !video) + return; + + bool audio_buffer_full = false; + + if (samples_offset > -1) { + + //Mix remaining samples + const int to_read = num_decoded_samples - samples_offset; + const int mixed = mix_callback(mix_udata, pcm + samples_offset * webm->getChannels(), to_read); + if (mixed != to_read) { + + samples_offset += mixed; + audio_buffer_full = true; + } else { + + samples_offset = -1; + } + } + + const bool hasAudio = (audio && mix_callback); + while ((hasAudio && (!audio_buffer_full || !has_enough_video_frames())) || (!hasAudio && video_frames_pos == 0)) { + + if (hasAudio && !audio_buffer_full && audio_frame->isValid() && audio->getPCMS16(*audio_frame, pcm, num_decoded_samples) && num_decoded_samples > 0) { + + const int mixed = mix_callback(mix_udata, pcm, num_decoded_samples); + if (mixed != num_decoded_samples) { + + samples_offset = mixed; + audio_buffer_full = true; + } + } + + WebMFrame *video_frame; + if (video_frames_pos >= video_frames_capacity) { + + WebMFrame **video_frames_new = (WebMFrame **)memrealloc(video_frames, ++video_frames_capacity * sizeof(void *)); + ERR_FAIL_COND(!video_frames_new); //Out of memory + (video_frames = video_frames_new)[video_frames_capacity - 1] = memnew(WebMFrame); + } + video_frame = video_frames[video_frames_pos]; + + if (!webm->readFrame(video_frame, audio_frame)) //This will invalidate frames + break; //Can't demux, EOS? + + if (video_frame->isValid()) + ++video_frames_pos; + }; + + const double video_delay = video->getFramesDelay() * video_frame_delay; + + bool want_this_frame = false; + while (video_frames_pos > 0 && !want_this_frame) { + + WebMFrame *video_frame = video_frames[0]; + if (video_frame->time <= time + video_delay) { + + if (video->decode(*video_frame)) { + + VPXDecoder::IMAGE_ERROR err; + VPXDecoder::Image image; + + while ((err = video->getImage(image)) != VPXDecoder::NO_FRAME) { + + want_this_frame = (time - video_frame->time <= video_frame_delay); + + if (want_this_frame) { + + if (err == VPXDecoder::NO_ERROR && image.w == webm->getWidth() && image.h == webm->getHeight()) { + + DVector::Write w = frame_data.write(); + bool converted = false; + + if (image.chromaShiftW == 1 && image.chromaShiftH == 1) { + + yuv420_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0); +// libyuv::I420ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); + converted = true; + } else if (image.chromaShiftW == 1 && image.chromaShiftH == 0) { + + yuv422_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0); +// libyuv::I422ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); + converted = true; + } else if (image.chromaShiftW == 0 && image.chromaShiftH == 0) { + + yuv444_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0); +// libyuv::I444ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); + converted = true; + } else if (image.chromaShiftW == 2 && image.chromaShiftH == 0) { + +// libyuv::I411ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); +// converted = true; + } + + if (converted) + texture->set_data(Image(image.w, image.h, 0, Image::FORMAT_RGBA, frame_data)); //Zero copy send to visual server + } + + break; + } + } + } + + video_frame_delay = video_frame->time - video_pos; + video_pos = video_frame->time; + + memmove(video_frames, video_frames + 1, (--video_frames_pos) * sizeof(void *)); + video_frames[video_frames_pos] = video_frame; + } else { + + break; + } + } + + time += p_delta; + + if (video_frames_pos == 0 && webm->isEOS()) + stop(); +} + +void VideoStreamPlaybackWebm::set_mix_callback(VideoStreamPlayback::AudioMixCallback p_callback, void *p_userdata) { + + mix_callback = p_callback; + mix_udata = p_userdata; +} +int VideoStreamPlaybackWebm::get_channels() const { + + if (audio) + return webm->getChannels(); + return 0; +} +int VideoStreamPlaybackWebm::get_mix_rate() const { + + if (audio) + return webm->getSampleRate(); + return 0; +} + +inline bool VideoStreamPlaybackWebm::has_enough_video_frames() const +{ + if (video_frames_pos > 0) { + + const double audio_delay = AudioServer::get_singleton()->get_output_delay(); + const double video_time = video_frames[video_frames_pos - 1]->time; + return video_time >= time + audio_delay + delay_compensation; + } + return false; +} + +void VideoStreamPlaybackWebm::delete_pointers() { + + if (pcm) + memfree(pcm); + + if (audio_frame) + memdelete(audio_frame); + for (int i = 0; i < video_frames_capacity; ++i) + memdelete(video_frames[i]); + if (video_frames) + memfree(video_frames); + + if (video) + memdelete(video); + if (audio) + memdelete(audio); + + if (webm) + memdelete(webm); +} + +/**/ + +RES ResourceFormatLoaderVideoStreamWebm::load(const String &p_path, const String &p_original_path, Error *r_error) { + + Ref stream = memnew(VideoStreamWebm); + stream->set_file(p_path); + if (r_error) + *r_error = OK; + return stream; +} + +void ResourceFormatLoaderVideoStreamWebm::get_recognized_extensions(List *p_extensions) const { + + p_extensions->push_back("webm"); +} +bool ResourceFormatLoaderVideoStreamWebm::handles_type(const String &p_type) const { + + return (p_type == "VideoStream" || p_type == "VideoStreamWebm"); +} + +String ResourceFormatLoaderVideoStreamWebm::get_resource_type(const String &p_path) const { + + const String exl = p_path.extension().to_lower(); + if (exl == "webm") + return "VideoStreamWebm"; + return ""; +} + +/**/ + +VideoStreamWebm::VideoStreamWebm() : + audio_track(0) +{} + +Ref VideoStreamWebm::instance_playback() { + + Ref pb = memnew(VideoStreamPlaybackWebm); + pb->set_audio_track(audio_track); + if (pb->open_file(file)) + return pb; + return NULL; +} + +void VideoStreamWebm::set_file(const String &p_file) { + + file = p_file; +} +void VideoStreamWebm::set_audio_track(int p_track) { + + audio_track = p_track; +} diff --git a/modules/webm/video_stream_webm.h b/modules/webm/video_stream_webm.h new file mode 100644 index 00000000000..d1e02d06633 --- /dev/null +++ b/modules/webm/video_stream_webm.h @@ -0,0 +1,128 @@ +/*************************************************************************/ +/* av_stream_webm.cpp.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "scene/resources/video_stream.h" +#include "io/resource_loader.h" + +class WebMFrame; +class WebMDemuxer; +class VPXDecoder; +class OpusVorbisDecoder; + +class VideoStreamPlaybackWebm : public VideoStreamPlayback { + + OBJ_TYPE(VideoStreamPlaybackWebm, VideoStreamPlayback) + + String file_name; + int audio_track; + + WebMDemuxer *webm; + VPXDecoder *video; + OpusVorbisDecoder *audio; + + WebMFrame **video_frames, *audio_frame; + int video_frames_pos, video_frames_capacity; + + int num_decoded_samples, samples_offset; + AudioMixCallback mix_callback; + void *mix_udata; + + bool playing, paused; + double delay_compensation; + double time, video_frame_delay, video_pos; + + DVector frame_data; + Ref texture; + + int16_t *pcm; + +public: + VideoStreamPlaybackWebm(); + ~VideoStreamPlaybackWebm(); + + bool open_file(const String &p_file); + + virtual void stop(); + virtual void play(); + + virtual bool is_playing() const; + + virtual void set_paused(bool p_paused); + virtual bool is_paused(bool p_paused) const; + + virtual void set_loop(bool p_enable); + virtual bool has_loop() const; + + virtual float get_length() const; + + virtual float get_pos() const; + virtual void seek_pos(float p_time); + + virtual void set_audio_track(int p_idx); + + virtual Ref get_texture(); + virtual void update(float p_delta); + + virtual void set_mix_callback(AudioMixCallback p_callback, void *p_userdata); + virtual int get_channels() const; + virtual int get_mix_rate() const; + +private: + inline bool has_enough_video_frames() const; + + void delete_pointers(); +}; + +/**/ + +class VideoStreamWebm : public VideoStream { + + OBJ_TYPE(VideoStreamWebm, VideoStream) + + String file; + int audio_track; + +public: + VideoStreamWebm(); + + virtual Ref instance_playback(); + + virtual void set_file(const String &p_file); + virtual void set_audio_track(int p_track); +}; + +/**/ + +class ResourceFormatLoaderVideoStreamWebm : public ResourceFormatLoader { + +public: + virtual RES load(const String &p_path, const String &p_original_path, Error *r_error); + virtual void get_recognized_extensions(List *p_extensions) const; + virtual bool handles_type(const String &p_type) const; + virtual String get_resource_type(const String &p_path) const; +}; diff --git a/platform/x11/detect.py b/platform/x11/detect.py index eb71ac74094..554a461d1bb 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -164,6 +164,9 @@ def configure(env): env["libvorbis"] = "system" # Needed to link against system libtheora env.ParseConfig('pkg-config theora theoradec --cflags --libs') + if (env["libvpx"] == "system"): + env.ParseConfig('pkg-config vpx --cflags --libs') + if (env["libvorbis"] == "system"): env["libogg"] = "system" # Needed to link against system libvorbis env.ParseConfig('pkg-config vorbis vorbisfile --cflags --libs') From 53d8f2b1ec1d86b189800b7fe156c464fdf9e380 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Thu, 27 Oct 2016 11:50:26 -0300 Subject: [PATCH 016/223] PBR more or less working, still working on bringing gizmos back --- doc/base/classes.xml | 4 +- drivers/gles3/rasterizer_scene_gles3.cpp | 711 ++++++++++++++-- drivers/gles3/rasterizer_scene_gles3.h | 138 +++- drivers/gles3/rasterizer_storage_gles3.cpp | 224 ++++- drivers/gles3/rasterizer_storage_gles3.h | 26 +- drivers/gles3/shader_compiler_gles3.cpp | 81 +- drivers/gles3/shader_gles3.cpp | 1 + drivers/gles3/shader_gles3.h | 1 + drivers/gles3/shaders/cubemap_filter.glsl | 59 +- drivers/gles3/shaders/scene.glsl | 284 ++++++- modules/gridmap/grid_map.cpp | 4 +- platform/iphone/rasterizer_iphone.cpp | 16 +- platform/iphone/rasterizer_iphone.h | 20 +- scene/3d/light.cpp | 150 +++- scene/3d/light.h | 41 +- scene/3d/particles.cpp | 10 +- scene/main/scene_main_loop.cpp | 52 +- scene/main/scene_main_loop.h | 12 + scene/main/viewport.cpp | 2 +- scene/register_scene_types.cpp | 11 +- scene/resources/environment.cpp | 27 +- scene/resources/environment.h | 6 +- scene/resources/material.cpp | 768 +++++++++++++++++- scene/resources/material.h | 291 ++++++- servers/visual/rasterizer.cpp | 98 +-- servers/visual/rasterizer.h | 60 +- servers/visual/shader_language.cpp | 42 +- servers/visual/shader_types.cpp | 16 +- servers/visual/visual_server_raster.cpp | 20 +- servers/visual/visual_server_raster.h | 4 +- servers/visual/visual_server_scene.cpp | 63 +- servers/visual_server.cpp | 7 - servers/visual_server.h | 19 +- .../io_plugins/editor_import_collada.cpp | 26 +- .../io_plugins/editor_scene_import_plugin.cpp | 30 +- .../editor_scene_importer_fbxconv.cpp | 24 +- tools/editor/plugins/baked_light_baker.cpp | 10 +- .../collision_polygon_editor_plugin.cpp | 20 +- .../plugins/collision_polygon_editor_plugin.h | 4 +- tools/editor/plugins/path_editor_plugin.cpp | 12 +- tools/editor/plugins/path_editor_plugin.h | 4 +- .../editor/plugins/spatial_editor_plugin.cpp | 56 +- tools/editor/plugins/spatial_editor_plugin.h | 8 +- tools/editor/spatial_editor_gizmos.cpp | 98 +-- tools/editor/spatial_editor_gizmos.h | 48 +- 45 files changed, 3021 insertions(+), 587 deletions(-) diff --git a/doc/base/classes.xml b/doc/base/classes.xml index 9453cdc77d3..df1d0262ac0 100644 --- a/doc/base/classes.xml +++ b/doc/base/classes.xml @@ -13434,12 +13434,12 @@
- + Simple Material with a fixed parameter set. - FixedMaterial is a simple type of material [Resource], which contains a fixed amount of parameters. It is the only type of material supported in fixed-pipeline devices and APIs. It is also an often a better alternative to [ShaderMaterial] for most simple use cases. + FixedSpatialMaterial is a simple type of material [Resource], which contains a fixed amount of parameters. It is the only type of material supported in fixed-pipeline devices and APIs. It is also an often a better alternative to [ShaderMaterial] for most simple use cases. diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 4ce2bd2f37e..aadf9e63360 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -75,7 +75,7 @@ void RasterizerSceneGLES3::environment_set_background(RID p_env,VS::EnvironmentB env->bg_mode=p_bg; } -void RasterizerSceneGLES3::environment_set_skybox(RID p_env, RID p_skybox, int p_radiance_size, int p_irradiance_size){ +void RasterizerSceneGLES3::environment_set_skybox(RID p_env, RID p_skybox, int p_radiance_size){ Environment *env=environment_owner.getornull(p_env); ERR_FAIL_COND(!env); @@ -87,15 +87,12 @@ void RasterizerSceneGLES3::environment_set_skybox(RID p_env, RID p_skybox, int p storage->free(env->skybox_radiance); env->skybox_radiance=RID(); } - if (env->skybox_irradiance.is_valid()) { - storage->free(env->skybox_irradiance); - env->skybox_irradiance=RID(); - } + if (p_skybox.is_valid()) { env->skybox_color=p_skybox; - // env->skybox_radiance=storage->texture_create_pbr_cubemap(p_skybox,VS::PBR_CUBEMAP_RADIANCE,p_radiance_size); + env->skybox_radiance=storage->texture_create_radiance_cubemap(p_skybox,p_radiance_size); //env->skybox_irradiance=storage->texture_create_pbr_cubemap(p_skybox,VS::PBR_CUBEMAP_IRRADIANCE,p_irradiance_size); } @@ -123,7 +120,7 @@ void RasterizerSceneGLES3::environment_set_bg_energy(RID p_env,float p_energy) { Environment *env=environment_owner.getornull(p_env); ERR_FAIL_COND(!env); - env->energy=p_energy; + env->bg_energy=p_energy; } @@ -135,14 +132,14 @@ void RasterizerSceneGLES3::environment_set_canvas_max_layer(RID p_env,int p_max_ env->canvas_max_layer=p_max_layer; } -void RasterizerSceneGLES3::environment_set_ambient_light(RID p_env, const Color& p_color, float p_energy, float p_skybox_energy){ +void RasterizerSceneGLES3::environment_set_ambient_light(RID p_env, const Color& p_color, float p_energy, float p_skybox_contribution){ Environment *env=environment_owner.getornull(p_env); ERR_FAIL_COND(!env); env->ambient_color=p_color; - env->ambient_anergy=p_energy; - env->skybox_ambient=p_skybox_energy; + env->ambient_energy=p_energy; + env->ambient_skybox_contribution=p_skybox_contribution; } @@ -174,13 +171,28 @@ void RasterizerSceneGLES3::environment_set_color_correction(RID p_env,bool p_ena RID RasterizerSceneGLES3::light_instance_create(RID p_light) { + LightInstance *light_instance = memnew( LightInstance ); - return RID(); + light_instance->light=p_light; + light_instance->light_ptr=storage->light_owner.getornull(p_light); + + glGenBuffers(1, &light_instance->light_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, light_instance->light_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(LightInstance::LightDataUBO), NULL, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + + ERR_FAIL_COND_V(!light_instance->light_ptr,RID()); + + return light_instance_owner.make_rid(light_instance); } void RasterizerSceneGLES3::light_instance_set_transform(RID p_light_instance,const Transform& p_transform){ + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + light_instance->transform=p_transform; } @@ -247,11 +259,13 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m //material parameters + state.scene_shader.set_custom_shader(p_material->shader->custom_code_id); bool rebind = state.scene_shader.bind(); if (p_material->ubo_id) { + glBindBufferBase(GL_UNIFORM_BUFFER,1,p_material->ubo_id); } @@ -267,6 +281,7 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m RasterizerStorageGLES3::Texture *t = storage->texture_owner.getornull( textures[i] ); if (!t) { //check hints + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); continue; } @@ -328,7 +343,14 @@ void RasterizerSceneGLES3::_render_geometry(RenderList::Element *e) { } -void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_element_count,const Transform& p_view_transform,const CameraMatrix& p_projection,bool p_reverse_cull,bool p_alpha_pass) { +void RasterizerSceneGLES3::_setup_light(LightInstance *p_light) { + + + glBindBufferBase(GL_UNIFORM_BUFFER,3,p_light->light_ubo); //bind light uniform +} + + +void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_element_count,const Transform& p_view_transform,const CameraMatrix& p_projection,RasterizerStorageGLES3::Texture* p_base_env,bool p_reverse_cull,bool p_alpha_pass) { if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { //p_reverse_cull=!p_reverse_cull; @@ -337,8 +359,22 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_e glFrontFace(GL_CW); } + bool shadow=false; + glBindBufferBase(GL_UNIFORM_BUFFER,0,state.scene_ubo); //bind globals ubo + + glBindBufferBase(GL_UNIFORM_BUFFER,2,state.env_radiance_ubo); //bind environment radiance info + glActiveTexture(GL_TEXTURE0+storage->config.max_texture_image_units-1); + glBindTexture(GL_TEXTURE_2D,state.brdf_texture); + + if (p_base_env) { + glActiveTexture(GL_TEXTURE0+storage->config.max_texture_image_units-2); + glBindTexture(p_base_env->target,p_base_env->tex_id); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_CUBEMAP,true); + } + + state.scene_shader.set_conditional(SceneShaderGLES3::USE_SKELETON,false); state.current_blend_mode=-1; @@ -349,6 +385,11 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_e RasterizerStorageGLES3::Geometry* prev_geometry=NULL; VS::InstanceType prev_base_type = VS::INSTANCE_MAX; + int prev_light_type=-1; + int prev_light_index=-1; + int prev_blend=-1; + int current_blend_mode=-1; + for (int i=0;isort_key>>RenderList::SORT_KEY_LIGHT_TYPE_SHIFT)&0xF; + int light_index=(e->sort_key>>RenderList::SORT_KEY_LIGHT_INDEX_SHIFT)&0xFFFF; + + bool additive=false; + + if (!shadow) { +#if 0 + if (texscreen_used && !texscreen_copied && material->shader_cache && material->shader_cache->valid && material->shader_cache->has_texscreen) { + texscreen_copied=true; + _copy_to_texscreen(); + + //force reset state + prev_material=NULL; + prev_light=0x777E; + prev_geometry_cmp=NULL; + prev_light_type=0xEF; + prev_skeleton =NULL; + prev_sort_flags=0xFF; + prev_morph_values=NULL; + prev_receive_shadows_state=-1; + glEnable(GL_BLEND); + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); + + } +#endif + if (light_type!=prev_light_type /* || receive_shadows_state!=prev_receive_shadows_state*/) { + + if (material->shader->spatial.unshaded/* || current_debug==VS::SCENARIO_DEBUG_SHADELESS*/) { + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_LIGHTING,false); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_DIRECTIONAL,false); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_OMNI,false); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_SPOT,false); + state.scene_shader.set_conditional(SceneShaderGLES3::SHADELESS,true); + + //state.scene_shader.set_conditional(SceneShaderGLES3::SHADELESS,true); + } else { + state.scene_shader.set_conditional(SceneShaderGLES3::SHADELESS,false); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_LIGHTING,light_type!=0xF); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_DIRECTIONAL,light_type==VS::LIGHT_DIRECTIONAL); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_OMNI,light_type==VS::LIGHT_OMNI); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_FORWARD_SPOT,light_type==VS::LIGHT_SPOT); + /* + if (receive_shadows_state==1) { + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_SHADOW,(light_type&0x8)); + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_PSSM,(light_type&0x10)); + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_PSSM4,(light_type&0x20)); + } + else { + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_SHADOW,false); + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_PSSM,false); + state.scene_shader.set_conditional(SceneShaderGLES3::LIGHT_USE_PSSM4,false); + } + state.scene_shader.set_conditional(SceneShaderGLES3::SHADELESS,false); + */ + } + + rebind=true; + } + + + if (!*e->additive_ptr) { + + additive=false; + *e->additive_ptr=true; + } else { + additive=true; + } + + bool desired_blend=false; + int desired_blend_mode=RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MIX; + + if (additive) { + desired_blend=true; + desired_blend_mode=RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_ADD; + } else { + desired_blend=p_alpha_pass; + desired_blend_mode=material->shader->spatial.blend_mode; + } + + if (prev_blend!=desired_blend) { + + if (desired_blend) { + glEnable(GL_BLEND); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glColorMask(1,1,1,0); + } + } else { + glDisable(GL_BLEND); + glColorMask(1,1,1,1); + } + + prev_blend=desired_blend; + } + + if (desired_blend && desired_blend_mode!=current_blend_mode) { + + + switch(desired_blend_mode) { + + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MIX: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_ADD: { + + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(p_alpha_pass?GL_SRC_ALPHA:GL_ONE,GL_ONE); + + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_SUB: { + + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA,GL_ONE); + } break; + case RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MUL: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + } break; + + } + + current_blend_mode=desired_blend_mode; + } + } + + if (light_index!=prev_light_index) { + if (light_index!=0xFFFF) { //not unshaded + _setup_light(light_instances[light_index]); + } + } + if (material!=prev_material || rebind) { rebind = _setup_material(material,p_alpha_pass); @@ -396,6 +582,7 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g RasterizerStorageGLES3::Material *m=NULL; RID m_src=p_instance->material_override.is_valid() ? p_instance->material_override :(p_material>=0?p_instance->materials[p_material]:p_geometry->material); + /* #ifdef DEBUG_ENABLED if (current_debug==VS::SCENARIO_DEBUG_OVERDRAW) { @@ -404,8 +591,10 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g #endif */ + if (m_src.is_valid()) { m=storage->material_owner.getornull( m_src ); + if (!m->shader) { m=NULL; } @@ -419,9 +608,10 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g - //bool has_base_alpha=(m->shader_cache && m->shader_cache->has_alpha); - //bool has_blend_alpha=m->blend_mode!=VS::MATERIAL_BLEND_MODE_MIX || m->flags[VS::MATERIAL_FLAG_ONTOP]; - bool has_alpha = false; //has_base_alpha || has_blend_alpha; + bool has_base_alpha=(m->shader->spatial.uses_alpha); + bool has_blend_alpha=m->shader->spatial.blend_mode!=RasterizerStorageGLES3::Shader::Spatial::BLEND_MODE_MIX || m->shader->spatial.ontop; + bool has_alpha = has_base_alpha || has_blend_alpha; + bool shadow = false; #if 0 if (shadow) { @@ -488,12 +678,13 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g e->additive_ptr=&e->additive; e->sort_key=0; + if (e->geometry->last_pass!=render_pass) { e->geometry->last_pass=render_pass; e->geometry->index=current_geometry_index++; } - e->sort_key|=uint64_t(e->instance->base_type)<sort_key|=uint64_t(e->geometry->index)<sort_key|=uint64_t(e->instance->base_type)<material->last_pass!=render_pass) { @@ -502,7 +693,6 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g } e->sort_key|=uint64_t(e->material->index)<sort_key|=uint64_t(e->instance->depth_layer)<geometry->type==RasterizerStorageGLES3::Geometry::GEOMETRY_MULTISURFACE) @@ -510,42 +700,45 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g bool mirror = e->instance->mirror; -// if (m->flags[VS::MATERIAL_FLAG_INVERT_FACES]) -// e->mirror=!e->mirror; + if (m->shader->spatial.cull_mode==RasterizerStorageGLES3::Shader::Spatial::CULL_MODE_FRONT) { + mirror=!mirror; + } if (mirror) { e->sort_key|=RenderList::SORT_KEY_MIRROR_FLAG; } //e->light_type=0xFF; // no lights! - e->sort_key|=uint64_t(0xF)<sort_key|=uint64_t(0xFFFF)<depth_draw_mode==VS::MATERIAL_DEPTH_DRAW_OPAQUE_PRE_PASS_ALPHA) { + + if (!shadow && !has_blend_alpha && has_alpha && m->shader->spatial.depth_draw_mode==RasterizerStorageGLES3::Shader::Spatial::DEPTH_DRAW_ALPHA_PREPASS) { //if nothing exists, add this element as opaque too - RenderList::Element *oe = opaque_render_list.add_element(); + RenderList::Element *oe = render_list.add_element(); if (!oe) return; - memcpy(oe,e,sizeof(RenderList::Element)); + copymem(oe,e,sizeof(RenderList::Element)); oe->additive_ptr=&oe->additive; } -*/ -#if 0 - if (shadow || m->flags[VS::MATERIAL_FLAG_UNSHADED] || current_debug==VS::SCENARIO_DEBUG_SHADELESS) { - e->light_type=0x7F; //unshaded is zero + + + if (shadow || m->shader->spatial.unshaded /*|| current_debug==VS::SCENARIO_DEBUG_SHADELESS*/) { + + e->sort_key=RenderList::SORT_KEY_LIGHT_INDEX_UNSHADED; + e->sort_key|=uint64_t(0xF)<sort_key|=uint64_t(0xFFFF)<sort_key; - uint8_t light_type = VS::LIGHT_DIRECTIONAL; + for(int i=0;ibase->shadow_enabled) { light_type|=0x8; if (directional_lights[i]->base->directional_shadow_mode==VS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) @@ -554,22 +747,24 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g light_type|=0x30; } +*/ RenderList::Element *ec; if (duplicate) { - - ec = render_list->add_element(); - memcpy(ec,e,sizeof(RenderList::Element)); + ec = render_list.add_element(); + copymem(ec,e,sizeof(RenderList::Element)); } else { ec=e; duplicate=true; } - ec->light_type=light_type; - ec->light=sort_key; ec->additive_ptr=&e->additive; + ec->sort_key|=uint64_t(directional_light_instances[i]->light_index) << RenderList::SORT_KEY_LIGHT_INDEX_SHIFT; + ec->sort_key|=uint64_t(VS::LIGHT_DIRECTIONAL) << RenderList::SORT_KEY_LIGHT_TYPE_SHIFT; + + lighted=true; } @@ -580,37 +775,45 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g for(int i=0;ilast_pass!=scene_pass) //lit by light not in visible scene + LightInstance *li=light_instance_owner.getptr( liptr[i] ); + + if (!li || li->last_pass!=render_pass) //lit by light not in visible scene continue; - uint8_t light_type=li->base->type|0x40; //penalty to ensure directionals always go first - if (li->base->shadow_enabled) { - light_type|=0x8; - } - uint16_t sort_key =li->sort_key; + + +// if (li->base->shadow_enabled) { +// light_type|=0x8; +// } RenderList::Element *ec; if (duplicate) { - ec = render_list->add_element(); - memcpy(ec,e,sizeof(RenderList::Element)); + ec = render_list.add_element(); + copymem(ec,e,sizeof(RenderList::Element)); } else { duplicate=true; ec=e; } - ec->light_type=light_type; - ec->light=sort_key; ec->additive_ptr=&e->additive; + ec->sort_key|=uint64_t(li->light_index) << RenderList::SORT_KEY_LIGHT_INDEX_SHIFT; + ec->sort_key|=uint64_t(li->light_ptr->type) << RenderList::SORT_KEY_LIGHT_TYPE_SHIFT; + + lighted=true; } + if (!lighted) { + e->sort_key|=uint64_t(0xE)<sort_key|=uint64_t(0xFFFF)<frame.time[i]; } + //bg and ambient + if (env) { + state.ubo_data.bg_energy=env->bg_energy; + state.ubo_data.ambient_energy=env->ambient_energy; + Color linear_ambient_color = env->ambient_color.to_linear(); + state.ubo_data.ambient_light_color[0]=linear_ambient_color.r; + state.ubo_data.ambient_light_color[1]=linear_ambient_color.g; + state.ubo_data.ambient_light_color[2]=linear_ambient_color.b; + state.ubo_data.ambient_light_color[3]=linear_ambient_color.a; + + Color bg_color; + + switch(env->bg_mode) { + case VS::ENV_BG_CLEAR_COLOR: { + bg_color=storage->frame.clear_request_color.to_linear(); + } break; + case VS::ENV_BG_COLOR: { + bg_color=env->bg_color.to_linear(); + } break; + default: { + bg_color=Color(0,0,0,1); + } break; + } + + state.ubo_data.bg_color[0]=bg_color.r; + state.ubo_data.bg_color[1]=bg_color.g; + state.ubo_data.bg_color[2]=bg_color.b; + state.ubo_data.bg_color[3]=bg_color.a; + + state.env_radiance_data.ambient_contribution=env->ambient_skybox_contribution; + } else { + state.ubo_data.bg_energy=1.0; + state.ubo_data.ambient_energy=1.0; + //use from clear color instead, since there is no ambient + Color linear_ambient_color = storage->frame.clear_request_color.to_linear(); + state.ubo_data.ambient_light_color[0]=linear_ambient_color.r; + state.ubo_data.ambient_light_color[1]=linear_ambient_color.g; + state.ubo_data.ambient_light_color[2]=linear_ambient_color.b; + state.ubo_data.ambient_light_color[3]=linear_ambient_color.a; + + state.ubo_data.bg_color[0]=linear_ambient_color.r; + state.ubo_data.bg_color[1]=linear_ambient_color.g; + state.ubo_data.bg_color[2]=linear_ambient_color.b; + state.ubo_data.bg_color[3]=linear_ambient_color.a; + + state.env_radiance_data.ambient_contribution=0; + + } glBindBuffer(GL_UNIFORM_BUFFER, state.scene_ubo); glBufferSubData(GL_UNIFORM_BUFFER, 0,sizeof(State::SceneDataUBO), &state.ubo_data); glBindBuffer(GL_UNIFORM_BUFFER, 0); + //fill up environment + + store_transform(p_cam_transform,state.env_radiance_data.transform); + + + glBindBuffer(GL_UNIFORM_BUFFER, state.env_radiance_ubo); + glBufferSubData(GL_UNIFORM_BUFFER, 0,sizeof(State::EnvironmentRadianceUBO), &state.env_radiance_data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + +} + +void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cull_count,const Transform& p_camera_inverse_transform) { + + directional_light_instance_count=0; + light_instance_count=0; + + for(int i=0;i=RenderList::MAX_LIGHTS ); + + LightInstance *li = light_instance_owner.getptr(p_light_cull_result[i]); + + switch(li->light_ptr->type) { + + case VS::LIGHT_DIRECTIONAL: { + + ERR_FAIL_COND( directional_light_instance_count >= RenderList::MAX_LIGHTS); + directional_light_instances[directional_light_instance_count++]=li; + + li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; + li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; + li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; + + //omni, keep at 0 + li->light_ubo_data.light_pos_inv_radius[0]=0.0; + li->light_ubo_data.light_pos_inv_radius[1]=0.0; + li->light_ubo_data.light_pos_inv_radius[2]=0.0; + li->light_ubo_data.light_pos_inv_radius[3]=0.0; + + Vector3 direction = p_camera_inverse_transform.basis.xform(li->transform.basis.xform(Vector3(0,0,-1))).normalized(); + li->light_ubo_data.light_direction_attenuation[0]=direction.x; + li->light_ubo_data.light_direction_attenuation[1]=direction.y; + li->light_ubo_data.light_direction_attenuation[2]=direction.z; + li->light_ubo_data.light_direction_attenuation[3]=1.0; + + li->light_ubo_data.light_params[0]=0; + li->light_ubo_data.light_params[1]=li->light_ptr->param[VS::LIGHT_PARAM_SPECULAR]; + li->light_ubo_data.light_params[2]=0; + li->light_ubo_data.light_params[3]=0; + + + +#if 0 + if (li->light_ptr->shadow_enabled) { + CameraMatrix bias; + bias.set_light_bias(); + + int passes=light_instance_get_shadow_passes(p_light_instance); + + for(int i=0;icustom_transform[i]).inverse(); + li->shadow_projection[i] = bias * li->custom_projection[i] * modelview; + } + lights_use_shadow=true; + } +#endif + } break; + case VS::LIGHT_OMNI: { + + li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; + li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; + li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; + + Vector3 pos = p_camera_inverse_transform.xform(li->transform.origin); + + //directional, keep at 0 + li->light_ubo_data.light_pos_inv_radius[0]=pos.x; + li->light_ubo_data.light_pos_inv_radius[1]=pos.y; + li->light_ubo_data.light_pos_inv_radius[2]=pos.z; + li->light_ubo_data.light_pos_inv_radius[3]=1.0/MAX(0.001,li->light_ptr->param[VS::LIGHT_PARAM_RANGE]); + + li->light_ubo_data.light_direction_attenuation[0]=0; + li->light_ubo_data.light_direction_attenuation[1]=0; + li->light_ubo_data.light_direction_attenuation[2]=0; + li->light_ubo_data.light_direction_attenuation[3]=li->light_ptr->param[VS::LIGHT_PARAM_ATTENUATION]; + + li->light_ubo_data.light_params[0]=0; + li->light_ubo_data.light_params[1]=li->light_ptr->param[VS::LIGHT_PARAM_SPECULAR]; + li->light_ubo_data.light_params[2]=0; + li->light_ubo_data.light_params[3]=0; + +#if 0 + if (li->light_ptr->shadow_enabled) { + li->shadow_projection[0] = Transform(camera_transform_inverse * li->transform).inverse(); + lights_use_shadow=true; + } +#endif + } break; + case VS::LIGHT_SPOT: { + + li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; + li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; + li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; + + Vector3 pos = p_camera_inverse_transform.xform(li->transform.origin); + + //directional, keep at 0 + li->light_ubo_data.light_pos_inv_radius[0]=pos.x; + li->light_ubo_data.light_pos_inv_radius[1]=pos.y; + li->light_ubo_data.light_pos_inv_radius[2]=pos.z; + li->light_ubo_data.light_pos_inv_radius[3]=1.0/MAX(0.001,li->light_ptr->param[VS::LIGHT_PARAM_RANGE]); + + Vector3 direction = p_camera_inverse_transform.basis.xform(li->transform.basis.xform(Vector3(0,0,-1))).normalized(); + li->light_ubo_data.light_direction_attenuation[0]=direction.x; + li->light_ubo_data.light_direction_attenuation[1]=direction.y; + li->light_ubo_data.light_direction_attenuation[2]=direction.z; + li->light_ubo_data.light_direction_attenuation[3]=li->light_ptr->param[VS::LIGHT_PARAM_ATTENUATION]; + + li->light_ubo_data.light_params[0]=li->light_ptr->param[VS::LIGHT_PARAM_SPOT_ATTENUATION]; + li->light_ubo_data.light_params[1]=li->light_ptr->param[VS::LIGHT_PARAM_SPECULAR]; + li->light_ubo_data.light_params[2]=0; + li->light_ubo_data.light_params[3]=0; + +#if 0 + if (li->light_ptr->shadow_enabled) { + CameraMatrix bias; + bias.set_light_bias(); + Transform modelview=Transform(camera_transform_inverse * li->transform).inverse(); + li->shadow_projection[0] = bias * li->projection * modelview; + lights_use_shadow=true; + } +#endif + } break; + + } + + + /* make light hash */ + + // actually, not really a hash, but helps to sort the lights + // and avoid recompiling redudant shader versions + + + li->last_pass=render_pass; + li->light_index=i; + + //update UBO for forward rendering, blit to texture for clustered + + glBindBuffer(GL_UNIFORM_BUFFER, li->light_ubo); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(LightInstance::LightDataUBO), &li->light_ubo_data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + light_instances[i]=li; + } + +} + +void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment){ + + //first of all, make a new render pass + render_pass++; + + //fill up ubo + + Environment *env = environment_owner.getornull(p_environment); + + _setup_environment(env,p_cam_projection,p_cam_transform); + + _setup_lights(p_light_cull_result,p_light_cull_count,p_cam_transform.affine_inverse()); render_list.clear(); - render_pass++; current_material_index=0; //fill list @@ -744,8 +1169,7 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM glClearDepth(1.0); glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); - - Environment *env = environment_owner.getornull(p_environment); + RasterizerStorageGLES3::Texture* env_radiance_tex; if (!env || env->bg_mode==VS::ENV_BG_CLEAR_COLOR) { @@ -756,12 +1180,21 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM storage->frame.clear_request=false; } + } else if (env->bg_mode==VS::ENV_BG_COLOR) { glClearColor( env->bg_color.r, env->bg_color.g, env->bg_color.b, env->bg_color.a ); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); storage->frame.clear_request=false; + } else if (env->bg_mode==VS::ENV_BG_SKYBOX) { + + if (env->skybox_radiance.is_valid()) { + env_radiance_tex = storage->texture_owner.getornull(env->skybox_radiance); + } + glClear(GL_DEPTH_BUFFER_BIT); + storage->frame.clear_request=false; + } else { glClear(GL_DEPTH_BUFFER_BIT); storage->frame.clear_request=false; @@ -810,7 +1243,7 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM // } - _render_list(render_list.elements,render_list.element_count,p_cam_transform,p_cam_projection,false,false); + _render_list(render_list.elements,render_list.element_count,p_cam_transform,p_cam_projection,env_radiance_tex,false,false); if (env && env->bg_mode==VS::ENV_BG_SKYBOX) { @@ -824,6 +1257,17 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM // state.scene_shader.set_conditional( SceneShaderGLES3::USE_FOG,false); glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); + glEnable(GL_BLEND); + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + + render_list.sort_by_depth(true); + + _render_list(&render_list.elements[render_list.max_elements-render_list.alpha_element_count],render_list.alpha_element_count,p_cam_transform,p_cam_projection,env_radiance_tex,false,false); + + #if 0 if (use_fb) { @@ -972,7 +1416,143 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM bool RasterizerSceneGLES3::free(RID p_rid) { - return false; + if (light_instance_owner.owns(p_rid)) { + + LightInstance *light_instance = light_instance_owner.getptr(p_rid); + glDeleteBuffers(1,&light_instance->light_ubo); + light_instance_owner.free(p_rid); + memdelete(light_instance); + + + } else { + return false; + } + + + return true; + +} + +// http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html +static _FORCE_INLINE_ float radicalInverse_VdC(uint32_t bits) { + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10f; // / 0x100000000 +} + +static _FORCE_INLINE_ Vector2 Hammersley(uint32_t i, uint32_t N) { + return Vector2(float(i) / float(N), radicalInverse_VdC(i)); +} + +static _FORCE_INLINE_ Vector3 ImportanceSampleGGX(Vector2 Xi, float Roughness, Vector3 N) { + float a = Roughness * Roughness; // DISNEY'S ROUGHNESS [see Burley'12 siggraph] + + // Compute distribution direction + float Phi = 2.0f * M_PI * Xi.x; + float CosTheta = Math::sqrt((1.0f - Xi.y) / (1.0f + (a*a - 1.0f) * Xi.y)); + float SinTheta = Math::sqrt((float)Math::abs(1.0f - CosTheta * CosTheta)); + + // Convert to spherical direction + Vector3 H; + H.x = SinTheta * Math::cos(Phi); + H.y = SinTheta * Math::sin(Phi); + H.z = CosTheta; + + Vector3 UpVector = Math::abs(N.z) < 0.999 ? Vector3(0.0, 0.0, 1.0) : Vector3(1.0, 0.0, 0.0); + Vector3 TangentX = UpVector.cross(N); + TangentX.normalize(); + Vector3 TangentY = N.cross(TangentX); + + // Tangent to world space + return TangentX * H.x + TangentY * H.y + N * H.z; +} + +static _FORCE_INLINE_ float GGX(float NdotV, float a) { + float k = a / 2.0; + return NdotV / (NdotV * (1.0 - k) + k); +} + +// http://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html +float _FORCE_INLINE_ G_Smith(float a, float nDotV, float nDotL) +{ + return GGX(nDotL, a * a) * GGX(nDotV, a * a); +} + +void RasterizerSceneGLES3::_generate_brdf() { + + int brdf_size=GLOBAL_DEF("rendering/gles3/brdf_texture_size",64); + + + + DVector brdf; + brdf.resize(brdf_size*brdf_size*2); + + DVector::Write w = brdf.write(); + + + for(int i=0;i 0.0 ) { + float G = G_Smith( Roughness, NoV, NoL ); + float G_Vis = G * VoH / (NoH * NoV); + float Fc = pow(1.0 - VoH, 5.0); + + A += (1.0 - Fc) * G_Vis; + B += Fc * G_Vis; + } + } + + A/=512.0; + B/=512.0; + + int tofs = ((brdf_size-j-1)*brdf_size+i)*2; + w[tofs+0]=CLAMP(A*255,0,255); + w[tofs+1]=CLAMP(B*255,0,255); + } + } + + + //set up brdf texture + + + glGenTextures(1, &state.brdf_texture); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,state.brdf_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, brdf_size, brdf_size, 0, GL_RG, GL_UNSIGNED_BYTE,w.ptr()); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glBindTexture(GL_TEXTURE_2D,0); } @@ -989,6 +1569,12 @@ void RasterizerSceneGLES3::initialize() { glBufferData(GL_UNIFORM_BUFFER, sizeof(State::SceneDataUBO), &state.scene_ubo, GL_DYNAMIC_DRAW); glBindBuffer(GL_UNIFORM_BUFFER, 0); + glGenBuffers(1, &state.env_radiance_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, state.env_radiance_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(State::EnvironmentRadianceUBO), &state.env_radiance_ubo, GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + render_list.max_elements=GLOBAL_DEF("rendering/gles3/max_renderable_elements",(int)RenderList::DEFAULT_MAX_ELEMENTS); if (render_list.max_elements>1000000) render_list.max_elements=1000000; @@ -1017,6 +1603,7 @@ void RasterizerSceneGLES3::initialize() { glBindBuffer(GL_ARRAY_BUFFER,0); //unbind } render_list.init(); + _generate_brdf(); } void RasterizerSceneGLES3::finalize(){ diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 53088deb0d7..4b1b77f13c1 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -32,12 +32,28 @@ public: float camera_inverse_matrix[16]; float camera_matrix[16]; float time[4]; - float ambient_light[4]; + float ambient_light_color[4]; + float bg_color[4]; + float ambient_energy; + float bg_energy; } ubo_data; GLuint scene_ubo; + struct EnvironmentRadianceUBO { + + float transform[16]; + float box_min[4]; //unused for now + float box_max[4]; + float ambient_contribution; + + } env_radiance_data; + + GLuint env_radiance_ubo; + + GLuint brdf_texture; + GLuint skybox_verts; GLuint skybox_array; @@ -54,16 +70,15 @@ public: RID skybox_color; RID skybox_radiance; - RID skybox_irradiance; float skybox_scale; Color bg_color; - float energy; + float bg_energy; float skybox_ambient; Color ambient_color; - float ambient_anergy; - float ambient_skybox_energy; + float ambient_energy; + float ambient_skybox_contribution; int canvas_max_layer; @@ -71,10 +86,10 @@ public: Environment() { bg_mode=VS::ENV_BG_CLEAR_COLOR; skybox_scale=1.0; - energy=1.0; + bg_energy=1.0; skybox_ambient=0; - ambient_anergy=1.0; - ambient_skybox_energy=0.0; + ambient_energy=1.0; + ambient_skybox_contribution=0.0; canvas_max_layer=0; } }; @@ -84,12 +99,12 @@ public: virtual RID environment_create(); virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg); - virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size); + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size); virtual void environment_set_skybox_scale(RID p_env,float p_scale); virtual void environment_set_bg_color(RID p_env,const Color& p_color); virtual void environment_set_bg_energy(RID p_env,float p_energy); virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer); - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0); + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_contribution=0.0); virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode); virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); @@ -100,19 +115,81 @@ public: virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); + + /* LIGHT INSTANCE */ + + struct LightInstance : public RID_Data { + + struct SplitInfo { + + CameraMatrix camera; + Transform transform; + float near; + float far; + }; + + struct LightDataUBO { + + float light_pos_inv_radius[4]; + float light_direction_attenuation[4]; + float light_color_energy[4]; + float light_params[4]; //cone attenuation, specular, shadow darkening, + float shadow_split_offsets[4]; + float shadow_matrix1[16]; + float shadow_matrix2[16]; + float shadow_matrix3[16]; + float shadow_matrix4[16]; + + } light_ubo_data; + + + SplitInfo split_info[4]; + + RID light; + RasterizerStorageGLES3::Light *light_ptr; + + CameraMatrix shadow_matrix[4]; + + Transform transform; + + Vector3 light_vector; + Vector3 spot_vector; + float linear_att; + + GLuint light_ubo; + + uint64_t shadow_pass; + uint64_t last_pass; + uint16_t light_index; + + Vector2 dp; + + CameraMatrix shadow_projection[4]; + + + LightInstance() { } + + }; + + mutable RID_Owner light_instance_owner; + + virtual RID light_instance_create(RID p_light); + virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform); + /* RENDER LIST */ struct RenderList { enum { DEFAULT_MAX_ELEMENTS=65536, - MAX_LIGHTS=4, SORT_FLAG_SKELETON=1, SORT_FLAG_INSTANCING=2, - + MAX_DIRECTIONAL_LIGHTS=16, + MAX_LIGHTS=4096, SORT_KEY_DEPTH_LAYER_SHIFT=58, SORT_KEY_LIGHT_TYPE_SHIFT=54, //type is most important SORT_KEY_LIGHT_INDEX_SHIFT=38, //type is most important + SORT_KEY_LIGHT_INDEX_UNSHADED=uint64_t(0xF) << SORT_KEY_LIGHT_TYPE_SHIFT, //type is most important SORT_KEY_MATERIAL_INDEX_SHIFT=22, SORT_KEY_GEOMETRY_INDEX_SHIFT=6, SORT_KEY_GEOMETRY_TYPE_SHIFT=2, @@ -161,7 +238,24 @@ public: SortArray sorter; if (p_alpha) { - sorter.sort(&elements[max_elements-alpha_element_count-1],alpha_element_count); + sorter.sort(&elements[max_elements-alpha_element_count],alpha_element_count); + } else { + sorter.sort(elements,element_count); + } + } + + struct SortByDepth { + + _FORCE_INLINE_ bool operator()(const Element* A, const Element* B ) const { + return A->instance->depth > B->instance->depth; + } + }; + + void sort_by_depth(bool p_alpha) { + + SortArray sorter; + if (p_alpha) { + sorter.sort(&elements[max_elements-alpha_element_count],alpha_element_count); } else { sorter.sort(elements,element_count); } @@ -197,6 +291,7 @@ public: } + RenderList() { max_elements=DEFAULT_MAX_ELEMENTS; @@ -210,26 +305,35 @@ public: + LightInstance *directional_light_instances[RenderList::MAX_DIRECTIONAL_LIGHTS]; + int directional_light_instance_count; + + LightInstance *light_instances[RenderList::MAX_LIGHTS]; + int light_instance_count; + RenderList render_list; _FORCE_INLINE_ bool _setup_material(RasterizerStorageGLES3::Material* p_material,bool p_alpha_pass); _FORCE_INLINE_ void _setup_geometry(RenderList::Element *e); _FORCE_INLINE_ void _render_geometry(RenderList::Element *e); + _FORCE_INLINE_ void _setup_light(LightInstance *p_light); + void _render_list(RenderList::Element **p_elements, int p_element_count, const Transform& p_view_transform, const CameraMatrix& p_projection, RasterizerStorageGLES3::Texture *p_base_env, bool p_reverse_cull, bool p_alpha_pass); - void _render_list(RenderList::Element **p_elements, int p_element_count, const Transform& p_view_transform, const CameraMatrix& p_projection, bool p_reverse_cull, bool p_alpha_pass); - - virtual RID light_instance_create(RID p_light); - virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform); _FORCE_INLINE_ void _add_geometry( RasterizerStorageGLES3::Geometry* p_geometry, InstanceBase *p_instance, RasterizerStorageGLES3::GeometryOwner *p_owner,int p_material); void _draw_skybox(RID p_skybox, CameraMatrix& p_projection, const Transform& p_transform, bool p_vflip, float p_scale); + void _setup_environment(Environment *env,CameraMatrix& p_cam_projection, const Transform& p_cam_transform); + void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform); + virtual void render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment); virtual bool free(RID p_rid); + void _generate_brdf(); + void initialize(); void finalize(); RasterizerSceneGLES3(); diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 94ad2afabee..1141a605c5e 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -587,6 +587,7 @@ void RasterizerStorageGLES3::texture_allocate(RID p_texture,int p_width, int p_h texture->height=p_height; texture->format=p_format; texture->flags=p_flags; + texture->stored_cube_sides=0; texture->target = (p_flags & VS::TEXTURE_FLAG_CUBEMAP) ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; _get_gl_image_and_format(Image(),texture->format,texture->flags,format,internal_format,type,compressed,srgb); @@ -759,8 +760,9 @@ void RasterizerStorageGLES3::texture_set_data(RID p_texture,const Image& p_image //printf("texture: %i x %i - size: %i - total: %i\n",texture->width,texture->height,tsize,_rinfo.texture_mem); + texture->stored_cube_sides|=(1<flags&VS::TEXTURE_FLAG_MIPMAPS && mipmaps==1 && !texture->ignore_mipmaps) { + if (texture->flags&VS::TEXTURE_FLAG_MIPMAPS && mipmaps==1 && !texture->ignore_mipmaps && (!(texture->flags&VS::TEXTURE_FLAG_CUBEMAP) || texture->stored_cube_sides==(1<<6)-1)) { //generate mipmaps if they were requested and the image does not contain them glGenerateMipmap(texture->target); } @@ -995,7 +997,7 @@ void RasterizerStorageGLES3::texture_set_shrink_all_x2_on_set_data(bool p_enable config.shrink_textures_x2=p_enable; } -RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution) const { +RID RasterizerStorageGLES3::texture_create_radiance_cubemap(RID p_source,int p_resolution) const { Texture * texture = texture_owner.get(p_source); ERR_FAIL_COND_V(!texture,RID()); @@ -1019,12 +1021,13 @@ RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeM glDisable(GL_BLEND); - glActiveTexture(GL_TEXTURE1); + glActiveTexture(GL_TEXTURE0); glBindTexture(texture->target, texture->tex_id); - glActiveTexture(GL_TEXTURE0); + glActiveTexture(GL_TEXTURE1); GLuint new_cubemap; glGenTextures(1, &new_cubemap); + glBindTexture(GL_TEXTURE_CUBE_MAP, new_cubemap); GLuint tmp_fb; @@ -1033,8 +1036,7 @@ RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeM glBindFramebuffer(GL_FRAMEBUFFER, tmp_fb); - int w = texture->width; - int h = texture->height; + int size = p_resolution; int lod=0; @@ -1044,19 +1046,42 @@ RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeM int mm_level=mipmaps; + GLenum internal_format = use_float?GL_RGBA16F:GL_RGB10_A2; + GLenum format = GL_RGBA; + GLenum type = use_float?GL_HALF_FLOAT:GL_UNSIGNED_INT_2_10_10_10_REV; + + while(mm_level) { for(int i=0;i<6;i++) { - glTexImage2D(_cube_side_enum[i], lod, use_float?GL_RGBA16F:GL_RGB10_A2, w, h, 0, GL_RGBA, use_float?GL_HALF_FLOAT:GL_UNSIGNED_INT_2_10_10_10_REV, NULL); - glTexParameteri(_cube_side_enum[i], GL_TEXTURE_BASE_LEVEL, lod); - glTexParameteri(_cube_side_enum[i], GL_TEXTURE_MAX_LEVEL, lod); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, _cube_side_enum[i], new_cubemap, 0); + glTexImage2D(_cube_side_enum[i], lod, internal_format, size, size, 0, format, type, NULL); + } - glViewport(0,0,w,h); + lod++; + mm_level--; + + if (size>1) + size>>=1; + } + + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, lod-1); + + lod=0; + mm_level=mipmaps; + + size = p_resolution; + + while(mm_level) { + + for(int i=0;i<6;i++) { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, _cube_side_enum[i], new_cubemap, lod); + + glViewport(0,0,size,size); glBindVertexArray(resources.quadie_array); shaders.cubemap_filter.set_uniform(CubemapFilterShaderGLES3::FACE_ID,i); - shaders.cubemap_filter.set_uniform(CubemapFilterShaderGLES3::ROUGHNESS,lod/float(mipmaps)); + shaders.cubemap_filter.set_uniform(CubemapFilterShaderGLES3::ROUGHNESS,lod/float(mipmaps-1)); glDrawArrays(GL_TRIANGLE_FAN,0,4); @@ -1069,29 +1094,51 @@ RID RasterizerStorageGLES3::texture_create_pbr_cubemap(RID p_source,VS::PBRCubeM - if (w>1) - w>>=1; - if (h>1) - h>>=1; - + if (size>1) + size>>=1; lod++; mm_level--; } - for(int i=0;i<6;i++) { - //restore ranges - glTexParameteri(_cube_side_enum[i], GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(_cube_side_enum[i], GL_TEXTURE_MAX_LEVEL, lod); + //restore ranges + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, lod-1); - } + glTexParameterf(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); glDeleteFramebuffers(1, &tmp_fb); + Texture * ctex = memnew( Texture ); - return RID(); + ctex->flags=VS::TEXTURE_FLAG_CUBEMAP|VS::TEXTURE_FLAG_MIPMAPS|VS::TEXTURE_FLAG_FILTER; + ctex->width=p_resolution; + ctex->height=p_resolution; + ctex->alloc_width=p_resolution; + ctex->alloc_height=p_resolution; + ctex->format=use_float?Image::FORMAT_RGBAH:Image::FORMAT_RGBA8; + ctex->target=GL_TEXTURE_CUBE_MAP; + ctex->gl_format_cache=format; + ctex->gl_internal_format_cache=internal_format; + ctex->gl_type_cache=type; + ctex->data_size=0; + ctex->compressed=false; + ctex->srgb=false; + ctex->total_data_size=0; + ctex->ignore_mipmaps=false; + ctex->mipmaps=mipmaps; + ctex->active=true; + ctex->tex_id=new_cubemap; + ctex->stored_cube_sides=(1<<6)-1; + ctex->render_target=NULL; + + return texture_owner.make_rid(ctex); } @@ -1137,9 +1184,9 @@ void RasterizerStorageGLES3::shader_set_mode(RID p_shader,VS::ShaderMode p_mode) shader->mode=p_mode; ShaderGLES3* shaders[VS::SHADER_MAX]={ - &canvas->state.canvas_shader, &scene->state.scene_shader, &canvas->state.canvas_shader, + &canvas->state.canvas_shader, }; @@ -1231,10 +1278,14 @@ void RasterizerStorageGLES3::_update_shader(Shader* p_shader) const { shaders.actions_scene.render_mode_values["cull_back"]=Pair(&p_shader->spatial.cull_mode,Shader::Spatial::CULL_MODE_BACK); shaders.actions_scene.render_mode_values["cull_disable"]=Pair(&p_shader->spatial.cull_mode,Shader::Spatial::CULL_MODE_DISABLED); - shaders.actions_canvas.render_mode_flags["unshaded"]=&p_shader->spatial.unshaded; - shaders.actions_canvas.render_mode_flags["ontop"]=&p_shader->spatial.ontop; + shaders.actions_scene.render_mode_flags["unshaded"]=&p_shader->spatial.unshaded; + shaders.actions_scene.render_mode_flags["ontop"]=&p_shader->spatial.ontop; + + shaders.actions_scene.usage_flag_pointers["ALPHA"]=&p_shader->spatial.uses_alpha; + + actions=&shaders.actions_scene; + actions->uniforms=&p_shader->uniforms; - shaders.actions_canvas.usage_flag_pointers["ALPHA"]=&p_shader->spatial.uses_alpha; } @@ -1243,6 +1294,7 @@ void RasterizerStorageGLES3::_update_shader(Shader* p_shader) const { Error err = shaders.compiler.compile(p_shader->mode,p_shader->code,actions,p_shader->path,gen_code); + ERR_FAIL_COND(err!=OK); p_shader->shader->set_custom_shader_code(p_shader->custom_code_id,gen_code.vertex,gen_code.vertex_global,gen_code.fragment,gen_code.light,gen_code.fragment_global,gen_code.uniforms,gen_code.texture_uniforms,gen_code.defines); @@ -1633,29 +1685,29 @@ _FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataTy gui[0]=v.r; gui[1]=v.g; - gui[3]=v.b; - gui[4]=v.a; + gui[2]=v.b; + gui[3]=v.a; } else if (value.get_type()==Variant::RECT2) { Rect2 v=value; gui[0]=v.pos.x; gui[1]=v.pos.y; - gui[3]=v.size.x; - gui[4]=v.size.y; + gui[2]=v.size.x; + gui[3]=v.size.y; } else if (value.get_type()==Variant::QUAT) { Quat v=value; gui[0]=v.x; gui[1]=v.y; - gui[3]=v.z; - gui[4]=v.w; + gui[2]=v.z; + gui[3]=v.w; } else { Plane v=value; gui[0]=v.normal.x; gui[1]=v.normal.y; - gui[3]=v.normal.x; - gui[4]=v.d; + gui[2]=v.normal.x; + gui[3]=v.d; } } break; @@ -2307,8 +2359,6 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh,uint32_t p_format,VS::P if (p_format&VS::ARRAY_FORMAT_INDEX) { index_array_size=attribs[VS::ARRAY_INDEX].stride*p_index_count; - - print_line("index count: "+itos(p_index_count)+" stride: "+itos(attribs[VS::ARRAY_INDEX].stride) ); } @@ -2882,55 +2932,134 @@ Matrix32 RasterizerStorageGLES3::skeleton_bone_get_transform_2d(RID p_skeleton,i RID RasterizerStorageGLES3::light_create(VS::LightType p_type){ - return RID(); + Light *light = memnew( Light ); + light->type=p_type; + + light->param[VS::LIGHT_PARAM_ENERGY]=1.0; + light->param[VS::LIGHT_PARAM_SPECULAR]=1.0; + light->param[VS::LIGHT_PARAM_RANGE]=1.0; + light->param[VS::LIGHT_PARAM_SPOT_ANGLE]=45; + light->param[VS::LIGHT_PARAM_SHADOW_MAX_DISTANCE]=0; + light->param[VS::LIGHT_PARAM_SHADOW_DARKNESS]=0; + light->param[VS::LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET]=0.1; + light->param[VS::LIGHT_PARAM_SHADOW_SPLIT_2_OFFSET]=0.3; + light->param[VS::LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET]=0.6; + light->param[VS::LIGHT_PARAM_SHADOW_SPLIT_4_OFFSET]=1.0; + light->param[VS::LIGHT_PARAM_SHADOW_NORMAL_BIAS]=0.1; + light->param[VS::LIGHT_PARAM_SHADOW_BIAS_SPLIT_SCALE]=0.1; + + + light->color=Color(1,1,1,1); + light->shadow=false; + light->negative=false; + light->cull_mask=0xFFFFFFFF; + light->directional_shadow_mode=VS::LIGHT_DIRECTIONAL_SHADOW_ORTHOGONAL; + + return light_owner.make_rid(light); } void RasterizerStorageGLES3::light_set_color(RID p_light,const Color& p_color){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->color=p_color; } void RasterizerStorageGLES3::light_set_param(RID p_light,VS::LightParam p_param,float p_value){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + ERR_FAIL_INDEX(p_param,VS::LIGHT_PARAM_MAX); + if (p_param==VS::LIGHT_PARAM_RANGE || p_param==VS::LIGHT_PARAM_SPOT_ANGLE) { + light->instance_change_notify(); + } + light->param[p_param]=p_value; } void RasterizerStorageGLES3::light_set_shadow(RID p_light,bool p_enabled){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->shadow=p_enabled; } void RasterizerStorageGLES3::light_set_projector(RID p_light,RID p_texture){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + } void RasterizerStorageGLES3::light_set_attenuation_texure(RID p_light,RID p_texture){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + } void RasterizerStorageGLES3::light_set_negative(RID p_light,bool p_enable){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->negative=p_enable; } void RasterizerStorageGLES3::light_set_cull_mask(RID p_light,uint32_t p_mask){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); + light->cull_mask=p_mask; } void RasterizerStorageGLES3::light_set_shader(RID p_light,RID p_shader){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); } void RasterizerStorageGLES3::light_directional_set_shadow_mode(RID p_light,VS::LightDirectionalShadowMode p_mode){ + Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND(!light); } VS::LightType RasterizerStorageGLES3::light_get_type(RID p_light) const { + const Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light,VS::LIGHT_DIRECTIONAL); + return VS::LIGHT_DIRECTIONAL; } AABB RasterizerStorageGLES3::light_get_aabb(RID p_light) const { + const Light * light = light_owner.getornull(p_light); + ERR_FAIL_COND_V(!light,AABB()); + + switch( light->type ) { + + case VS::LIGHT_SPOT: { + + float len=light->param[VS::LIGHT_PARAM_RANGE]; + float size=Math::tan(Math::deg2rad(light->param[VS::LIGHT_PARAM_SPOT_ANGLE]))*len; + return AABB( Vector3( -size,-size,-len ), Vector3( size*2, size*2, len ) ); + } break; + case VS::LIGHT_OMNI: { + + float r = light->param[VS::LIGHT_PARAM_RANGE]; + return AABB( -Vector3(r,r,r), Vector3(r,r,r)*2 ); + } break; + case VS::LIGHT_DIRECTIONAL: { + + return AABB(); + } break; + default: {} + } + + ERR_FAIL_V( AABB() ); return AABB(); } @@ -3028,6 +3157,10 @@ void RasterizerStorageGLES3::instance_add_dependency(RID p_base,RasterizerScene: inst = mesh_owner.getornull(p_base); ERR_FAIL_COND(!inst); } break; + case VS::INSTANCE_LIGHT: { + inst = light_owner.getornull(p_base); + ERR_FAIL_COND(!inst); + } break; default: { ERR_FAIL(); } @@ -3046,6 +3179,10 @@ void RasterizerStorageGLES3::instance_remove_dependency(RID p_base,RasterizerSce ERR_FAIL_COND(!inst); } break; + case VS::INSTANCE_LIGHT: { + inst = light_owner.getornull(p_base); + ERR_FAIL_COND(!inst); + } break; default: { ERR_FAIL(); } @@ -3542,6 +3679,9 @@ VS::InstanceType RasterizerStorageGLES3::get_base_type(RID p_rid) const { if (mesh_owner.owns(p_rid)) { return VS::INSTANCE_MESH; } + if (light_owner.owns(p_rid)) { + return VS::INSTANCE_LIGHT; + } return VS::INSTANCE_NONE; } @@ -3617,6 +3757,14 @@ bool RasterizerStorageGLES3::free(RID p_rid){ mesh_owner.free(p_rid); memdelete(mesh); + } else if (light_owner.owns(p_rid)) { + + // delete the texture + Light *light = light_owner.get(p_rid); + + light_owner.free(p_rid); + memdelete(light); + } else if (canvas_occluder_owner.owns(p_rid)) { diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index c3022b3ac07..f052aa8019c 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -135,10 +135,13 @@ public: bool active; GLuint tex_id; + uint16_t stored_cube_sides; + RenderTarget *render_target; Texture() { + stored_cube_sides=0; ignore_mipmaps=false; render_target=NULL; flags=width=height=0; @@ -184,7 +187,7 @@ public: virtual void texture_debug_usage(List *r_info); - virtual RID texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution=-1) const; + virtual RID texture_create_radiance_cubemap(RID p_source,int p_resolution=-1) const; /* SHADER API */ @@ -351,13 +354,6 @@ public: struct Instantiable : public RID_Data { - enum Type { - GEOMETRY_INVALID, - GEOMETRY_SURFACE, - GEOMETRY_IMMEDIATE, - GEOMETRY_MULTISURFACE, - }; - SelfList::List instance_list; _FORCE_INLINE_ void instance_change_notify() { @@ -582,6 +578,20 @@ public: /* Light API */ + + struct Light : Instantiable { + + VS::LightType type; + float param[VS::LIGHT_PARAM_MAX]; + Color color; + bool shadow; + bool negative; + uint32_t cull_mask; + VS::LightDirectionalShadowMode directional_shadow_mode; + }; + + mutable RID_Owner light_owner; + virtual RID light_create(VS::LightType p_type); virtual void light_set_color(RID p_light,const Color& p_color); diff --git a/drivers/gles3/shader_compiler_gles3.cpp b/drivers/gles3/shader_compiler_gles3.cpp index 49707400ca1..da8f6689d0e 100644 --- a/drivers/gles3/shader_compiler_gles3.cpp +++ b/drivers/gles3/shader_compiler_gles3.cpp @@ -48,7 +48,7 @@ static int _get_datatype_size(SL::DataType p_type) { case SL::TYPE_SAMPLERCUBE: return 16; } - + ERR_FAIL_V(0); } @@ -195,7 +195,12 @@ String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, Gener for(Map::Element *E=pnode->uniforms.front();E;E=E->next()) { - String ucode="uniform "; + String ucode; + + if (SL::is_sampler_type(E->get().type)) { + ucode="uniform "; + } + ucode+=_prestr(E->get().precission); ucode+=_typestr(E->get().type); ucode+=" "+_mkid(E->key()); @@ -228,7 +233,7 @@ String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, Gener for(int i=0;i0) - r_gen_code.uniform_offsets[i]=uniform_sizes[i]-1; + r_gen_code.uniform_offsets[i]=uniform_sizes[i-1]; else r_gen_code.uniform_offsets[i]=0; } @@ -320,7 +325,11 @@ String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, Gener SL::VariableNode *vnode=(SL::VariableNode*)p_node; if (p_default_actions.usage_defines.has(vnode->name) && !used_name_defines.has(vnode->name)) { - r_gen_code.defines.push_back(p_default_actions.usage_defines[vnode->name].utf8()); + String define = p_default_actions.usage_defines[vnode->name]; + if (define.begins_with("@")) { + define = p_default_actions.usage_defines[define.substr(1,define.length())]; + } + r_gen_code.defines.push_back(define.utf8()); used_name_defines.insert(vnode->name); } @@ -451,6 +460,14 @@ Error ShaderCompilerGLES3::compile(VS::ShaderMode p_mode, const String& p_code, Error err = parser.compile(p_code,ShaderTypes::get_singleton()->get_functions(p_mode),ShaderTypes::get_singleton()->get_modes(p_mode)); if (err!=OK) { +#if 1 + + Vector shader = p_code.split("\n"); + for(int i=0;i func_list; ShaderLanguage::get_builtin_funcs(&func_list); diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index ebdf60cf423..052c9152419 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -751,6 +751,7 @@ void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String& p_ver ERR_FAIL_COND(!custom_code_map.has(p_code_id)); CustomCode *cc=&custom_code_map[p_code_id]; + cc->vertex=p_vertex; cc->vertex_globals=p_vertex_globals; cc->fragment=p_fragment; diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 176a2282fdf..4c7f682dff7 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -29,6 +29,7 @@ #ifndef SHADER_GLES3_H #define SHADER_GLES3_H +#include #include "platform_config.h" #ifndef GLES3_INCLUDE_H diff --git a/drivers/gles3/shaders/cubemap_filter.glsl b/drivers/gles3/shaders/cubemap_filter.glsl index f450f341133..d3d4cbd435e 100644 --- a/drivers/gles3/shaders/cubemap_filter.glsl +++ b/drivers/gles3/shaders/cubemap_filter.glsl @@ -16,19 +16,26 @@ void main() { [fragment] -uniform samplerCube source_cube; //texunit:1 +precision highp float; +precision highp int; + + +uniform samplerCube source_cube; //texunit:0 uniform int face_id; uniform float roughness; in highp vec2 uv_interp; -layout(location = 0) vec4 frag_color; +layout(location = 0) out vec4 frag_color; + + +#define M_PI 3.14159265359 vec3 texelCoordToVec(vec2 uv, int faceID) { mat3 faceUvVectors[6]; - +/* // -x faceUvVectors[1][0] = vec3(0.0, 0.0, 1.0); // u -> +z faceUvVectors[1][1] = vec3(0.0, -1.0, 0.0); // v -> -y @@ -58,6 +65,37 @@ vec3 texelCoordToVec(vec2 uv, int faceID) faceUvVectors[4][0] = vec3(1.0, 0.0, 0.0); // u -> +x faceUvVectors[4][1] = vec3(0.0, -1.0, 0.0); // v -> -y faceUvVectors[4][2] = vec3(0.0, 0.0, 1.0); // +z face +*/ + + // -x + faceUvVectors[0][0] = vec3(0.0, 0.0, 1.0); // u -> +z + faceUvVectors[0][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[0][2] = vec3(-1.0, 0.0, 0.0); // -x face + + // +x + faceUvVectors[1][0] = vec3(0.0, 0.0, -1.0); // u -> -z + faceUvVectors[1][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[1][2] = vec3(1.0, 0.0, 0.0); // +x face + + // -y + faceUvVectors[2][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[2][1] = vec3(0.0, 0.0, -1.0); // v -> -z + faceUvVectors[2][2] = vec3(0.0, -1.0, 0.0); // -y face + + // +y + faceUvVectors[3][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[3][1] = vec3(0.0, 0.0, 1.0); // v -> +z + faceUvVectors[3][2] = vec3(0.0, 1.0, 0.0); // +y face + + // -z + faceUvVectors[4][0] = vec3(-1.0, 0.0, 0.0); // u -> -x + faceUvVectors[4][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[4][2] = vec3(0.0, 0.0, -1.0); // -z face + + // +z + faceUvVectors[5][0] = vec3(1.0, 0.0, 0.0); // u -> +x + faceUvVectors[5][1] = vec3(0.0, -1.0, 0.0); // v -> -y + faceUvVectors[5][2] = vec3(0.0, 0.0, 1.0); // +z face // out = u * s_faceUv[0] + v * s_faceUv[1] + s_faceUv[2]. vec3 result = (faceUvVectors[faceID][0] * uv.x) + (faceUvVectors[faceID][1] * uv.y) + faceUvVectors[faceID][2]; @@ -113,7 +151,7 @@ vec2 Hammersley(uint i, uint N) { return vec2(float(i)/float(N), radicalInverse_VdC(i)); } -#define SAMPLE_COUNT 1024 +#define SAMPLE_COUNT 1024u void main() { @@ -123,20 +161,21 @@ void main() { //vec4 color = color_interp; vec4 sum = vec4(0.0, 0.0, 0.0, 0.0); - for(int sampleNum = 0; sampleNum < SAMPLE_COUNT; sampleNum++) { + for(uint sampleNum = 0u; sampleNum < SAMPLE_COUNT; sampleNum++) { vec2 xi = Hammersley(sampleNum, SAMPLE_COUNT); - vec2 xi = texture2DLod(Texture0, vec2(float(sampleNum) / float(SAMPLE_COUNT), 0.5), 0.0).xy; vec3 H = ImportanceSampleGGX( xi, roughness, N ); vec3 V = N; vec3 L = normalize(2.0 * dot( V, H ) * H - V); - float ndotl = max(0.0, dot(N, L)); - vec3 s = textureCubeLod(u_skyCube, H, 0.0).rgb * ndotl; + float ndotl = clamp(dot(N, L),0.0,1.0); - sum += vec4(s, 1.0); + if (ndotl>0.0) { + sum.rgb += textureLod(source_cube, H, 0.0).rgb *ndotl; + sum.a += ndotl; + } } - sum /= sum.w; + sum /= sum.a; frag_color = vec4(sum.rgb, 1.0); } diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 4183e828f55..3f942526068 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -1,7 +1,7 @@ [vertex] - +#define ENABLE_UV_INTERP /* from VisualServer: @@ -52,14 +52,47 @@ layout(std140) uniform SceneData { //ubo:0 highp mat4 camera_matrix; highp vec4 time; - highp vec4 ambient_light; + highp vec4 ambient_light_color; + highp vec4 bg_color; + float ambient_energy; + float bg_energy; }; uniform highp mat4 world_transform; +#ifdef USE_FORWARD_LIGHTING + +layout(std140) uniform LightData { //ubo:3 + + highp vec4 light_pos_inv_radius; + mediump vec4 light_direction_attenuation; + mediump vec4 light_color_energy; + mediump vec4 light_params; //cone attenuation, specular, shadow darkening, + mediump vec4 shadow_split_offsets; + highp mat4 shadow_matrix1; + highp mat4 shadow_matrix2; + highp mat4 shadow_matrix3; + highp mat4 shadow_matrix4; +}; + +#ifdef USE_FORWARD_1_SHADOW_MAP +out mediump vec4 forward_shadow_pos1; +#endif + +#ifdef USE_FORWARD_2_SHADOW_MAP +out mediump vec4 forward_shadow_pos2; +#endif + +#ifdef USE_FORWARD_4_SHADOW_MAP +out mediump vec4 forward_shadow_pos3; +out mediump vec4 forward_shadow_pos4; +#endif + +#endif + /* Varyings */ -out vec3 vertex_interp; +out highp vec3 vertex_interp; out vec3 normal_interp; #if defined(ENABLE_COLOR_INTERP) @@ -74,13 +107,6 @@ out vec2 uv_interp; out vec2 uv2_interp; #endif -#if defined(ENABLE_VAR1_INTERP) -out vec4 var1_interp; -#endif - -#if defined(ENABLE_VAR2_INTERP) -out vec4 var2_interp; -#endif #if defined(ENABLE_TANGENT_INTERP) out vec3 tangent_interp; @@ -118,13 +144,13 @@ MATERIAL_UNIFORMS void main() { - highp vec4 vertex_in = vertex_attrib; // vec4(vertex_attrib.xyz * data_attrib.x,1.0); + highp vec4 vertex = vertex_attrib; // vec4(vertex_attrib.xyz * data_attrib.x,1.0); highp mat4 modelview = camera_inverse_matrix * world_transform; - vec3 normal_in = normal_attrib; - normal_in*=normal_mult; + vec3 normal = normal_attrib * normal_mult; + #if defined(ENABLE_TANGENT_INTERP) - vec3 tangent_in = tangent_attrib.xyz; - tangent_in*=normal_mult; + vec3 tangent = tangent_attrib.xyz; + tangent*=normal_mult; float binormalf = tangent_attrib.a; #endif @@ -137,23 +163,31 @@ void main() { m+=mat4(texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.z*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.z; m+=mat4(texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+0.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+1.0)*skeltex_pixel_size,0.0)),texture2D(skeleton_matrices,vec2((bone_indices.w*3.0+2.0)*skeltex_pixel_size,0.0)),vec4(0.0,0.0,0.0,1.0))*bone_weights.w; - vertex_in = vertex_in * m; - normal_in = (vec4(normal_in,0.0) * m).xyz; + vertex = vertex_in * m; + normal = (vec4(normal,0.0) * m).xyz; #if defined(ENABLE_TANGENT_INTERP) - tangent_in = (vec4(tangent_in,0.0) * m).xyz; + tangent = (vec4(tangent,0.0) * m).xyz; #endif } #endif - vertex_interp = (modelview * vertex_in).xyz; - normal_interp = normalize((modelview * vec4(normal_in,0.0)).xyz); +#if !defined(SKIP_TRANSFORM_USED) + + vertex = modelview * vertex; + normal = normalize((modelview * vec4(normal,0.0)).xyz); +#endif #if defined(ENABLE_TANGENT_INTERP) - tangent_interp=normalize((modelview * vec4(tangent_in,0.0)).xyz); - binormal_interp = normalize( cross(normal_interp,tangent_interp) * binormalf ); +# if !defined(SKIP_TRANSFORM_USED) + + tangent=normalize((modelview * vec4(tangent,0.0)).xyz); +# endif + vec3 binormal = normalize( cross(normal,tangent) * binormalf ); #endif + + #if defined(ENABLE_COLOR_INTERP) color_interp = color_attrib; #endif @@ -161,13 +195,17 @@ void main() { #if defined(ENABLE_UV_INTERP) uv_interp = uv_attrib; #endif + #if defined(ENABLE_UV2_INTERP) uv2_interp = uv2_attrib; #endif +{ VERTEX_SHADER_CODE +} + #ifdef USE_SHADOW_PASS @@ -177,26 +215,32 @@ VERTEX_SHADER_CODE #endif -#ifdef USE_FOG + vertex_interp = vertex.xyz; + normal_interp = normal; - fog_interp.a = pow( clamp( (length(vertex_interp)-fog_params.x)/(fog_params.y-fog_params.x), 0.0, 1.0 ), fog_params.z ); - fog_interp.rgb = mix( fog_color_begin, fog_color_end, fog_interp.a ); +#if defined(ENABLE_TANGENT_INTERP) + tangent_interp = tangent; + binormal_interp = binormal; #endif -#ifndef VERTEX_SHADER_WRITE_POSITION -//vertex shader might write a position +#if !defined(SKIP_TRANSFORM_USED) gl_Position = projection_matrix * vec4(vertex_interp,1.0); +#else + gl_Position = vertex; #endif - - } [fragment] + +#define M_PI 3.14159265359 + + +#define ENABLE_UV_INTERP //hack to use uv if no uv present so it works with lightmap @@ -219,18 +263,28 @@ in vec3 tangent_interp; in vec3 binormal_interp; #endif -#if defined(ENABLE_VAR1_INTERP) -in vec4 var1_interp; -#endif - -#if defined(ENABLE_VAR2_INTERP) -in vec4 var2_interp; -#endif - -in vec3 vertex_interp; +in highp vec3 vertex_interp; in vec3 normal_interp; +/* PBR CHANNELS */ + +#ifdef USE_RADIANCE_CUBEMAP + +uniform sampler2D brdf_texture; //texunit:-1 +uniform samplerCube radiance_cube; //texunit:-2 + +layout(std140) uniform Radiance { //ubo:2 + + mat4 radiance_inverse_xform; + vec3 radiance_box_min; + vec3 radiance_box_max; + float radiance_ambient_contribution; + +}; + +#endif + /* Material Uniforms */ @@ -255,18 +309,97 @@ layout(std140) uniform SceneData { highp mat4 camera_matrix; highp vec4 time; - highp vec4 ambient_light; + highp vec4 ambient_light_color; + highp vec4 bg_color; + float ambient_energy; + float bg_energy; }; + +#ifdef USE_FORWARD_LIGHTING + +layout(std140) uniform LightData { + + highp vec4 light_pos_inv_radius; + mediump vec4 light_direction_attenuation; + mediump vec4 light_color_energy; + mediump vec4 light_params; //cone attenuation, specular, shadow darkening, + mediump vec4 shadow_split_offsets; + highp mat4 shadow_matrix1; + highp mat4 shadow_matrix2; + highp mat4 shadow_matrix3; + highp mat4 shadow_matrix4; +}; + +#ifdef USE_FORWARD_1_SHADOW_MAP +in mediump vec4 forward_shadow_pos1; +#endif + +#ifdef USE_FORWARD_2_SHADOW_MAP +in mediump vec4 forward_shadow_pos2; +#endif + +#ifdef USE_FORWARD_4_SHADOW_MAP +in mediump vec4 forward_shadow_pos3; +in mediump vec4 forward_shadow_pos4; +#endif + +#endif + layout(location=0) out vec4 frag_color; + +// GGX Specular +// Source: http://www.filmicworlds.com/images/ggx-opt/optimized-ggx.hlsl +float G1V(float dotNV, float k) +{ + return 1.0 / (dotNV * (1.0 - k) + k); +} + +float specularGGX(vec3 N, vec3 V, vec3 L, float roughness, float F0) +{ + float alpha = roughness * roughness; + + vec3 H = normalize(V + L); + + float dotNL = max(dot(N,L), 0.0 ); + float dotNV = max(dot(N,V), 0.0 ); + float dotNH = max(dot(N,H), 0.0 ); + float dotLH = max(dot(L,H), 0.0 ); + + // D + float alphaSqr = alpha * alpha; + float pi = M_PI; + float denom = dotNH * dotNH * (alphaSqr - 1.0) + 1.0; + float D = alphaSqr / (pi * denom * denom); + + // F + float dotLH5 = pow(1.0 - dotLH, 5.0); + float F = F0 + (1.0 - F0) * (dotLH5); + + // V + float k = alpha / 2.0f; + float vis = G1V(dotNL, k) * G1V(dotNV, k); + + return dotNL * D * F * vis; +} + +void light_compute(vec3 normal, vec3 light_vec,vec3 eye_vec,vec3 diffuse_color, vec3 specular_color, float roughness, float attenuation, inout vec3 diffuse, inout vec3 specular) { + + diffuse += max(0.0,dot(normal,light_vec)) * diffuse_color * attenuation; + //specular += specular_ggx( roughness, max(0.0,dot(normal,eye_vec)) ) * specular_color * attenuation; + float s = roughness > 0.0 ? specularGGX(normal,eye_vec,light_vec,roughness,1.0) : 0.0; + specular += s * specular_color * attenuation; +} + + void main() { //lay out everything, whathever is unused is optimized away anyway vec3 vertex = vertex_interp; - vec3 albedo = vec3(0.9,0.9,0.9); - vec3 metal = vec3(0.0,0.0,0.0); - float rough = 0.0; + vec3 albedo = vec3(0.8,0.8,0.8); + vec3 specular = vec3(0.2,0.2,0.2); + float roughness = 1.0; float alpha = 1.0; #ifdef METERIAL_DOUBLESIDED @@ -334,6 +467,66 @@ FRAGMENT_SHADER_CODE } #endif +/////////////////////// LIGHTING ////////////////////////////// + + vec3 specular_light = vec3(0.0,0.0,0.0); + vec3 ambient_light = ambient_light_color.rgb; + vec3 diffuse_light = vec3(0.0,0.0,0.0); + + vec3 eye_vec = -normalize( vertex_interp ); + +#ifdef USE_RADIANCE_CUBEMAP + + { + + float ndotv = clamp(dot(normal,eye_vec),0.0,1.0); + vec2 brdf = texture(brdf_texture, vec2(roughness, ndotv)).xy; + + float lod = roughness * 5.0; + vec3 r = reflect(-eye_vec,normal); //2.0 * ndotv * normal - view; // reflect(v, n); + r=normalize((radiance_inverse_xform * vec4(r,0.0)).xyz); + vec3 radiance = textureLod(radiance_cube, r, lod).xyz * ( brdf.x + brdf.y); + + specular_light=mix(albedo,radiance,specular); + + } + + { + + vec3 ambient_dir=normalize((radiance_inverse_xform * vec4(normal,0.0)).xyz); + vec3 env_ambient=textureLod(radiance_cube, ambient_dir, 5.0).xyz; + + ambient_light=mix(ambient_light,env_ambient,radiance_ambient_contribution); + } + + +#else + + ambient_light=albedo; +#endif + + +#ifdef USE_FORWARD_LIGHTING + +#ifdef USE_FORWARD_DIRECTIONAL + + light_compute(normal,light_direction_attenuation.xyz,eye_vec,albedo,specular,roughness,1.0,diffuse_light,specular_light); +#endif + +#ifdef USE_FORWARD_OMNI + + vec3 light_rel_vec = light_pos_inv_radius.xyz-vertex; + float normalized_distance = length( light_rel_vec )*light_pos_inv_radius.w; + float light_attenuation = pow( max(1.0 - normalized_distance, 0.0), light_direction_attenuation.w ); + light_compute(normal,normalize(light_rel_vec),eye_vec,albedo,specular,roughness,light_attenuation,diffuse_light,specular_light); + +#endif + +#ifdef USE_FORWARD_SPOT + +#endif + +#endif #if defined(USE_LIGHT_SHADER_CODE) @@ -345,7 +538,14 @@ LIGHT_SHADER_CODE } #endif +#ifdef SHADELESS + frag_color=vec4(albedo,alpha); +#else + frag_color=vec4(ambient_light+diffuse_light+specular_light,alpha); + +#endif + } diff --git a/modules/gridmap/grid_map.cpp b/modules/gridmap/grid_map.cpp index 6e73244b57e..57027bd9bfd 100644 --- a/modules/gridmap/grid_map.cpp +++ b/modules/gridmap/grid_map.cpp @@ -960,9 +960,9 @@ void GridMap::_octant_bake(const OctantKey &p_key, const Ref& p_tm st->add_to_format(VS::ARRAY_FORMAT_COLOR); if (m.is_valid()) { - Ref fm = m; + Ref fm = m; if (fm.is_valid()) - fm->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); + fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY,true); } } } diff --git a/platform/iphone/rasterizer_iphone.cpp b/platform/iphone/rasterizer_iphone.cpp index 5478569a426..874056f76d8 100644 --- a/platform/iphone/rasterizer_iphone.cpp +++ b/platform/iphone/rasterizer_iphone.cpp @@ -486,7 +486,7 @@ RID RasterizerIPhone::material_create() { return material_owner.make_rid( memnew( Material ) ); } -void RasterizerIPhone::fixed_material_set_parameter(RID p_material, VS::FixedMaterialParam p_parameter, const Variant& p_value) { +void RasterizerIPhone::fixed_material_set_parameter(RID p_material, VS::FixedSpatialMaterialParam p_parameter, const Variant& p_value) { Material *m=material_owner.get( p_material ); ERR_FAIL_COND(!m); @@ -494,7 +494,7 @@ void RasterizerIPhone::fixed_material_set_parameter(RID p_material, VS::FixedMat m->parameters[p_parameter] = p_value; } -Variant RasterizerIPhone::fixed_material_get_parameter(RID p_material,VS::FixedMaterialParam p_parameter) const { +Variant RasterizerIPhone::fixed_material_get_parameter(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const { Material *m=material_owner.get( p_material ); ERR_FAIL_COND_V(!m, Variant()); @@ -503,7 +503,7 @@ Variant RasterizerIPhone::fixed_material_get_parameter(RID p_material,VS::FixedM return m->parameters[p_parameter]; } -void RasterizerIPhone::fixed_material_set_texture(RID p_material,VS::FixedMaterialParam p_parameter, RID p_texture) { +void RasterizerIPhone::fixed_material_set_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter, RID p_texture) { Material *m=material_owner.get( p_material ); ERR_FAIL_COND(!m); @@ -511,7 +511,7 @@ void RasterizerIPhone::fixed_material_set_texture(RID p_material,VS::FixedMateri m->textures[p_parameter] = p_texture; } -RID RasterizerIPhone::fixed_material_get_texture(RID p_material,VS::FixedMaterialParam p_parameter) const { +RID RasterizerIPhone::fixed_material_get_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const { Material *m=material_owner.get( p_material ); ERR_FAIL_COND_V(!m, RID()); @@ -535,7 +535,7 @@ VS::MaterialBlendMode RasterizerIPhone::fixed_material_get_detail_blend_mode(RID return m->detail_blend_mode; } -void RasterizerIPhone::fixed_material_set_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter, VS::FixedMaterialTexCoordMode p_mode) { +void RasterizerIPhone::fixed_material_set_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter, VS::FixedSpatialMaterialTexCoordMode p_mode) { Material *m=material_owner.get( p_material ); ERR_FAIL_COND(!m); @@ -543,7 +543,7 @@ void RasterizerIPhone::fixed_material_set_texcoord_mode(RID p_material,VS::Fixed m->texcoord_mode[p_parameter] = p_mode; } -VS::FixedMaterialTexCoordMode RasterizerIPhone::fixed_material_get_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter) const { +VS::FixedSpatialMaterialTexCoordMode RasterizerIPhone::fixed_material_get_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const { Material *m=material_owner.get( p_material ); ERR_FAIL_COND_V(!m, VS::FIXED_MATERIAL_TEXCOORD_TEXGEN); @@ -552,7 +552,7 @@ VS::FixedMaterialTexCoordMode RasterizerIPhone::fixed_material_get_texcoord_mode return m->texcoord_mode[p_parameter]; // for now } -void RasterizerIPhone::fixed_material_set_texgen_mode(RID p_material,VS::FixedMaterialTexGenMode p_mode) { +void RasterizerIPhone::fixed_material_set_texgen_mode(RID p_material,VS::FixedSpatialMaterialTexGenMode p_mode) { Material *m=material_owner.get( p_material ); ERR_FAIL_COND(!m); @@ -560,7 +560,7 @@ void RasterizerIPhone::fixed_material_set_texgen_mode(RID p_material,VS::FixedMa m->texgen_mode = p_mode; }; -VS::FixedMaterialTexGenMode RasterizerIPhone::fixed_material_get_texgen_mode(RID p_material) const { +VS::FixedSpatialMaterialTexGenMode RasterizerIPhone::fixed_material_get_texgen_mode(RID p_material) const { Material *m=material_owner.get( p_material ); ERR_FAIL_COND_V(!m, VS::FIXED_MATERIAL_TEXGEN_SPHERE); diff --git a/platform/iphone/rasterizer_iphone.h b/platform/iphone/rasterizer_iphone.h index fcbb339ab30..bb9bddfe2d2 100644 --- a/platform/iphone/rasterizer_iphone.h +++ b/platform/iphone/rasterizer_iphone.h @@ -100,11 +100,11 @@ class RasterizerIPhone : public Rasterizer { RID textures[VisualServer::FIXED_MATERIAL_PARAM_MAX]; Transform uv_transform; - VS::FixedMaterialTexCoordMode texcoord_mode[VisualServer::FIXED_MATERIAL_PARAM_MAX]; + VS::FixedSpatialMaterialTexCoordMode texcoord_mode[VisualServer::FIXED_MATERIAL_PARAM_MAX]; VS::MaterialBlendMode detail_blend_mode; - VS::FixedMaterialTexGenMode texgen_mode; + VS::FixedSpatialMaterialTexGenMode texgen_mode; Material() { @@ -624,20 +624,20 @@ public: virtual RID material_create(); - virtual void fixed_material_set_parameter(RID p_material, VS::FixedMaterialParam p_parameter, const Variant& p_value); - virtual Variant fixed_material_get_parameter(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_parameter(RID p_material, VS::FixedSpatialMaterialParam p_parameter, const Variant& p_value); + virtual Variant fixed_material_get_parameter(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; - virtual void fixed_material_set_texture(RID p_material,VS::FixedMaterialParam p_parameter, RID p_texture); - virtual RID fixed_material_get_texture(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter, RID p_texture); + virtual RID fixed_material_get_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; virtual void fixed_material_set_detail_blend_mode(RID p_material,VS::MaterialBlendMode p_mode); virtual VS::MaterialBlendMode fixed_material_get_detail_blend_mode(RID p_material) const; - virtual void fixed_material_set_texgen_mode(RID p_material,VS::FixedMaterialTexGenMode p_mode); - virtual VS::FixedMaterialTexGenMode fixed_material_get_texgen_mode(RID p_material) const; + virtual void fixed_material_set_texgen_mode(RID p_material,VS::FixedSpatialMaterialTexGenMode p_mode); + virtual VS::FixedSpatialMaterialTexGenMode fixed_material_get_texgen_mode(RID p_material) const; - virtual void fixed_material_set_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter, VS::FixedMaterialTexCoordMode p_mode); - virtual VS::FixedMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter, VS::FixedSpatialMaterialTexCoordMode p_mode); + virtual VS::FixedSpatialMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; virtual void fixed_material_set_uv_transform(RID p_material,const Transform& p_transform); virtual Transform fixed_material_get_uv_transform(RID p_material) const; diff --git a/scene/3d/light.cpp b/scene/3d/light.cpp index d98f1000204..88ba7b3731e 100644 --- a/scene/3d/light.cpp +++ b/scene/3d/light.cpp @@ -39,24 +39,88 @@ bool Light::_can_gizmo_scale() const { } +void Light::set_param(Param p_param, float p_value) { + + ERR_FAIL_INDEX(p_param,PARAM_MAX); + param[p_param]=p_value; + + VS::get_singleton()->light_set_param(light,VS::LightParam(p_param),p_value); + + if (p_param==PARAM_SPOT_ANGLE || p_param==PARAM_RANGE) { + update_gizmo();; + } + + +} + +float Light::get_param(Param p_param) const{ + + ERR_FAIL_INDEX_V(p_param,PARAM_MAX,0); + return param[p_param]; + +} + +void Light::set_shadow(bool p_enable){ + + shadow=p_enable; + VS::get_singleton()->light_set_shadow(light,p_enable); + +} +bool Light::has_shadow() const{ + + return shadow; +} + +void Light::set_negative(bool p_enable){ + + negative=p_enable; + VS::get_singleton()->light_set_negative(light,p_enable); +} +bool Light::is_negative() const{ + + return negative; +} + +void Light::set_cull_mask(uint32_t p_cull_mask){ + + cull_mask=p_cull_mask; + VS::get_singleton()->light_set_cull_mask(light,p_cull_mask); + +} +uint32_t Light::get_cull_mask() const{ + + return cull_mask; +} + +void Light::set_color(const Color& p_color){ + + color=p_color; + VS::get_singleton()->light_set_color(light,p_color); +} +Color Light::get_color() const{ + + return color; +} + + AABB Light::get_aabb() const { -#if 0 + if (type==VisualServer::LIGHT_DIRECTIONAL) { return AABB( Vector3(-1,-1,-1), Vector3(2, 2, 2 ) ); } else if (type==VisualServer::LIGHT_OMNI) { - return AABB( Vector3(-1,-1,-1) * vars[PARAM_RADIUS], Vector3(2, 2, 2 ) * vars[PARAM_RADIUS]); + return AABB( Vector3(-1,-1,-1) * param[PARAM_RANGE], Vector3(2, 2, 2 ) * param[PARAM_RANGE]); } else if (type==VisualServer::LIGHT_SPOT) { - float len=vars[PARAM_RADIUS]; - float size=Math::tan(Math::deg2rad(vars[PARAM_SPOT_ANGLE]))*len; + float len=param[PARAM_RANGE]; + float size=Math::tan(Math::deg2rad(param[PARAM_SPOT_ANGLE]))*len; return AABB( Vector3( -size,-size,-len ), Vector3( size*2, size*2, len ) ); } -#endif + return AABB(); } @@ -118,10 +182,51 @@ void Light::_bind_methods() { ObjectTypeDB::bind_method(_MD("is_editor_only"), &Light::is_editor_only ); - ADD_PROPERTY( PropertyInfo( Variant::BOOL, "params/editor_only"), _SCS("set_editor_only"), _SCS("is_editor_only")); + ObjectTypeDB::bind_method(_MD("set_param","param","value"), &Light::set_param ); + ObjectTypeDB::bind_method(_MD("get_param","param"), &Light::get_param ); + ObjectTypeDB::bind_method(_MD("set_shadow","enabled"), &Light::set_shadow ); + ObjectTypeDB::bind_method(_MD("has_shadow"), &Light::has_shadow ); + ObjectTypeDB::bind_method(_MD("set_negative","enabled"), &Light::set_negative ); + ObjectTypeDB::bind_method(_MD("is_negative"), &Light::is_negative ); + ObjectTypeDB::bind_method(_MD("set_cull_mask","cull_mask"), &Light::set_cull_mask ); + ObjectTypeDB::bind_method(_MD("get_cull_mask"), &Light::get_cull_mask ); + + ObjectTypeDB::bind_method(_MD("set_color","color"), &Light::set_color ); + ObjectTypeDB::bind_method(_MD("get_color"), &Light::get_color ); + + ADD_PROPERTY( PropertyInfo( Variant::COLOR, "light/color"), _SCS("set_color"), _SCS("get_color")); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/energy"), _SCS("set_param"), _SCS("get_param"), PARAM_ENERGY); + ADD_PROPERTY( PropertyInfo( Variant::BOOL, "light/negative"), _SCS("set_negative"), _SCS("is_negative")); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/specular"), _SCS("set_param"), _SCS("get_param"), PARAM_SPECULAR); + ADD_PROPERTY( PropertyInfo( Variant::INT, "light/cull_mask"), _SCS("set_cull_mask"), _SCS("get_cull_mask")); + ADD_PROPERTY( PropertyInfo( Variant::INT, "shadow/enabled"), _SCS("set_shadow"), _SCS("has_shadow")); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/darkness"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_DARKNESS); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/normal_bias"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_NORMAL_BIAS); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/bias"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_BIAS); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/bias_split_scale"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_BIAS_SPLIT_SCALE); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "shadow/max_distance"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_MAX_DISTANCE); + + ADD_PROPERTY( PropertyInfo( Variant::BOOL, "editor/editor_only"), _SCS("set_editor_only"), _SCS("is_editor_only")); + + BIND_CONSTANT( PARAM_ENERGY ); + BIND_CONSTANT( PARAM_SPECULAR ); + BIND_CONSTANT( PARAM_RANGE ); + BIND_CONSTANT( PARAM_ATTENUATION ); + BIND_CONSTANT( PARAM_SPOT_ANGLE ); + BIND_CONSTANT( PARAM_SPOT_ATTENUATION ); + BIND_CONSTANT( PARAM_SHADOW_MAX_DISTANCE ); + BIND_CONSTANT( PARAM_SHADOW_DARKNESS ); + BIND_CONSTANT( PARAM_SHADOW_SPLIT_1_OFFSET ); + BIND_CONSTANT( PARAM_SHADOW_SPLIT_2_OFFSET ); + BIND_CONSTANT( PARAM_SHADOW_SPLIT_3_OFFSET ); + BIND_CONSTANT( PARAM_SHADOW_SPLIT_4_OFFSET ); + BIND_CONSTANT( PARAM_SHADOW_NORMAL_BIAS ); + BIND_CONSTANT( PARAM_SHADOW_BIAS ); + BIND_CONSTANT( PARAM_SHADOW_BIAS_SPLIT_SCALE ); + BIND_CONSTANT( PARAM_MAX ); } @@ -131,9 +236,29 @@ Light::Light(VisualServer::LightType p_type) { type=p_type; light=VisualServer::get_singleton()->light_create(p_type); - + VS::get_singleton()->instance_set_base(get_instance(),light); editor_only=false; + set_color(Color(1,1,1,1)); + set_shadow(false); + set_negative(false); + set_cull_mask(0xFFFFFFFF); + + set_param(PARAM_ENERGY,1); + set_param(PARAM_SPECULAR,1); + set_param(PARAM_RANGE,5); + set_param(PARAM_ATTENUATION,1); + set_param(PARAM_SPOT_ANGLE,45); + set_param(PARAM_SPOT_ATTENUATION,1); + set_param(PARAM_SHADOW_MAX_DISTANCE,0); + set_param(PARAM_SHADOW_DARKNESS,0); + set_param(PARAM_SHADOW_SPLIT_1_OFFSET,0.1); + set_param(PARAM_SHADOW_SPLIT_2_OFFSET,0.2); + set_param(PARAM_SHADOW_SPLIT_3_OFFSET,0.5); + set_param(PARAM_SHADOW_SPLIT_4_OFFSET,1.0); + set_param(PARAM_SHADOW_NORMAL_BIAS,0.1); + set_param(PARAM_SHADOW_BIAS,0.1); + set_param(PARAM_SHADOW_BIAS_SPLIT_SCALE,0.1); } @@ -147,6 +272,8 @@ Light::Light() { Light::~Light() { + VS::get_singleton()->instance_set_base(get_instance(),RID()); + if (light.is_valid()) VisualServer::get_singleton()->free(light); } @@ -156,6 +283,10 @@ Light::~Light() { void DirectionalLight::_bind_methods() { + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "pssm/split_1"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_SPLIT_1_OFFSET); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "pssm/split_2"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_SPLIT_2_OFFSET); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "pssm/split_3"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_SPLIT_3_OFFSET); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "pssm/split_4"), _SCS("set_param"), _SCS("get_param"), PARAM_SHADOW_SPLIT_4_OFFSET); } @@ -169,11 +300,16 @@ DirectionalLight::DirectionalLight() : Light( VisualServer::LIGHT_DIRECTIONAL ) void OmniLight::_bind_methods() { + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/range"), _SCS("set_param"), _SCS("get_param"), PARAM_RANGE); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/attenuation"), _SCS("set_param"), _SCS("get_param"), PARAM_ATTENUATION); } void SpotLight::_bind_methods() { + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/spot_angle"), _SCS("set_param"), _SCS("get_param"), PARAM_SPOT_ANGLE); + ADD_PROPERTYI( PropertyInfo( Variant::REAL, "light/spot_attenuation"), _SCS("set_param"), _SCS("get_param"), PARAM_SPOT_ATTENUATION); + } diff --git a/scene/3d/light.h b/scene/3d/light.h index 3c31d90d4c0..7da2d8e7caf 100644 --- a/scene/3d/light.h +++ b/scene/3d/light.h @@ -44,10 +44,32 @@ class Light : public VisualInstance { public: - + enum Param { + PARAM_ENERGY, + PARAM_SPECULAR, + PARAM_RANGE, + PARAM_ATTENUATION, + PARAM_SPOT_ANGLE, + PARAM_SPOT_ATTENUATION, + PARAM_SHADOW_MAX_DISTANCE, + PARAM_SHADOW_DARKNESS, + PARAM_SHADOW_SPLIT_1_OFFSET, + PARAM_SHADOW_SPLIT_2_OFFSET, + PARAM_SHADOW_SPLIT_3_OFFSET, + PARAM_SHADOW_SPLIT_4_OFFSET, + PARAM_SHADOW_NORMAL_BIAS, + PARAM_SHADOW_BIAS, + PARAM_SHADOW_BIAS_SPLIT_SCALE, + PARAM_MAX + }; private: + Color color; + float param[PARAM_MAX]; + bool shadow; + bool negative; + uint32_t cull_mask; VS::LightType type; bool editor_only; void _update_visibility(); @@ -71,6 +93,22 @@ public: void set_editor_only(bool p_editor_only); bool is_editor_only() const; + void set_param(Param p_param, float p_value); + float get_param(Param p_param) const; + + void set_shadow(bool p_enable); + bool has_shadow() const; + + void set_negative(bool p_enable); + bool is_negative() const; + + void set_cull_mask(uint32_t p_cull_mask); + uint32_t get_cull_mask() const; + + void set_color(const Color& p_color); + Color get_color() const; + + virtual AABB get_aabb() const; virtual DVector get_faces(uint32_t p_usage_flags) const; @@ -79,6 +117,7 @@ public: }; +VARIANT_ENUM_CAST(Light::Param); class DirectionalLight : public Light { diff --git a/scene/3d/particles.cpp b/scene/3d/particles.cpp index 994e3298536..1e18ba2c791 100644 --- a/scene/3d/particles.cpp +++ b/scene/3d/particles.cpp @@ -319,10 +319,10 @@ RES Particles::_get_gizmo_geometry() const { Ref surface_tool( memnew( SurfaceTool )); - Ref mat( memnew( FixedMaterial )); + Ref mat( memnew( FixedSpatialMaterial )); - mat->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.0,0.6,0.7,0.2) ); - mat->set_parameter( FixedMaterial::PARAM_EMISSION,Color(0.5,0.7,0.8) ); + mat->set_parameter( FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.0,0.6,0.7,0.2) ); + mat->set_parameter( FixedSpatialMaterial::PARAM_EMISSION,Color(0.5,0.7,0.8) ); mat->set_blend_mode( Material::BLEND_MODE_ADD ); mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); // mat->set_hint(Material::HINT_NO_DEPTH_DRAW,true); @@ -382,9 +382,9 @@ RES Particles::_get_gizmo_geometry() const { Ref mesh = surface_tool->commit(); - Ref mat_aabb( memnew( FixedMaterial )); + Ref mat_aabb( memnew( FixedSpatialMaterial )); - mat_aabb->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.8,0.8,0.9,0.7) ); + mat_aabb->set_parameter( FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.8,0.8,0.9,0.7) ); mat_aabb->set_line_width(3); mat_aabb->set_flag( Material::FLAG_UNSHADED, true ); diff --git a/scene/main/scene_main_loop.cpp b/scene/main/scene_main_loop.cpp index c1552a4167d..cfc67533782 100644 --- a/scene/main/scene_main_loop.cpp +++ b/scene/main/scene_main_loop.cpp @@ -490,6 +490,8 @@ void SceneTree::input_event( const InputEvent& p_event ) { } + _call_idle_callbacks(); + } void SceneTree::init() { @@ -528,6 +530,7 @@ bool SceneTree::iteration(float p_time) { root_lock--; _flush_delete_queue(); + _call_idle_callbacks(); return _quit; } @@ -590,6 +593,8 @@ bool SceneTree::idle(float p_time){ E=N; } + _call_idle_callbacks(); + return _quit; } @@ -745,12 +750,12 @@ Ref SceneTree::get_debug_navigation_material() { if (navigation_material.is_valid()) return navigation_material; - Ref line_material = Ref( memnew( FixedMaterial )); + Ref line_material = Ref( memnew( FixedSpatialMaterial )); /* line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_color());*/ + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,get_debug_navigation_color());*/ navigation_material=line_material; @@ -763,12 +768,12 @@ Ref SceneTree::get_debug_navigation_disabled_material(){ if (navigation_disabled_material.is_valid()) return navigation_disabled_material; - Ref line_material = Ref( memnew( FixedMaterial )); + Ref line_material = Ref( memnew( FixedSpatialMaterial )); /* line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_navigation_disabled_color());*/ + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,get_debug_navigation_disabled_color());*/ navigation_disabled_material=line_material; @@ -781,12 +786,12 @@ Ref SceneTree::get_debug_collision_material() { return collision_material; - Ref line_material = Ref( memnew( FixedMaterial )); + Ref line_material = Ref( memnew( FixedSpatialMaterial )); /*line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collisions_color());*/ + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,get_debug_collisions_color());*/ collision_material=line_material; @@ -800,11 +805,11 @@ Ref SceneTree::get_debug_contact_mesh() { debug_contact_mesh = Ref( memnew( Mesh ) ); - Ref mat = memnew( FixedMaterial ); + Ref mat = memnew( FixedSpatialMaterial ); /*mat->set_flag(Material::FLAG_UNSHADED,true); mat->set_flag(Material::FLAG_DOUBLE_SIDED,true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,get_debug_collision_contact_color());*/ + mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); + mat->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,get_debug_collision_contact_color());*/ Vector3 diamond[6]={ Vector3(-1, 0, 0), @@ -2259,6 +2264,23 @@ void SceneTree::_bind_methods() { SceneTree *SceneTree::singleton=NULL; + +SceneTree::IdleCallback SceneTree::idle_callbacks[SceneTree::MAX_IDLE_CALLBACKS]; +int SceneTree::idle_callback_count=0; + +void SceneTree::_call_idle_callbacks() { + + for(int i=0;i=MAX_IDLE_CALLBACKS); + idle_callbacks[idle_callback_count++]=p_callback; +} + + SceneTree::SceneTree() { singleton=this; diff --git a/scene/main/scene_main_loop.h b/scene/main/scene_main_loop.h index 1c0f88862c4..4860f4a9bfa 100644 --- a/scene/main/scene_main_loop.h +++ b/scene/main/scene_main_loop.h @@ -74,6 +74,8 @@ class SceneTree : public MainLoop { public: + typedef void (*IdleCallback)(); + enum StretchMode { STRETCH_MODE_DISABLED, @@ -303,6 +305,15 @@ friend class Viewport; static void _live_edit_reparent_node_funcs(void* self,const NodePath& p_at,const NodePath& p_new_place,const String& p_new_name,int p_at_pos) { reinterpret_cast(self)->_live_edit_reparent_node_func(p_at,p_new_place,p_new_name,p_at_pos); } #endif + + enum { + MAX_IDLE_CALLBACKS=256 + }; + + static IdleCallback idle_callbacks[MAX_IDLE_CALLBACKS]; + static int idle_callback_count; + void _call_idle_callbacks(); + protected: @@ -430,6 +441,7 @@ public: void set_refuse_new_network_connections(bool p_refuse); bool is_refusing_new_network_connections() const; + static void add_idle_callback(IdleCallback p_callback); SceneTree(); ~SceneTree(); diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index e93a16371d3..35f72c5ce7c 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -2513,7 +2513,7 @@ Vector2 Viewport::get_camera_coords(const Vector2 &p_viewport_coords) const { Vector2 Viewport::get_camera_rect_size() const { - return last_vp_rect.size; + return size; } diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp index ad845555a46..e2f5b56796e 100644 --- a/scene/register_scene_types.cpp +++ b/scene/register_scene_types.cpp @@ -544,14 +544,16 @@ void register_scene_types() { #ifndef _3D_DISABLED ObjectTypeDB::register_type(); ObjectTypeDB::register_virtual_type(); - ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); + SceneTree::add_idle_callback(FixedSpatialMaterial::flush_changes); + FixedSpatialMaterial::init_shaders(); // ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); // ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::add_compatibility_type("Shader","MaterialShader"); - ObjectTypeDB::add_compatibility_type("ParticleSystemMaterial","FixedMaterial"); - ObjectTypeDB::add_compatibility_type("UnshadedMaterial","FixedMaterial"); + ObjectTypeDB::add_compatibility_type("ParticleSystemMaterial","FixedSpatialMaterial"); + ObjectTypeDB::add_compatibility_type("UnshadedMaterial","FixedSpatialMaterial"); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); @@ -655,6 +657,7 @@ void unregister_scene_types() { memdelete( resource_loader_wav ); memdelete( resource_loader_dynamic_font ); + #ifdef TOOLS_ENABLED @@ -669,5 +672,7 @@ void unregister_scene_types() { if (resource_loader_text) { memdelete(resource_loader_text); } + + FixedSpatialMaterial::finish_shaders(); SceneStringNames::free(); } diff --git a/scene/resources/environment.cpp b/scene/resources/environment.cpp index 85d16da9151..3897c86d5b2 100644 --- a/scene/resources/environment.cpp +++ b/scene/resources/environment.cpp @@ -53,7 +53,7 @@ void Environment::set_skybox(const Ref& p_skybox){ sb_rid=bg_skybox->get_rid(); print_line("skybox valid: "+itos(sb_rid.is_valid())); - VS::get_singleton()->environment_set_skybox(environment,sb_rid,Globals::get_singleton()->get("rendering/skybox/radiance_cube_resolution"),Globals::get_singleton()->get("rendering/skybox/iradiance_cube_resolution")); + VS::get_singleton()->environment_set_skybox(environment,sb_rid,Globals::get_singleton()->get("rendering/skybox/radiance_cube_resolution")); } void Environment::set_skybox_scale(float p_scale) { @@ -80,17 +80,17 @@ void Environment::set_canvas_max_layer(int p_max_layer){ void Environment::set_ambient_light_color(const Color& p_color){ ambient_color=p_color; - VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_contribution); } void Environment::set_ambient_light_energy(float p_energy){ ambient_energy=p_energy; - VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_contribution); } -void Environment::set_ambient_light_skybox_energy(float p_energy){ +void Environment::set_ambient_light_skybox_contribution(float p_energy){ - ambient_skybox_energy=p_energy; - VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_energy); + ambient_skybox_contribution=p_energy; + VS::get_singleton()->environment_set_ambient_light(environment,ambient_color,ambient_energy,ambient_skybox_contribution); } Environment::BGMode Environment::get_background() const{ @@ -127,16 +127,16 @@ float Environment::get_ambient_light_energy() const{ return ambient_energy; } -float Environment::get_ambient_light_skybox_energy() const{ +float Environment::get_ambient_light_skybox_contribution() const{ - return ambient_skybox_energy; + return ambient_skybox_contribution; } void Environment::_validate_property(PropertyInfo& property) const { - if (property.name=="background/skybox" || property.name=="ambient_light/skybox_energy") { + if (property.name=="background/skybox" || property.name=="background/skybox_scale" || property.name=="ambient_light/skybox_contribution") { if (bg_mode!=BG_SKYBOX) { property.usage=PROPERTY_USAGE_NOEDITOR; } @@ -166,7 +166,7 @@ void Environment::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_canvas_max_layer","layer"),&Environment::set_canvas_max_layer); ObjectTypeDB::bind_method(_MD("set_ambient_light_color","color"),&Environment::set_ambient_light_color); ObjectTypeDB::bind_method(_MD("set_ambient_light_energy","energy"),&Environment::set_ambient_light_energy); - ObjectTypeDB::bind_method(_MD("set_ambient_light_skybox_energy","energy"),&Environment::set_ambient_light_skybox_energy); + ObjectTypeDB::bind_method(_MD("set_ambient_light_skybox_contribution","energy"),&Environment::set_ambient_light_skybox_contribution); ObjectTypeDB::bind_method(_MD("get_background"),&Environment::get_background); @@ -177,7 +177,7 @@ void Environment::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_canvas_max_layer"),&Environment::get_canvas_max_layer); ObjectTypeDB::bind_method(_MD("get_ambient_light_color"),&Environment::get_ambient_light_color); ObjectTypeDB::bind_method(_MD("get_ambient_light_energy"),&Environment::get_ambient_light_energy); - ObjectTypeDB::bind_method(_MD("get_ambient_light_skybox_energy"),&Environment::get_ambient_light_skybox_energy); + ObjectTypeDB::bind_method(_MD("get_ambient_light_skybox_contribution"),&Environment::get_ambient_light_skybox_contribution); ADD_PROPERTY(PropertyInfo(Variant::INT,"background/mode",PROPERTY_HINT_ENUM,"Clear Color,Custom Color,Skybox,Canvas,Keep"),_SCS("set_background"),_SCS("get_background") ); @@ -188,7 +188,7 @@ void Environment::_bind_methods() { ADD_PROPERTY(PropertyInfo(Variant::INT,"background/canvas_max_layer",PROPERTY_HINT_RANGE,"-1000,1000,1"),_SCS("set_canvas_max_layer"),_SCS("get_canvas_max_layer") ); ADD_PROPERTY(PropertyInfo(Variant::COLOR,"ambient_light/color"),_SCS("set_ambient_light_color"),_SCS("get_ambient_light_color") ); ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_ambient_light_energy"),_SCS("get_ambient_light_energy") ); - ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/skybox_energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_ambient_light_skybox_energy"),_SCS("get_ambient_light_skybox_energy") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/skybox_contribution",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_ambient_light_skybox_contribution"),_SCS("get_ambient_light_skybox_contribution") ); GLOBAL_DEF("rendering/skybox/irradiance_cube_resolution",256); GLOBAL_DEF("rendering/skybox/radiance_cube_resolution",64); @@ -209,10 +209,11 @@ void Environment::_bind_methods() { Environment::Environment() { bg_mode=BG_CLEAR_COLOR; + bg_skybox_scale=1.0; bg_energy=1.0; bg_canvas_max_layer=0; ambient_energy=1.0; - ambient_skybox_energy=0; + ambient_skybox_contribution=0; environment = VS::get_singleton()->environment_create(); diff --git a/scene/resources/environment.h b/scene/resources/environment.h index 0435ffc6cb8..6478f6420f7 100644 --- a/scene/resources/environment.h +++ b/scene/resources/environment.h @@ -66,7 +66,7 @@ private: int bg_canvas_max_layer; Color ambient_color; float ambient_energy; - float ambient_skybox_energy; + float ambient_skybox_contribution; protected: @@ -84,7 +84,7 @@ public: void set_canvas_max_layer(int p_max_layer); void set_ambient_light_color(const Color& p_color); void set_ambient_light_energy(float p_energy); - void set_ambient_light_skybox_energy(float p_energy); + void set_ambient_light_skybox_contribution(float p_energy); BGMode get_background() const; Ref get_skybox() const; @@ -94,7 +94,7 @@ public: int get_canvas_max_layer() const; Color get_ambient_light_color() const; float get_ambient_light_energy() const; - float get_ambient_light_skybox_energy() const; + float get_ambient_light_skybox_contribution() const; virtual RID get_rid() const; diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index 3ebe5b7800b..e3df1461c79 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -45,14 +45,762 @@ Material::~Material() { } -FixedMaterial::FixedMaterial() { - - -} - -FixedMaterial::~FixedMaterial() { - - -} - ///////////////////////////////// + +Mutex *FixedSpatialMaterial::material_mutex=NULL; +SelfList::List FixedSpatialMaterial::dirty_materials; +Map FixedSpatialMaterial::shader_map; +FixedSpatialMaterial::ShaderNames* FixedSpatialMaterial::shader_names=NULL; + +void FixedSpatialMaterial::init_shaders() { + +#ifndef NO_THREADS + material_mutex = Mutex::create(); +#endif + + shader_names = memnew( ShaderNames ); + + shader_names->albedo="albedo"; + shader_names->specular="specular"; + shader_names->roughness="roughness"; + shader_names->emission="emission"; + shader_names->normal_scale="normal_scale"; + shader_names->sheen="sheen"; + shader_names->sheen_color="sheen_color"; + shader_names->clearcoat="clearcoat"; + shader_names->clearcoat_gloss="clearcoat_gloss"; + shader_names->anisotropy="anisotropy"; + shader_names->height_scale="height_scale"; + shader_names->subsurface_scattering="subsurface_scattering"; + shader_names->refraction="refraction"; + shader_names->refraction_roughness="refraction_roughness"; + + shader_names->texture_names[TEXTURE_ALBEDO]="texture_albedo"; + shader_names->texture_names[TEXTURE_SPECULAR]="texture_specular"; + shader_names->texture_names[TEXTURE_EMISSION]="texture_emission"; + shader_names->texture_names[TEXTURE_NORMAL]="texture_normal"; + shader_names->texture_names[TEXTURE_SHEEN]="texture_sheen"; + shader_names->texture_names[TEXTURE_CLEARCOAT]="texture_clearcoat"; + shader_names->texture_names[TEXTURE_ANISOTROPY]="texture_anisotropy"; + shader_names->texture_names[TEXTURE_AMBIENT_OCCLUSION]="texture_ambient_occlusion"; + shader_names->texture_names[TEXTURE_HEIGHT]="texture_height"; + shader_names->texture_names[TEXTURE_SUBSURFACE_SCATTERING]="texture_subsurface_scattering"; + shader_names->texture_names[TEXTURE_REFRACTION]="texture_refraction"; + shader_names->texture_names[TEXTURE_REFRACTION_ROUGHNESS]="texture_refraction_roughness"; + shader_names->texture_names[TEXTURE_DETAIL_MASK]="texture_detail_mask"; + shader_names->texture_names[TEXTURE_DETAIL_ALBEDO]="texture_detail_albedo"; + shader_names->texture_names[TEXTURE_DETAIL_NORMAL]="texture_detail_normal"; + +} + +void FixedSpatialMaterial::finish_shaders(){ + +#ifndef NO_THREADS + memdelete( material_mutex ); +#endif + + memdelete( shader_names ); + +} + + + +void FixedSpatialMaterial::_update_shader() { + + dirty_materials.remove( &element ); + + MaterialKey mk = _compute_key(); + if (mk.key==current_key.key) + return; //no update required in the end + + if (shader_map.has(current_key)) { + shader_map[current_key].users--; + if (shader_map[current_key].users==0) { + //deallocate shader, as it's no longer in use + VS::get_singleton()->free(shader_map[current_key].shader); + shader_map.erase(current_key); + } + } + + current_key=mk; + + if (shader_map.has(mk)) { + + VS::get_singleton()->material_set_shader(_get_material(),shader_map[mk].shader); + shader_map[mk].users++; + return; + } + + //must create a shader! + + String code="render_mode "; + switch(blend_mode) { + case BLEND_MODE_MIX: code+="blend_mix"; break; + case BLEND_MODE_ADD: code+="blend_add"; break; + case BLEND_MODE_SUB: code+="blend_sub"; break; + case BLEND_MODE_MUL: code+="blend_mul"; break; + } + + switch(depth_draw_mode) { + case DEPTH_DRAW_OPAQUE_ONLY: code+=",depth_draw_opaque"; break; + case DEPTH_DRAW_ALWAYS: code+=",depth_draw_always"; break; + case DEPTH_DRAW_DISABLED: code+=",depth_draw_never"; break; + case DEPTH_DRAW_ALPHA_OPAQUE_PREPASS: code+=",depth_draw_alpha_prepass"; break; + } + + switch(cull_mode) { + case CULL_BACK: code+=",cull_back"; break; + case CULL_FRONT: code+=",cull_front"; break; + case CULL_DISABLED: code+=",cull_disable"; break; + + } + + if (flags[FLAG_UNSHADED]) { + code+=",unshaded"; + } + if (flags[FLAG_ONTOP]) { + code+=",ontop"; + } + + code+=";\n"; + + + code+="uniform vec4 albedo : hint_color;\n"; + code+="uniform sampler2D albedo_texture : hint_albedo;\n"; + code+="uniform vec4 specular : hint_color;\n"; + code+="uniform float roughness : hint_range(0,1);\n"; + code+="uniform sampler2D specular_texture : hint_white;\n"; + code+="\n\n"; + + code+="\n\n"; + code+="void fragment() {\n"; + code+="\tvec4 albedo_tex = texture(albedo_texture,UV);\n"; + + if (flags[FLAG_ALBEDO_FROM_VERTEX_COLOR]) { + code+="\talbedo_tex *= COLOR;\n"; + } + + code+="\tALBEDO = albedo.rgb * albedo_tex.rgb;\n"; + if (features[FEATURE_TRANSPARENT]) { + code+="\tALPHA = albedo.a * albedo_tex.a;\n"; + } + code+="\tvec4 specular_tex = texture(specular_texture,UV);\n"; + code+="\tSPECULAR = specular.rgb * specular_tex.rgb;\n"; + code+="\tROUGHNESS = specular.a * roughness;\n"; + code+="}\n"; + + ShaderData shader_data; + shader_data.shader = VS::get_singleton()->shader_create(VS::SHADER_SPATIAL); + shader_data.users=1; + + VS::get_singleton()->shader_set_code( shader_data.shader, code ); + + shader_map[mk]=shader_data; + + VS::get_singleton()->material_set_shader(_get_material(),shader_data.shader); + +} + +void FixedSpatialMaterial::flush_changes() { + + if (material_mutex) + material_mutex->lock(); + + while (dirty_materials.first()) { + + dirty_materials.first()->self()->_update_shader(); + } + + if (material_mutex) + material_mutex->unlock(); +} + +void FixedSpatialMaterial::_queue_shader_change() { + + if (material_mutex) + material_mutex->lock(); + + if (!element.in_list()) { + dirty_materials.add(&element); + } + + if (material_mutex) + material_mutex->unlock(); + + +} + +bool FixedSpatialMaterial::_is_shader_dirty() const { + + bool dirty=false; + + if (material_mutex) + material_mutex->lock(); + + dirty=element.in_list(); + + if (material_mutex) + material_mutex->unlock(); + + return dirty; +} +void FixedSpatialMaterial::set_albedo(const Color& p_albedo) { + + albedo=p_albedo; + + VS::get_singleton()->material_set_param(_get_material(),shader_names->albedo,p_albedo); +} + +Color FixedSpatialMaterial::get_albedo() const{ + + return albedo; +} + +void FixedSpatialMaterial::set_specular(const Color& p_specular){ + + specular=p_specular; + VS::get_singleton()->material_set_param(_get_material(),shader_names->specular,p_specular); + +} +Color FixedSpatialMaterial::get_specular() const{ + + return specular; +} + +void FixedSpatialMaterial::set_roughness(float p_roughness){ + + roughness=p_roughness; + VS::get_singleton()->material_set_param(_get_material(),shader_names->roughness,p_roughness); + +} +float FixedSpatialMaterial::get_roughness() const{ + + return roughness; +} + +void FixedSpatialMaterial::set_emission(const Color& p_emission){ + + emission=p_emission; + VS::get_singleton()->material_set_param(_get_material(),shader_names->emission,p_emission); + +} +Color FixedSpatialMaterial::get_emission() const{ + + return emission; +} + +void FixedSpatialMaterial::set_normal_scale(float p_normal_scale){ + + normal_scale=p_normal_scale; + VS::get_singleton()->material_set_param(_get_material(),shader_names->normal_scale,p_normal_scale); + +} +float FixedSpatialMaterial::get_normal_scale() const{ + + return normal_scale; +} + +void FixedSpatialMaterial::set_sheen(float p_sheen){ + + sheen=p_sheen; + VS::get_singleton()->material_set_param(_get_material(),shader_names->sheen,p_sheen); + + +} +float FixedSpatialMaterial::get_sheen() const{ + + return sheen; +} + +void FixedSpatialMaterial::set_sheen_color(const Color& p_sheen_color){ + + sheen_color=p_sheen_color; + VS::get_singleton()->material_set_param(_get_material(),shader_names->sheen_color,p_sheen_color); + +} +Color FixedSpatialMaterial::get_sheen_color() const{ + + return sheen_color; +} + +void FixedSpatialMaterial::set_clearcoat(float p_clearcoat){ + + clearcoat=p_clearcoat; + VS::get_singleton()->material_set_param(_get_material(),shader_names->clearcoat,p_clearcoat); + +} + +float FixedSpatialMaterial::get_clearcoat() const{ + + return clearcoat; +} + +void FixedSpatialMaterial::set_clearcoat_gloss(float p_clearcoat_gloss){ + + clearcoat_gloss=p_clearcoat_gloss; + VS::get_singleton()->material_set_param(_get_material(),shader_names->clearcoat_gloss,p_clearcoat_gloss); + + +} + +float FixedSpatialMaterial::get_clearcoat_gloss() const{ + + return clearcoat_gloss; +} + +void FixedSpatialMaterial::set_anisotropy(float p_anisotropy){ + + anisotropy=p_anisotropy; + VS::get_singleton()->material_set_param(_get_material(),shader_names->anisotropy,p_anisotropy); + +} +float FixedSpatialMaterial::get_anisotropy() const{ + + return anisotropy; +} + +void FixedSpatialMaterial::set_height_scale(float p_height_scale){ + + height_scale=p_height_scale; + VS::get_singleton()->material_set_param(_get_material(),shader_names->height_scale,p_height_scale); + + +} + +float FixedSpatialMaterial::get_height_scale() const{ + + return height_scale; +} + +void FixedSpatialMaterial::set_subsurface_scattering(float p_subsurface_scattering){ + + subsurface_scattering=p_subsurface_scattering; + VS::get_singleton()->material_set_param(_get_material(),shader_names->subsurface_scattering,subsurface_scattering); + + +} + +float FixedSpatialMaterial::get_subsurface_scattering() const{ + + return subsurface_scattering; +} + +void FixedSpatialMaterial::set_refraction(float p_refraction){ + + refraction=p_refraction; + VS::get_singleton()->material_set_param(_get_material(),shader_names->refraction,refraction); + +} + +float FixedSpatialMaterial::get_refraction() const { + + return refraction; +} + +void FixedSpatialMaterial::set_refraction_roughness(float p_refraction_roughness) { + + refraction_roughness=p_refraction_roughness; + VS::get_singleton()->material_set_param(_get_material(),shader_names->refraction_roughness,refraction_roughness); + + +} +float FixedSpatialMaterial::get_refraction_roughness() const { + + return refraction_roughness; +} + +void FixedSpatialMaterial::set_detail_uv(DetailUV p_detail_uv) { + + if (detail_uv==p_detail_uv) + return; + + detail_uv=p_detail_uv; + _queue_shader_change(); +} +FixedSpatialMaterial::DetailUV FixedSpatialMaterial::get_detail_uv() const { + + return detail_uv; +} + +void FixedSpatialMaterial::set_blend_mode(BlendMode p_mode) { + + if (blend_mode==p_mode) + return; + + blend_mode=p_mode; + _queue_shader_change(); +} +FixedSpatialMaterial::BlendMode FixedSpatialMaterial::get_blend_mode() const { + + return blend_mode; +} + +void FixedSpatialMaterial::set_detail_blend_mode(BlendMode p_mode) { + + detail_blend_mode=p_mode; + _queue_shader_change(); +} +FixedSpatialMaterial::BlendMode FixedSpatialMaterial::get_detail_blend_mode() const { + + return detail_blend_mode; +} + +void FixedSpatialMaterial::set_depth_draw_mode(DepthDrawMode p_mode) { + + if (depth_draw_mode==p_mode) + return; + + depth_draw_mode=p_mode; + _queue_shader_change(); +} +FixedSpatialMaterial::DepthDrawMode FixedSpatialMaterial::get_depth_draw_mode() const { + + return depth_draw_mode; +} + +void FixedSpatialMaterial::set_cull_mode(CullMode p_mode) { + + if (cull_mode==p_mode) + return; + + cull_mode=p_mode; + _queue_shader_change(); +} +FixedSpatialMaterial::CullMode FixedSpatialMaterial::get_cull_mode() const { + + return cull_mode; +} + +void FixedSpatialMaterial::set_diffuse_mode(DiffuseMode p_mode) { + + if (diffuse_mode==p_mode) + return; + + diffuse_mode=p_mode; + _queue_shader_change(); +} +FixedSpatialMaterial::DiffuseMode FixedSpatialMaterial::get_diffuse_mode() const { + + return diffuse_mode; +} + +void FixedSpatialMaterial::set_flag(Flags p_flag,bool p_enabled) { + + ERR_FAIL_INDEX(p_flag,FLAG_MAX); + + if (flags[p_flag]==p_enabled) + return; + + flags[p_flag]=p_enabled; + _queue_shader_change(); +} + +bool FixedSpatialMaterial::get_flag(Flags p_flag) const { + + ERR_FAIL_INDEX_V(p_flag,FLAG_MAX,false); + return flags[p_flag]; +} + +void FixedSpatialMaterial::set_feature(Feature p_feature,bool p_enabled) { + + ERR_FAIL_INDEX(p_feature,FEATURE_MAX); + if (features[p_feature]==p_enabled) + return; + + features[p_feature]=p_enabled; + _change_notify(); + _queue_shader_change(); + + +} + +bool FixedSpatialMaterial::get_feature(Feature p_feature) const { + + ERR_FAIL_INDEX_V(p_feature,FEATURE_MAX,false); + return features[p_feature]; +} + + + +void FixedSpatialMaterial::set_texture(TextureParam p_param, const Ref &p_texture) { + + ERR_FAIL_INDEX(p_param,TEXTURE_MAX); + textures[p_param]=p_texture; + VS::get_singleton()->material_set_param(_get_material(),shader_names->texture_names[p_param],p_texture); +} + +Ref FixedSpatialMaterial::get_texture(TextureParam p_param) const { + + ERR_FAIL_INDEX_V(p_param,TEXTURE_MAX,Ref()); + return textures[p_param]; +} + + +void FixedSpatialMaterial::_validate_feature(const String& text, Feature feature,PropertyInfo& property) const { + if (property.name.begins_with(text) && property.name!=text+"/enabled" && !features[feature]) { + property.usage=0; + } +} + +void FixedSpatialMaterial::_validate_property(PropertyInfo& property) const { + _validate_feature("normal",FEATURE_NORMAL_MAPPING,property); + _validate_feature("sheen",FEATURE_SHEEN,property); + _validate_feature("clearcoat",FEATURE_CLEARCOAT,property); + _validate_feature("anisotropy",FEATURE_ANISOTROPY,property); + _validate_feature("ao",FEATURE_AMBIENT_OCCLUSION,property); + _validate_feature("height",FEATURE_HEIGHT_MAPPING,property); + _validate_feature("subsurf_scatter",FEATURE_SUBSURACE_SCATTERING,property); + _validate_feature("refraction",FEATURE_REFRACTION,property); + _validate_feature("detail",FEATURE_DETAIL,property); + +} + +void FixedSpatialMaterial::_bind_methods() { + + + ObjectTypeDB::bind_method(_MD("set_albedo","albedo"),&FixedSpatialMaterial::set_albedo); + ObjectTypeDB::bind_method(_MD("get_albedo"),&FixedSpatialMaterial::get_albedo); + + ObjectTypeDB::bind_method(_MD("set_specular","specular"),&FixedSpatialMaterial::set_specular); + ObjectTypeDB::bind_method(_MD("get_specular"),&FixedSpatialMaterial::get_specular); + + ObjectTypeDB::bind_method(_MD("set_roughness","roughness"),&FixedSpatialMaterial::set_roughness); + ObjectTypeDB::bind_method(_MD("get_roughness"),&FixedSpatialMaterial::get_roughness); + + ObjectTypeDB::bind_method(_MD("set_emission","emission"),&FixedSpatialMaterial::set_emission); + ObjectTypeDB::bind_method(_MD("get_emission"),&FixedSpatialMaterial::get_emission); + + ObjectTypeDB::bind_method(_MD("set_normal_scale","normal_scale"),&FixedSpatialMaterial::set_normal_scale); + ObjectTypeDB::bind_method(_MD("get_normal_scale"),&FixedSpatialMaterial::get_normal_scale); + + ObjectTypeDB::bind_method(_MD("set_sheen","sheen"),&FixedSpatialMaterial::set_sheen); + ObjectTypeDB::bind_method(_MD("get_sheen"),&FixedSpatialMaterial::get_sheen); + + ObjectTypeDB::bind_method(_MD("set_sheen_color","sheen_color"),&FixedSpatialMaterial::set_sheen_color); + ObjectTypeDB::bind_method(_MD("get_sheen_color"),&FixedSpatialMaterial::get_sheen_color); + + ObjectTypeDB::bind_method(_MD("set_clearcoat","clearcoat"),&FixedSpatialMaterial::set_clearcoat); + ObjectTypeDB::bind_method(_MD("get_clearcoat"),&FixedSpatialMaterial::get_clearcoat); + + ObjectTypeDB::bind_method(_MD("set_clearcoat_gloss","clearcoat_gloss"),&FixedSpatialMaterial::set_clearcoat_gloss); + ObjectTypeDB::bind_method(_MD("get_clearcoat_gloss"),&FixedSpatialMaterial::get_clearcoat_gloss); + + ObjectTypeDB::bind_method(_MD("set_anisotropy","anisotropy"),&FixedSpatialMaterial::set_anisotropy); + ObjectTypeDB::bind_method(_MD("get_anisotropy"),&FixedSpatialMaterial::get_anisotropy); + + ObjectTypeDB::bind_method(_MD("set_height_scale","height_scale"),&FixedSpatialMaterial::set_height_scale); + ObjectTypeDB::bind_method(_MD("get_height_scale"),&FixedSpatialMaterial::get_height_scale); + + ObjectTypeDB::bind_method(_MD("set_subsurface_scattering","subsurface_scattering"),&FixedSpatialMaterial::set_subsurface_scattering); + ObjectTypeDB::bind_method(_MD("get_subsurface_scattering"),&FixedSpatialMaterial::get_subsurface_scattering); + + ObjectTypeDB::bind_method(_MD("set_refraction","refraction"),&FixedSpatialMaterial::set_refraction); + ObjectTypeDB::bind_method(_MD("get_refraction"),&FixedSpatialMaterial::get_refraction); + + ObjectTypeDB::bind_method(_MD("set_refraction_roughness","refraction_roughness"),&FixedSpatialMaterial::set_refraction_roughness); + ObjectTypeDB::bind_method(_MD("get_refraction_roughness"),&FixedSpatialMaterial::get_refraction_roughness); + + ObjectTypeDB::bind_method(_MD("set_detail_uv","detail_uv"),&FixedSpatialMaterial::set_detail_uv); + ObjectTypeDB::bind_method(_MD("get_detail_uv"),&FixedSpatialMaterial::get_detail_uv); + + ObjectTypeDB::bind_method(_MD("set_blend_mode","blend_mode"),&FixedSpatialMaterial::set_blend_mode); + ObjectTypeDB::bind_method(_MD("get_blend_mode"),&FixedSpatialMaterial::get_blend_mode); + + ObjectTypeDB::bind_method(_MD("set_depth_draw_mode","depth_draw_mode"),&FixedSpatialMaterial::set_depth_draw_mode); + ObjectTypeDB::bind_method(_MD("get_depth_draw_mode"),&FixedSpatialMaterial::get_depth_draw_mode); + + ObjectTypeDB::bind_method(_MD("set_cull_mode","cull_mode"),&FixedSpatialMaterial::set_cull_mode); + ObjectTypeDB::bind_method(_MD("get_cull_mode"),&FixedSpatialMaterial::get_cull_mode); + + ObjectTypeDB::bind_method(_MD("set_diffuse_mode","diffuse_mode"),&FixedSpatialMaterial::set_diffuse_mode); + ObjectTypeDB::bind_method(_MD("get_diffuse_mode"),&FixedSpatialMaterial::get_diffuse_mode); + + ObjectTypeDB::bind_method(_MD("set_flag","flag","enable"),&FixedSpatialMaterial::set_flag); + ObjectTypeDB::bind_method(_MD("get_flag"),&FixedSpatialMaterial::get_flag); + + ObjectTypeDB::bind_method(_MD("set_feature","feature","enable"),&FixedSpatialMaterial::set_feature); + ObjectTypeDB::bind_method(_MD("get_feature","feature"),&FixedSpatialMaterial::get_feature); + + ObjectTypeDB::bind_method(_MD("set_texture","param:Texture","texture"),&FixedSpatialMaterial::set_texture); + ObjectTypeDB::bind_method(_MD("get_texture:Texture","param:Texture"),&FixedSpatialMaterial::get_texture); + + ObjectTypeDB::bind_method(_MD("set_detail_blend_mode","detail_blend_mode"),&FixedSpatialMaterial::set_detail_blend_mode); + ObjectTypeDB::bind_method(_MD("get_detail_blend_mode"),&FixedSpatialMaterial::get_detail_blend_mode); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/transparent"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_TRANSPARENT); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/unshaded"),_SCS("set_flag"),_SCS("get_flag"),FLAG_UNSHADED); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/on_top"),_SCS("set_flag"),_SCS("get_flag"),FLAG_ONTOP); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/vcol_albedo"),_SCS("set_flag"),_SCS("get_flag"),FLAG_ALBEDO_FROM_VERTEX_COLOR); + + ADD_PROPERTY(PropertyInfo(Variant::INT,"params/diffuse_mode",PROPERTY_HINT_ENUM,"Labert,Lambert Wrap,Oren Nayar,Burley"),_SCS("set_diffuse_mode"),_SCS("get_diffuse_mode")); + ADD_PROPERTY(PropertyInfo(Variant::INT,"params/blend_mode",PROPERTY_HINT_ENUM,"Mix,Add,Sub,Mul"),_SCS("set_blend_mode"),_SCS("get_blend_mode")); + ADD_PROPERTY(PropertyInfo(Variant::INT,"params/cull_mode",PROPERTY_HINT_ENUM,"Back,Front,Disabled"),_SCS("set_cull_mode"),_SCS("get_cull_mode")); + ADD_PROPERTY(PropertyInfo(Variant::INT,"params/depth_draw_mode",PROPERTY_HINT_ENUM,"Opaque Only,Always,Never,Opaque Pre-Pass"),_SCS("set_depth_draw_mode"),_SCS("get_depth_draw_mode")); + + ADD_PROPERTY(PropertyInfo(Variant::COLOR,"albedo/color"),_SCS("set_albedo"),_SCS("get_albedo")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"albedo/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_ALBEDO); + + ADD_PROPERTY(PropertyInfo(Variant::COLOR,"specular/color"),_SCS("set_specular"),_SCS("get_specular")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"specular/roughness",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_roughness"),_SCS("get_roughness")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"specular/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_SPECULAR); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"normal/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_NORMAL_MAPPING); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"normal/scale",PROPERTY_HINT_RANGE,"-16,16,0.01"),_SCS("set_normal_scale"),_SCS("get_normal_scale")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"normal/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_NORMAL); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"sheen/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_SHEEN); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"sheen/amount",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_sheen"),_SCS("get_sheen")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"sheen/color"),_SCS("set_sheen_color"),_SCS("get_sheen_color")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"sheen/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_SHEEN); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"clearcoat/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_CLEARCOAT); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"clearcoat/amount",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_clearcoat"),_SCS("get_clearcoat")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"clearcoat/gloss"),_SCS("set_clearcoat_gloss"),_SCS("get_clearcoat_gloss")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"clearcoat/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_CLEARCOAT); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"anisotropy/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_ANISOTROPY); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"anisotropy/amount",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_anisotropy"),_SCS("get_anisotropy")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"anisotropy/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_ANISOTROPY); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"ao/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_AMBIENT_OCCLUSION); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"ao/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_AMBIENT_OCCLUSION); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"height/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_HEIGHT_MAPPING); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"height/scale",PROPERTY_HINT_RANGE,"-16,16,0.01"),_SCS("set_height_scale"),_SCS("get_height_scale")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"height/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_HEIGHT); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"subsurf_scatter/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_SUBSURACE_SCATTERING); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"subsurf_scatter/amount",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_subsurface_scattering"),_SCS("get_subsurface_scattering")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"subsurf_scatter/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_SUBSURFACE_SCATTERING); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"refraction/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_REFRACTION); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"refraction/displacement",PROPERTY_HINT_RANGE,"-1,1,0.01"),_SCS("set_refraction"),_SCS("get_refraction")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"refraction/roughness",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_refraction_roughness"),_SCS("get_refraction_roughness")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"refraction/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_REFRACTION); + + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"detail/enabled"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_DETAIL); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"detail/mask",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_DETAIL_MASK); + ADD_PROPERTY(PropertyInfo(Variant::INT,"detail/blend_mode",PROPERTY_HINT_ENUM,"Mix,Add,Sub,Mul"),_SCS("set_detail_blend_mode"),_SCS("get_detail_blend_mode")); + ADD_PROPERTY(PropertyInfo(Variant::INT,"detail/uv_layer",PROPERTY_HINT_ENUM,"UV1,UV2"),_SCS("set_detail_uv"),_SCS("get_detail_uv")); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"detail/albedo",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_DETAIL_ALBEDO); + ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"detail/normal",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_DETAIL_NORMAL); + + BIND_CONSTANT( TEXTURE_ALBEDO ); + BIND_CONSTANT( TEXTURE_SPECULAR ); + BIND_CONSTANT( TEXTURE_EMISSION ); + BIND_CONSTANT( TEXTURE_NORMAL ); + BIND_CONSTANT( TEXTURE_SHEEN ); + BIND_CONSTANT( TEXTURE_CLEARCOAT ); + BIND_CONSTANT( TEXTURE_ANISOTROPY ); + BIND_CONSTANT( TEXTURE_AMBIENT_OCCLUSION ); + BIND_CONSTANT( TEXTURE_HEIGHT ); + BIND_CONSTANT( TEXTURE_SUBSURFACE_SCATTERING ); + BIND_CONSTANT( TEXTURE_REFRACTION ); + BIND_CONSTANT( TEXTURE_REFRACTION_ROUGHNESS ); + BIND_CONSTANT( TEXTURE_DETAIL_MASK ); + BIND_CONSTANT( TEXTURE_DETAIL_ALBEDO ); + BIND_CONSTANT( TEXTURE_DETAIL_NORMAL ); + BIND_CONSTANT( TEXTURE_MAX ); + + + BIND_CONSTANT( DETAIL_UV_1 ); + BIND_CONSTANT( DETAIL_UV_2 ); + + BIND_CONSTANT( FEATURE_TRANSPARENT ); + BIND_CONSTANT( FEATURE_EMISSION ); + BIND_CONSTANT( FEATURE_NORMAL_MAPPING ); + BIND_CONSTANT( FEATURE_SHEEN ); + BIND_CONSTANT( FEATURE_CLEARCOAT ); + BIND_CONSTANT( FEATURE_ANISOTROPY ); + BIND_CONSTANT( FEATURE_AMBIENT_OCCLUSION ); + BIND_CONSTANT( FEATURE_HEIGHT_MAPPING ); + BIND_CONSTANT( FEATURE_SUBSURACE_SCATTERING ); + BIND_CONSTANT( FEATURE_REFRACTION ); + BIND_CONSTANT( FEATURE_DETAIL ); + BIND_CONSTANT( FEATURE_MAX ); + + BIND_CONSTANT( BLEND_MODE_MIX ); + BIND_CONSTANT( BLEND_MODE_ADD ); + BIND_CONSTANT( BLEND_MODE_SUB ); + BIND_CONSTANT( BLEND_MODE_MUL ); + + BIND_CONSTANT( DEPTH_DRAW_OPAQUE_ONLY ); + BIND_CONSTANT( DEPTH_DRAW_ALWAYS ); + BIND_CONSTANT( DEPTH_DRAW_DISABLED ); + BIND_CONSTANT( DEPTH_DRAW_ALPHA_OPAQUE_PREPASS ); + + + BIND_CONSTANT( CULL_BACK ); + BIND_CONSTANT( CULL_FRONT ); + BIND_CONSTANT( CULL_DISABLED ); + + BIND_CONSTANT( FLAG_UNSHADED ); + BIND_CONSTANT( FLAG_ONTOP ); + BIND_CONSTANT( FLAG_ALBEDO_FROM_VERTEX_COLOR ); + BIND_CONSTANT( FLAG_MAX ); + + BIND_CONSTANT( DIFFUSE_LAMBERT ); + BIND_CONSTANT( DIFFUSE_LAMBERT_WRAP ); + BIND_CONSTANT( DIFFUSE_OREN_NAYAR ); + BIND_CONSTANT( DIFFUSE_BURLEY ); +} + + +FixedSpatialMaterial::FixedSpatialMaterial() : element(this) { + + //initialize to right values + set_albedo(Color(0.7,0.7,0.7,1.0)); + set_specular(Color(0.1,0.1,0.1)); + set_roughness(0.0); + set_emission(Color(0,0,0)); + set_normal_scale(1); + set_sheen(0); + set_sheen_color(Color(1,1,1,1)); + set_clearcoat(0); + set_clearcoat_gloss(0.5); + set_anisotropy(0); + set_height_scale(1); + set_subsurface_scattering(0); + set_refraction(0); + set_refraction_roughness(0); + + detail_uv=DETAIL_UV_1; + blend_mode=BLEND_MODE_MIX; + detail_blend_mode=BLEND_MODE_MIX; + depth_draw_mode=DEPTH_DRAW_OPAQUE_ONLY; + cull_mode=CULL_BACK; + for(int i=0;ilock(); + + if (shader_map.has(current_key)) { + shader_map[current_key].users--; + if (shader_map[current_key].users==0) { + //deallocate shader, as it's no longer in use + VS::get_singleton()->free(shader_map[current_key].shader); + shader_map.erase(current_key); + } + + VS::get_singleton()->material_set_shader(_get_material(),RID()); + } + + + if (material_mutex) + material_mutex->unlock(); + +} diff --git a/scene/resources/material.h b/scene/resources/material.h index 1acc031641b..0f0bf48025f 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -34,7 +34,7 @@ #include "scene/resources/shader.h" #include "resource.h" #include "servers/visual/shader_language.h" - +#include "self_list.h" /** @author Juan Linietsky */ @@ -57,17 +57,298 @@ public: }; -class FixedMaterial : public Material { +class FixedSpatialMaterial : public Material { - OBJ_TYPE(FixedMaterial,Resource); + OBJ_TYPE(FixedSpatialMaterial,Material) public: - FixedMaterial(); - virtual ~FixedMaterial(); + enum TextureParam { + TEXTURE_ALBEDO, + TEXTURE_SPECULAR, + TEXTURE_EMISSION, + TEXTURE_NORMAL, + TEXTURE_SHEEN, + TEXTURE_CLEARCOAT, + TEXTURE_ANISOTROPY, + TEXTURE_AMBIENT_OCCLUSION, + TEXTURE_HEIGHT, + TEXTURE_SUBSURFACE_SCATTERING, + TEXTURE_REFRACTION, + TEXTURE_REFRACTION_ROUGHNESS, + TEXTURE_DETAIL_MASK, + TEXTURE_DETAIL_ALBEDO, + TEXTURE_DETAIL_NORMAL, + TEXTURE_MAX + + + }; + + + enum DetailUV { + DETAIL_UV_1, + DETAIL_UV_2 + }; + + enum Feature { + FEATURE_TRANSPARENT, + FEATURE_EMISSION, + FEATURE_NORMAL_MAPPING, + FEATURE_SHEEN, + FEATURE_CLEARCOAT, + FEATURE_ANISOTROPY, + FEATURE_AMBIENT_OCCLUSION, + FEATURE_HEIGHT_MAPPING, + FEATURE_SUBSURACE_SCATTERING, + FEATURE_REFRACTION, + FEATURE_DETAIL, + FEATURE_MAX + }; + + + enum BlendMode { + BLEND_MODE_MIX, + BLEND_MODE_ADD, + BLEND_MODE_SUB, + BLEND_MODE_MUL, + }; + + enum DepthDrawMode { + DEPTH_DRAW_OPAQUE_ONLY, + DEPTH_DRAW_ALWAYS, + DEPTH_DRAW_DISABLED, + DEPTH_DRAW_ALPHA_OPAQUE_PREPASS + + }; + + enum CullMode { + CULL_BACK, + CULL_FRONT, + CULL_DISABLED + }; + + enum Flags { + FLAG_UNSHADED, + FLAG_ONTOP, + FLAG_ALBEDO_FROM_VERTEX_COLOR, + FLAG_MAX + }; + + enum DiffuseMode { + DIFFUSE_LAMBERT, + DIFFUSE_LAMBERT_WRAP, + DIFFUSE_OREN_NAYAR, + DIFFUSE_BURLEY, + }; + +private: + union MaterialKey { + + struct { + uint32_t feature_mask : 16; + uint32_t detail_uv : 1; + uint32_t blend_mode : 2; + uint32_t depth_draw_mode : 2; + uint32_t cull_mode : 2; + uint32_t flags : 3; + uint32_t detail_blend_mode : 2; + uint32_t diffuse_mode : 2; + uint32_t invalid_key : 1; + }; + + uint32_t key; + + bool operator<(const MaterialKey& p_key) const { + return key < p_key.key; + } + + }; + + struct ShaderData { + RID shader; + int users; + }; + + static Map shader_map; + + MaterialKey current_key; + + _FORCE_INLINE_ MaterialKey _compute_key() const { + + MaterialKey mk; + mk.key=0; + for(int i=0;i::List dirty_materials; + static ShaderNames* shader_names; + + SelfList element; + + void _update_shader(); + _FORCE_INLINE_ void _queue_shader_change(); + _FORCE_INLINE_ bool _is_shader_dirty() const; + + Color albedo; + Color specular; + float roughness; + Color emission; + float normal_scale; + float sheen; + Color sheen_color; + float clearcoat; + float clearcoat_gloss; + float anisotropy; + float height_scale; + float subsurface_scattering; + float refraction; + float refraction_roughness; + + DetailUV detail_uv; + + BlendMode blend_mode; + BlendMode detail_blend_mode; + DepthDrawMode depth_draw_mode; + CullMode cull_mode; + bool flags[FLAG_MAX]; + DiffuseMode diffuse_mode; + + bool features[FEATURE_MAX]; + + Ref textures[TEXTURE_MAX]; + + _FORCE_INLINE_ void _validate_feature(const String& text, Feature feature,PropertyInfo& property) const; + +protected: + + static void _bind_methods(); + void _validate_property(PropertyInfo& property) const; + +public: + + + void set_albedo(const Color& p_albedo); + Color get_albedo() const; + + void set_specular(const Color& p_specular); + Color get_specular() const; + + void set_roughness(float p_roughness); + float get_roughness() const; + + void set_emission(const Color& p_emission); + Color get_emission() const; + + void set_normal_scale(float p_normal_scale); + float get_normal_scale() const; + + void set_sheen(float p_sheen); + float get_sheen() const; + + void set_sheen_color(const Color& p_sheen_color); + Color get_sheen_color() const; + + void set_clearcoat(float p_clearcoat); + float get_clearcoat() const; + + void set_clearcoat_gloss(float p_clearcoat_gloss); + float get_clearcoat_gloss() const; + + void set_anisotropy(float p_anisotropy); + float get_anisotropy() const; + + void set_height_scale(float p_height_scale); + float get_height_scale() const; + + void set_subsurface_scattering(float p_subsurface_scattering); + float get_subsurface_scattering() const; + + void set_refraction(float p_refraction); + float get_refraction() const; + + void set_refraction_roughness(float p_refraction_roughness); + float get_refraction_roughness() const; + + void set_detail_uv(DetailUV p_detail_uv); + DetailUV get_detail_uv() const; + + void set_blend_mode(BlendMode p_mode); + BlendMode get_blend_mode() const; + + void set_detail_blend_mode(BlendMode p_mode); + BlendMode get_detail_blend_mode() const; + + void set_depth_draw_mode(DepthDrawMode p_mode); + DepthDrawMode get_depth_draw_mode() const; + + void set_cull_mode(CullMode p_mode); + CullMode get_cull_mode() const; + + void set_diffuse_mode(DiffuseMode p_mode); + DiffuseMode get_diffuse_mode() const; + + void set_flag(Flags p_flag,bool p_enabled); + bool get_flag(Flags p_flag) const; + + void set_texture(TextureParam p_param,const Ref& p_texture); + Ref get_texture(TextureParam p_param) const; + + void set_feature(Feature p_feature,bool p_enabled); + bool get_feature(Feature p_feature) const; + + static void init_shaders(); + static void finish_shaders(); + static void flush_changes(); + + FixedSpatialMaterial(); + virtual ~FixedSpatialMaterial(); }; +VARIANT_ENUM_CAST( FixedSpatialMaterial::TextureParam ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::DetailUV ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::Feature ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::BlendMode ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::DepthDrawMode ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::CullMode ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::Flags ) +VARIANT_ENUM_CAST( FixedSpatialMaterial::DiffuseMode ) ////////////////////// diff --git a/servers/visual/rasterizer.cpp b/servers/visual/rasterizer.cpp index 2952098a3c0..2c62f22a619 100644 --- a/servers/visual/rasterizer.cpp +++ b/servers/visual/rasterizer.cpp @@ -55,10 +55,10 @@ RID Rasterizer::create_default_material() { /* Fixed MAterial SHADER API */ -RID Rasterizer::_create_shader(const FixedMaterialShaderKey& p_key) { +RID Rasterizer::_create_shader(const FixedSpatialMaterialShaderKey& p_key) { ERR_FAIL_COND_V(!p_key.valid,RID()); - Map::Element *E=fixed_material_shaders.find(p_key); + Map::Element *E=fixed_material_shaders.find(p_key); if (E) { E->get().refcount++; @@ -67,7 +67,7 @@ RID Rasterizer::_create_shader(const FixedMaterialShaderKey& p_key) { uint64_t t = OS::get_singleton()->get_ticks_usec(); - FixedMaterialShader fms; + FixedSpatialMaterialShader fms; fms.refcount=1; fms.shader=shader_create(); @@ -313,12 +313,12 @@ RID Rasterizer::_create_shader(const FixedMaterialShaderKey& p_key) { return fms.shader; } -void Rasterizer::_free_shader(const FixedMaterialShaderKey& p_key) { +void Rasterizer::_free_shader(const FixedSpatialMaterialShaderKey& p_key) { if (p_key.valid==0) return; //not a valid key - Map::Element *E=fixed_material_shaders.find(p_key); + Map::Element *E=fixed_material_shaders.find(p_key); ERR_FAIL_COND(!E); E->get().refcount--; @@ -330,12 +330,12 @@ void Rasterizer::_free_shader(const FixedMaterialShaderKey& p_key) { } -void Rasterizer::fixed_material_set_flag(RID p_material, VS::FixedMaterialFlags p_flag, bool p_enabled) { +void Rasterizer::fixed_material_set_flag(RID p_material, VS::FixedSpatialMaterialFlags p_flag, bool p_enabled) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); switch(p_flag) { @@ -351,11 +351,11 @@ void Rasterizer::fixed_material_set_flag(RID p_material, VS::FixedMaterialFlags } -bool Rasterizer::fixed_material_get_flag(RID p_material, VS::FixedMaterialFlags p_flag) const{ +bool Rasterizer::fixed_material_get_flag(RID p_material, VS::FixedSpatialMaterialFlags p_flag) const{ - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,false); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); switch(p_flag) { case VS::FIXED_MATERIAL_FLAG_USE_ALPHA: return fm.use_alpha;; break; @@ -374,8 +374,8 @@ bool Rasterizer::fixed_material_get_flag(RID p_material, VS::FixedMaterialFlags RID Rasterizer::fixed_material_create() { RID mat = material_create(); - fixed_materials[mat]=memnew( FixedMaterial() ); - FixedMaterial &fm=*fixed_materials[mat]; + fixed_materials[mat]=memnew( FixedSpatialMaterial() ); + FixedSpatialMaterial &fm=*fixed_materials[mat]; fm.self=mat; fm.get_key(); material_set_flag(mat,VS::MATERIAL_FLAG_COLOR_ARRAY_SRGB,true); @@ -391,11 +391,11 @@ RID Rasterizer::fixed_material_create() { -void Rasterizer::fixed_material_set_parameter(RID p_material, VS::FixedMaterialParam p_parameter, const Variant& p_value){ +void Rasterizer::fixed_material_set_parameter(RID p_material, VS::FixedSpatialMaterialParam p_parameter, const Variant& p_value){ - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); RID material=E->key(); ERR_FAIL_INDEX(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX); @@ -418,24 +418,24 @@ void Rasterizer::fixed_material_set_parameter(RID p_material, VS::FixedMaterialP } -Variant Rasterizer::fixed_material_get_parameter(RID p_material,VS::FixedMaterialParam p_parameter) const{ +Variant Rasterizer::fixed_material_get_parameter(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const{ - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,Variant()); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); ERR_FAIL_INDEX_V(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX,Variant()); return fm.param[p_parameter]; } -void Rasterizer::fixed_material_set_texture(RID p_material,VS::FixedMaterialParam p_parameter, RID p_texture){ +void Rasterizer::fixed_material_set_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter, RID p_texture){ - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); if (!E) { print_line("Not found: "+itos(p_material.get_id())); } ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); ERR_FAIL_INDEX(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX); @@ -450,22 +450,22 @@ void Rasterizer::fixed_material_set_texture(RID p_material,VS::FixedMaterialPara } -RID Rasterizer::fixed_material_get_texture(RID p_material,VS::FixedMaterialParam p_parameter) const{ +RID Rasterizer::fixed_material_get_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const{ - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,RID()); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); ERR_FAIL_INDEX_V(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX,RID()); return fm.texture[p_parameter]; } -void Rasterizer::fixed_material_set_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter, VS::FixedMaterialTexCoordMode p_mode) { +void Rasterizer::fixed_material_set_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter, VS::FixedSpatialMaterialTexCoordMode p_mode) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); ERR_FAIL_INDEX(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX); fm.get_key(); @@ -477,11 +477,11 @@ void Rasterizer::fixed_material_set_texcoord_mode(RID p_material,VS::FixedMateri } -VS::FixedMaterialTexCoordMode Rasterizer::fixed_material_get_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter) const { +VS::FixedSpatialMaterialTexCoordMode Rasterizer::fixed_material_get_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const { - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,VS::FIXED_MATERIAL_TEXCOORD_UV); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); ERR_FAIL_INDEX_V(p_parameter,VS::FIXED_MATERIAL_PARAM_MAX,VS::FIXED_MATERIAL_TEXCOORD_UV); return fm.texture_tc[p_parameter]; @@ -489,9 +489,9 @@ VS::FixedMaterialTexCoordMode Rasterizer::fixed_material_get_texcoord_mode(RID p void Rasterizer::fixed_material_set_uv_transform(RID p_material,const Transform& p_transform) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); RID material=E->key(); VS::get_singleton()->material_set_param(material,_fixed_material_uv_xform_name,p_transform); @@ -504,18 +504,18 @@ void Rasterizer::fixed_material_set_uv_transform(RID p_material,const Transform& Transform Rasterizer::fixed_material_get_uv_transform(RID p_material) const { - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,Transform()); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); return fm.uv_xform; } -void Rasterizer::fixed_material_set_light_shader(RID p_material,VS::FixedMaterialLightShader p_shader) { +void Rasterizer::fixed_material_set_light_shader(RID p_material,VS::FixedSpatialMaterialLightShader p_shader) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); fm.light_shader=p_shader; @@ -524,20 +524,20 @@ void Rasterizer::fixed_material_set_light_shader(RID p_material,VS::FixedMateria } -VS::FixedMaterialLightShader Rasterizer::fixed_material_get_light_shader(RID p_material) const { +VS::FixedSpatialMaterialLightShader Rasterizer::fixed_material_get_light_shader(RID p_material) const { - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,VS::FIXED_MATERIAL_LIGHT_SHADER_LAMBERT); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); return fm.light_shader; } void Rasterizer::fixed_material_set_point_size(RID p_material,float p_size) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND(!E); - FixedMaterial &fm=*E->get(); + FixedSpatialMaterial &fm=*E->get(); RID material=E->key(); VS::get_singleton()->material_set_param(material,_fixed_material_point_size_name,p_size); @@ -549,9 +549,9 @@ void Rasterizer::fixed_material_set_point_size(RID p_material,float p_size) { float Rasterizer::fixed_material_get_point_size(RID p_material) const{ - const Map::Element *E = fixed_materials.find(p_material); + const Map::Element *E = fixed_materials.find(p_material); ERR_FAIL_COND_V(!E,1.0); - const FixedMaterial &fm=*E->get(); + const FixedSpatialMaterial &fm=*E->get(); return fm.point_size; @@ -562,9 +562,9 @@ void Rasterizer::_update_fixed_materials() { while(fixed_material_dirty_list.first()) { - FixedMaterial &fm=*fixed_material_dirty_list.first()->self(); + FixedSpatialMaterial &fm=*fixed_material_dirty_list.first()->self(); - FixedMaterialShaderKey new_key = fm.get_key(); + FixedSpatialMaterialShaderKey new_key = fm.get_key(); if (new_key.key!=fm.current_key.key) { _free_shader(fm.current_key); @@ -594,7 +594,7 @@ void Rasterizer::_update_fixed_materials() { void Rasterizer::_free_fixed_material(const RID& p_material) { - Map::Element *E = fixed_materials.find(p_material); + Map::Element *E = fixed_materials.find(p_material); if (E) { @@ -637,7 +637,7 @@ Rasterizer::Rasterizer() { draw_viewport_func=NULL; - ERR_FAIL_COND( sizeof(FixedMaterialShaderKey)!=4); + ERR_FAIL_COND( sizeof(FixedSpatialMaterialShaderKey)!=4); } diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 81cdcf6a25e..78757c799f5 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -44,12 +44,12 @@ public: virtual RID environment_create()=0; virtual void environment_set_background(RID p_env,VS::EnvironmentBG p_bg)=0; - virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size)=0; + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size)=0; virtual void environment_set_skybox_scale(RID p_env,float p_scale)=0; virtual void environment_set_bg_color(RID p_env,const Color& p_color)=0; virtual void environment_set_bg_energy(RID p_env,float p_energy)=0; virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer)=0; - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0)=0; + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_contribution=0.0)=0; virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode)=0; virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture)=0; @@ -90,6 +90,8 @@ public: bool billboard_y :8; bool receive_shadows : 8; + float depth; //used for sorting + SelfList dependency_item; virtual void base_removed()=0; @@ -146,7 +148,7 @@ public: virtual void texture_debug_usage(List *r_info)=0; - virtual RID texture_create_pbr_cubemap(RID p_source,VS::PBRCubeMapMode p_mode,int p_resolution=-1) const=0; + virtual RID texture_create_radiance_cubemap(RID p_source,int p_resolution=-1) const=0; /* SHADER API */ @@ -798,7 +800,7 @@ protected: /* Fixed Material Shader API */ - union FixedMaterialShaderKey { + union FixedSpatialMaterialShaderKey { struct { uint16_t texcoord_mask; @@ -814,21 +816,21 @@ protected: uint32_t key; - _FORCE_INLINE_ bool operator<(const FixedMaterialShaderKey& p_key) const { return key fixed_material_shaders; + Map fixed_material_shaders; - RID _create_shader(const FixedMaterialShaderKey& p_key); - void _free_shader(const FixedMaterialShaderKey& p_key); + RID _create_shader(const FixedSpatialMaterialShaderKey& p_key); + void _free_shader(const FixedSpatialMaterialShaderKey& p_key); - struct FixedMaterial { + struct FixedSpatialMaterial { RID self; @@ -839,19 +841,19 @@ protected: bool use_xy_normalmap; float point_size; Transform uv_xform; - VS::FixedMaterialLightShader light_shader; + VS::FixedSpatialMaterialLightShader light_shader; RID texture[VS::FIXED_MATERIAL_PARAM_MAX]; Variant param[VS::FIXED_MATERIAL_PARAM_MAX]; - VS::FixedMaterialTexCoordMode texture_tc[VS::FIXED_MATERIAL_PARAM_MAX]; + VS::FixedSpatialMaterialTexCoordMode texture_tc[VS::FIXED_MATERIAL_PARAM_MAX]; - SelfList dirty_list; + SelfList dirty_list; - FixedMaterialShaderKey current_key; + FixedSpatialMaterialShaderKey current_key; - _FORCE_INLINE_ FixedMaterialShaderKey get_key() const { + _FORCE_INLINE_ FixedSpatialMaterialShaderKey get_key() const { - FixedMaterialShaderKey k; + FixedSpatialMaterialShaderKey k; k.key=0; k.use_alpha=use_alpha; k.use_color_array=use_color_array; @@ -872,7 +874,7 @@ protected: } - FixedMaterial() : dirty_list(this) { + FixedSpatialMaterial() : dirty_list(this) { use_alpha=false; use_color_array=false; @@ -904,9 +906,9 @@ protected: StringName _fixed_material_uv_xform_name; StringName _fixed_material_point_size_name; - Map fixed_materials; + Map fixed_materials; - SelfList::List fixed_material_dirty_list; + SelfList::List fixed_material_dirty_list; protected: void _update_fixed_materials(); @@ -993,23 +995,23 @@ public: virtual RID fixed_material_create(); - virtual void fixed_material_set_flag(RID p_material, VS::FixedMaterialFlags p_flag, bool p_enabled); - virtual bool fixed_material_get_flag(RID p_material, VS::FixedMaterialFlags p_flag) const; + virtual void fixed_material_set_flag(RID p_material, VS::FixedSpatialMaterialFlags p_flag, bool p_enabled); + virtual bool fixed_material_get_flag(RID p_material, VS::FixedSpatialMaterialFlags p_flag) const; - virtual void fixed_material_set_parameter(RID p_material, VS::FixedMaterialParam p_parameter, const Variant& p_value); - virtual Variant fixed_material_get_parameter(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_parameter(RID p_material, VS::FixedSpatialMaterialParam p_parameter, const Variant& p_value); + virtual Variant fixed_material_get_parameter(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; - virtual void fixed_material_set_texture(RID p_material,VS::FixedMaterialParam p_parameter, RID p_texture); - virtual RID fixed_material_get_texture(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter, RID p_texture); + virtual RID fixed_material_get_texture(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; - virtual void fixed_material_set_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter, VS::FixedMaterialTexCoordMode p_mode); - virtual VS::FixedMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,VS::FixedMaterialParam p_parameter) const; + virtual void fixed_material_set_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter, VS::FixedSpatialMaterialTexCoordMode p_mode); + virtual VS::FixedSpatialMaterialTexCoordMode fixed_material_get_texcoord_mode(RID p_material,VS::FixedSpatialMaterialParam p_parameter) const; virtual void fixed_material_set_uv_transform(RID p_material,const Transform& p_transform); virtual Transform fixed_material_get_uv_transform(RID p_material) const; - virtual void fixed_material_set_light_shader(RID p_material,VS::FixedMaterialLightShader p_shader); - virtual VS::FixedMaterialLightShader fixed_material_get_light_shader(RID p_material) const; + virtual void fixed_material_set_light_shader(RID p_material,VS::FixedSpatialMaterialLightShader p_shader); + virtual VS::FixedSpatialMaterialLightShader fixed_material_get_light_shader(RID p_material) const; virtual void fixed_material_set_point_size(RID p_material,float p_size); virtual float fixed_material_get_point_size(RID p_material) const; diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp index 065bb4d31ad..357546e5950 100644 --- a/servers/visual/shader_language.cpp +++ b/servers/visual/shader_language.cpp @@ -260,11 +260,11 @@ const ShaderLanguage::KeyWord ShaderLanguage::keyword_list[]={ {TK_UNIFORM,"uniform"}, {TK_VARYING,"varying"}, {TK_RENDER_MODE,"render_mode"}, - {TK_HINT_WHITE_TEXTURE,"white"}, - {TK_HINT_BLACK_TEXTURE,"black"}, - {TK_HINT_NORMAL_TEXTURE,"normal"}, - {TK_HINT_ALBEDO_TEXTURE,"albedo"}, - {TK_HINT_COLOR,"color"}, + {TK_HINT_WHITE_TEXTURE,"hint_white"}, + {TK_HINT_BLACK_TEXTURE,"hint_black"}, + {TK_HINT_NORMAL_TEXTURE,"hint_normal"}, + {TK_HINT_ALBEDO_TEXTURE,"hint_albedo"}, + {TK_HINT_COLOR,"hint_color"}, {TK_HINT_RANGE,"hint_range"}, {TK_ERROR,NULL} @@ -604,7 +604,7 @@ String ShaderLanguage::token_debug(const String& p_code) { Token tk = _get_token(); while(tk.type!=TK_EOF && tk.type!=TK_ERROR) { - print_line(get_token_text(tk)); + output+=itos(tk_line)+": "+get_token_text(tk)+"\n"; tk = _get_token(); } @@ -2232,9 +2232,7 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const Node *expr=NULL; TkPos prepos = _get_tkpos(); Token tk = _get_token(); - TkPos pos = _get_tkpos(); - - print_line("in expr: "+get_token_text(tk)); + TkPos pos = _get_tkpos(); if (tk.type==TK_PARENTHESIS_OPEN) { @@ -2301,8 +2299,6 @@ ShaderLanguage::Node* ShaderLanguage::_parse_expression(BlockNode* p_block,const } else if (is_token_nonvoid_datatype(tk.type)) { //basic type constructor - print_line("parse constructor"); - OperatorNode *func = alloc_node(); func->op=OP_CONSTRUCT; @@ -3193,17 +3189,17 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Maprender_modes.find(tk.text)!=-1) { - _set_error("Duplicate render mode: '"+String(tk.text)+"'"); + if (shader->render_modes.find(mode)!=-1) { + _set_error("Duplicate render mode: '"+String(mode)+"'"); return ERR_PARSE_ERROR; } - shader->render_modes.push_back(tk.text); + shader->render_modes.push_back(mode); tk = _get_token(); if (tk.type==TK_COMMA) { @@ -3262,7 +3258,7 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Mapuniforms.size(); + if (is_sampler_type(type)) { uniform.texture_order=texture_uniforms++; uniform.order=-1; @@ -3331,7 +3327,7 @@ Error ShaderLanguage::_parse_shader(const Map< StringName, Map >::Element *E=p_functions.front();E;E=E->next()) { r_options->push_back(E->key()); @@ -3635,7 +3629,6 @@ Error ShaderLanguage::complete(const String& p_code,const Map< StringName, Map matches; @@ -3709,7 +3702,6 @@ Error ShaderLanguage::complete(const String& p_code,const Map< StringName, Mapfunctions.size();i++) { if (!shader->functions[i].callable) continue; diff --git a/servers/visual/shader_types.cpp b/servers/visual/shader_types.cpp index 767d11bc845..8c54ef11fb5 100644 --- a/servers/visual/shader_types.cpp +++ b/servers/visual/shader_types.cpp @@ -136,16 +136,16 @@ ShaderTypes::ShaderTypes() shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["POINT_COORD"]=ShaderLanguage::TYPE_VEC2; shader_modes[VS::SHADER_CANVAS_ITEM].functions["light"]["TIME"]=ShaderLanguage::TYPE_FLOAT; - shader_modes[VS::SHADER_SPATIAL].modes.insert("skip_transform"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("skip_transform"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mix"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_add"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_sub"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_mul"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("blend_premul_alpha"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("blend_mix"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("blend_add"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("blend_sub"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("blend_mul"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("blend_premul_alpha"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("unshaded"); - shader_modes[VS::SHADER_SPATIAL].modes.insert("light_only"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("unshaded"); + shader_modes[VS::SHADER_CANVAS_ITEM].modes.insert("light_only"); diff --git a/servers/visual/visual_server_raster.cpp b/servers/visual/visual_server_raster.cpp index 70abfe2361a..bbad460f4f8 100644 --- a/servers/visual/visual_server_raster.cpp +++ b/servers/visual/visual_server_raster.cpp @@ -405,33 +405,33 @@ RID VisualServerRaster::fixed_material_create() { return rasterizer->fixed_material_create(); } -void VisualServerRaster::fixed_material_set_flag(RID p_material, FixedMaterialFlags p_flag, bool p_enabled) { +void VisualServerRaster::fixed_material_set_flag(RID p_material, FixedSpatialMaterialFlags p_flag, bool p_enabled) { rasterizer->fixed_material_set_flag(p_material,p_flag,p_enabled); } -bool VisualServerRaster::fixed_material_get_flag(RID p_material, FixedMaterialFlags p_flag) const { +bool VisualServerRaster::fixed_material_get_flag(RID p_material, FixedSpatialMaterialFlags p_flag) const { return rasterizer->fixed_material_get_flag(p_material,p_flag); } -void VisualServerRaster::fixed_material_set_param(RID p_material, FixedMaterialParam p_parameter, const Variant& p_value) { +void VisualServerRaster::fixed_material_set_param(RID p_material, FixedSpatialMaterialParam p_parameter, const Variant& p_value) { VS_CHANGED; rasterizer->fixed_material_set_parameter(p_material,p_parameter,p_value); } -Variant VisualServerRaster::fixed_material_get_param(RID p_material,FixedMaterialParam p_parameter) const { +Variant VisualServerRaster::fixed_material_get_param(RID p_material,FixedSpatialMaterialParam p_parameter) const { return rasterizer->fixed_material_get_parameter(p_material,p_parameter); } -void VisualServerRaster::fixed_material_set_texture(RID p_material,FixedMaterialParam p_parameter, RID p_texture) { +void VisualServerRaster::fixed_material_set_texture(RID p_material,FixedSpatialMaterialParam p_parameter, RID p_texture) { VS_CHANGED; rasterizer->fixed_material_set_texture(p_material,p_parameter,p_texture); } -RID VisualServerRaster::fixed_material_get_texture(RID p_material,FixedMaterialParam p_parameter) const { +RID VisualServerRaster::fixed_material_get_texture(RID p_material,FixedSpatialMaterialParam p_parameter) const { return rasterizer->fixed_material_get_texture(p_material,p_parameter); } @@ -439,12 +439,12 @@ RID VisualServerRaster::fixed_material_get_texture(RID p_material,FixedMaterialP -void VisualServerRaster::fixed_material_set_texcoord_mode(RID p_material,FixedMaterialParam p_parameter, FixedMaterialTexCoordMode p_mode) { +void VisualServerRaster::fixed_material_set_texcoord_mode(RID p_material,FixedSpatialMaterialParam p_parameter, FixedSpatialMaterialTexCoordMode p_mode) { VS_CHANGED; rasterizer->fixed_material_set_texcoord_mode(p_material,p_parameter,p_mode); } -VS::FixedMaterialTexCoordMode VisualServerRaster::fixed_material_get_texcoord_mode(RID p_material,FixedMaterialParam p_parameter) const { +VS::FixedSpatialMaterialTexCoordMode VisualServerRaster::fixed_material_get_texcoord_mode(RID p_material,FixedSpatialMaterialParam p_parameter) const { return rasterizer->fixed_material_get_texcoord_mode(p_material,p_parameter); } @@ -471,14 +471,14 @@ Transform VisualServerRaster::fixed_material_get_uv_transform(RID p_material) co return rasterizer->fixed_material_get_uv_transform(p_material); } -void VisualServerRaster::fixed_material_set_light_shader(RID p_material,FixedMaterialLightShader p_shader) { +void VisualServerRaster::fixed_material_set_light_shader(RID p_material,FixedSpatialMaterialLightShader p_shader) { VS_CHANGED; rasterizer->fixed_material_set_light_shader(p_material,p_shader); } -VisualServerRaster::FixedMaterialLightShader VisualServerRaster::fixed_material_get_light_shader(RID p_material) const{ +VisualServerRaster::FixedSpatialMaterialLightShader VisualServerRaster::fixed_material_get_light_shader(RID p_material) const{ return rasterizer->fixed_material_get_light_shader(p_material); } diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index a2502e89f9d..655ce410bcb 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -620,7 +620,7 @@ public: BIND1RC(uint32_t,texture_get_width,RID) BIND1RC(uint32_t,texture_get_height,RID) BIND3(texture_set_size_override,RID,int,int) - BIND3RC(RID,texture_create_pbr_cubemap,RID,PBRCubeMapMode,int) + BIND2RC(RID,texture_create_radiance_cubemap,RID,int) @@ -854,7 +854,7 @@ public: BIND0R(RID,environment_create) BIND2(environment_set_background,RID ,EnvironmentBG ) - BIND4(environment_set_skybox,RID,RID ,int,int ) + BIND3(environment_set_skybox,RID,RID ,int ) BIND2(environment_set_skybox_scale,RID,float) BIND2(environment_set_bg_color,RID,const Color& ) BIND2(environment_set_bg_energy,RID,float ) diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index 8fdf3d160d0..2f0f8f263f4 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -337,8 +337,7 @@ void VisualServerScene::instance_set_base(RID p_instance, RID p_base){ light->D=NULL; } VSG::scene_render->free(light->instance); - - } + } break; } if (instance->base_data) { @@ -492,12 +491,12 @@ void VisualServerScene::instance_set_base(RID p_instance, RID p_base){ light->instance = VSG::scene_render->light_instance_create(p_base); instance->base_data=light; - } + } break; case VS::INSTANCE_MESH: { InstanceGeometryData *geom = memnew( InstanceGeometryData ); instance->base_data=geom; - } + } break; } @@ -596,7 +595,7 @@ void VisualServerScene::instance_set_scenario(RID p_instance, RID p_scenario){ instance->scenario->directional_lights.erase( light->D ); light->D=NULL; } - } + } break; } instance->scenario=NULL; @@ -623,7 +622,7 @@ void VisualServerScene::instance_set_scenario(RID p_instance, RID p_scenario){ if (VSG::storage->light_get_type(instance->base)==VS::LIGHT_DIRECTIONAL) { light->D = scenario->directional_lights.push_back(instance); } - } + } break; } _instance_queue_update(instance,true,true); @@ -711,12 +710,59 @@ Vector VisualServerScene::instances_cull_convex(const Vector& p void VisualServerScene::instance_geometry_set_flag(RID p_instance,VS::InstanceFlags p_flags,bool p_enabled){ + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + switch(p_flags) { + + case VS::INSTANCE_FLAG_VISIBLE: { + + instance->visible=p_enabled; + + } break; + case VS::INSTANCE_FLAG_BILLBOARD: { + + instance->billboard=p_enabled; + + } break; + case VS::INSTANCE_FLAG_BILLBOARD_FIX_Y: { + + instance->billboard_y=p_enabled; + + } break; + case VS::INSTANCE_FLAG_CAST_SHADOW: { + /*if (p_enabled == true) { + instance->cast_shadows = SHADOW_CASTING_SETTING_ON; + } + else { + instance->cast_shadows = SHADOW_CASTING_SETTING_OFF; + }*/ + + } break; + case VS::INSTANCE_FLAG_DEPH_SCALE: { + + instance->depth_scale=p_enabled; + + } break; + case VS::INSTANCE_FLAG_VISIBLE_IN_ALL_ROOMS: { + + instance->visible_in_all_rooms=p_enabled; + + } break; + + } } void VisualServerScene::instance_geometry_set_cast_shadows_setting(RID p_instance, VS::ShadowCastingSetting p_shadow_casting_setting) { } void VisualServerScene::instance_geometry_set_material_override(RID p_instance, RID p_material){ + Instance *instance = instance_owner.get( p_instance ); + ERR_FAIL_COND( !instance ); + + instance->material_override=p_material; + + } @@ -1151,7 +1197,8 @@ void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewp //failure } else if (ins->base_type==VS::INSTANCE_LIGHT && ins->visible) { - if (light_cull_countvisible && light_cull_count(ins->base_data); @@ -1253,6 +1300,8 @@ void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewp geom->lighting_dirty=false; } + ins->depth = near_plane.distance_to(ins->transform.origin); + } if (!keep) { diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 97825c172b5..95636d2bc71 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -1036,30 +1036,23 @@ void VisualServer::mesh_add_surface_from_arrays(RID p_mesh,PrimitiveType p_primi } } - print_line("type "+itos(i)+" size: "+itos(elem_size)+" offset "+itos(total_elem_size)); offsets[i]=total_elem_size; total_elem_size+=elem_size; } - print_line("total elemn size: "+itos(total_elem_size)); - uint32_t mask = (1< vertex_array; vertex_array.resize(array_size); int index_array_size = offsets[VS::ARRAY_INDEX]*index_array_len; - print_line("index array size: "+itos(index_array_size)); - DVector index_array; index_array.resize(index_array_size); diff --git a/servers/visual_server.h b/servers/visual_server.h index 11828c7d265..bbe6c2c6831 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -125,12 +125,7 @@ public: virtual void texture_set_shrink_all_x2_on_set_data(bool p_enable)=0; - enum PBRCubeMapMode { - PBR_CUBEMAP_RADIANCE, - PBR_CUBEMAP_IRRADIANCE, - }; - - virtual RID texture_create_pbr_cubemap(RID p_source,PBRCubeMapMode p_mode,int p_resolution=-1) const=0; + virtual RID texture_create_radiance_cubemap(RID p_source,int p_resolution=-1) const=0; struct TextureInfo { RID texture; @@ -349,7 +344,9 @@ public: LIGHT_PARAM_ENERGY, LIGHT_PARAM_SPECULAR, LIGHT_PARAM_RANGE, + LIGHT_PARAM_ATTENUATION, LIGHT_PARAM_SPOT_ANGLE, + LIGHT_PARAM_SPOT_ATTENUATION, LIGHT_PARAM_SHADOW_MAX_DISTANCE, LIGHT_PARAM_SHADOW_DARKNESS, LIGHT_PARAM_SHADOW_SPLIT_1_OFFSET, @@ -357,10 +354,8 @@ public: LIGHT_PARAM_SHADOW_SPLIT_3_OFFSET, LIGHT_PARAM_SHADOW_SPLIT_4_OFFSET, LIGHT_PARAM_SHADOW_NORMAL_BIAS, - LIGHT_PARAM_SHADOW_BIAS_1, - LIGHT_PARAM_SHADOW_BIAS_2, - LIGHT_PARAM_SHADOW_BIAS_3, - LIGHT_PARAM_SHADOW_BIAS_4, + LIGHT_PARAM_SHADOW_BIAS, + LIGHT_PARAM_SHADOW_BIAS_SPLIT_SCALE, LIGHT_PARAM_MAX }; @@ -494,12 +489,12 @@ public: }; virtual void environment_set_background(RID p_env,EnvironmentBG p_bg)=0; - virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size,int p_irradiance_size)=0; + virtual void environment_set_skybox(RID p_env,RID p_skybox,int p_radiance_size)=0; virtual void environment_set_skybox_scale(RID p_env,float p_scale)=0; virtual void environment_set_bg_color(RID p_env,const Color& p_color)=0; virtual void environment_set_bg_energy(RID p_env,float p_energy)=0; virtual void environment_set_canvas_max_layer(RID p_env,int p_max_layer)=0; - virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_energy=0.0)=0; + virtual void environment_set_ambient_light(RID p_env,const Color& p_color,float p_energy=1.0,float p_skybox_contribution=0.0)=0; //set default SSAO options //set default SSR options diff --git a/tools/editor/io_plugins/editor_import_collada.cpp b/tools/editor/io_plugins/editor_import_collada.cpp index 6b1873d49a6..e8207c9575a 100644 --- a/tools/editor/io_plugins/editor_import_collada.cpp +++ b/tools/editor/io_plugins/editor_import_collada.cpp @@ -377,7 +377,7 @@ Error ColladaImport::_create_material(const String& p_target) { ERR_FAIL_COND_V(!collada.state.effect_map.has(src_mat.instance_effect),ERR_INVALID_PARAMETER); Collada::Effect &effect=collada.state.effect_map[src_mat.instance_effect]; - Ref material= memnew( FixedMaterial ); + Ref material= memnew( FixedSpatialMaterial ); if (src_mat.name!="") material->set_name(src_mat.name); @@ -394,14 +394,14 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { -// material->set_texture(FixedMaterial::PARAM_DIFFUSE,texture); -// material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,1)); +// material->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,texture); +// material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,1)); } else { missing_textures.push_back(texfile.get_file()); } } } else { -// material->set_parameter(FixedMaterial::PARAM_DIFFUSE,effect.diffuse.color); +// material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,effect.diffuse.color); } // SPECULAR @@ -414,15 +414,15 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { -// material->set_texture(FixedMaterial::PARAM_SPECULAR,texture); -// material->set_parameter(FixedMaterial::PARAM_SPECULAR,Color(1,1,1,1)); +// material->set_texture(FixedSpatialMaterial::PARAM_SPECULAR,texture); +// material->set_parameter(FixedSpatialMaterial::PARAM_SPECULAR,Color(1,1,1,1)); } else { missing_textures.push_back(texfile.get_file()); } } } else { -// material->set_parameter(FixedMaterial::PARAM_SPECULAR,effect.specular.color); +// material->set_parameter(FixedSpatialMaterial::PARAM_SPECULAR,effect.specular.color); } // EMISSION @@ -435,15 +435,15 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { -// material->set_texture(FixedMaterial::PARAM_EMISSION,texture); -// material->set_parameter(FixedMaterial::PARAM_EMISSION,Color(1,1,1,1)); +// material->set_texture(FixedSpatialMaterial::PARAM_EMISSION,texture); +// material->set_parameter(FixedSpatialMaterial::PARAM_EMISSION,Color(1,1,1,1)); }else { // missing_textures.push_back(texfile.get_file()); } } } else { -// material->set_parameter(FixedMaterial::PARAM_EMISSION,effect.emission.color); +// material->set_parameter(FixedSpatialMaterial::PARAM_EMISSION,effect.emission.color); } // NORMAL @@ -456,7 +456,7 @@ Error ColladaImport::_create_material(const String& p_target) { Ref texture = ResourceLoader::load(texfile,"Texture"); if (texture.is_valid()) { - // material->set_texture(FixedMaterial::PARAM_NORMAL,texture); + // material->set_texture(FixedSpatialMaterial::PARAM_NORMAL,texture); }else { // missing_textures.push_back(texfile.get_file()); } @@ -465,7 +465,7 @@ Error ColladaImport::_create_material(const String& p_target) { } -// material->set_parameter(FixedMaterial::PARAM_SPECULAR_EXP,effect.shininess); +// material->set_parameter(FixedSpatialMaterial::PARAM_SPECULAR_EXP,effect.shininess); // material->set_flag(Material::FLAG_DOUBLE_SIDED,effect.double_sided); // material->set_flag(Material::FLAG_UNSHADED,effect.unshaded); @@ -1042,7 +1042,7 @@ Error ColladaImport::_create_mesh_surfaces(bool p_optimize,Ref& p_mesh,con { - Ref material; + Ref material; //find material Mesh::PrimitiveType primitive=Mesh::PRIMITIVE_TRIANGLES; diff --git a/tools/editor/io_plugins/editor_scene_import_plugin.cpp b/tools/editor/io_plugins/editor_scene_import_plugin.cpp index 9d68807c461..41b245eb7d7 100644 --- a/tools/editor/io_plugins/editor_scene_import_plugin.cpp +++ b/tools/editor/io_plugins/editor_scene_import_plugin.cpp @@ -1392,7 +1392,7 @@ void EditorSceneImportPlugin::_find_resources(const Variant& p_var, Map::Element *E=pl.front();E;E=E->next()) { if (E->get().type==Variant::OBJECT || E->get().type==Variant::ARRAY || E->get().type==Variant::DICTIONARY) { - if (E->get().type==Variant::OBJECT && res->cast_to() && (E->get().name=="textures/diffuse" || E->get().name=="textures/detail" || E->get().name=="textures/emission")) { + if (E->get().type==Variant::OBJECT && res->cast_to() && (E->get().name=="textures/diffuse" || E->get().name=="textures/detail" || E->get().name=="textures/emission")) { Ref tex =res->get(E->get().name); if (tex.is_valid()) { @@ -1400,14 +1400,14 @@ void EditorSceneImportPlugin::_find_resources(const Variant& p_var, Mapget().type==Variant::OBJECT && res->cast_to() && (E->get().name=="textures/normal")) { + } else if (E->get().type==Variant::OBJECT && res->cast_to() && (E->get().name=="textures/normal")) { Ref tex =res->get(E->get().name); if (tex.is_valid()) { image_map.insert(tex,TEXTURE_ROLE_NORMALMAP); //if (p_flags&SCENE_FLAG_CONVERT_NORMALMAPS_TO_XY) - // res->cast_to()->set_fixed_flag(FixedMaterial::FLAG_USE_XY_NORMALMAP,true); + // res->cast_to()->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_XY_NORMALMAP,true); }// @@ -1514,12 +1514,12 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref m = mi->get_mesh(); for(int i=0;iget_surface_count();i++) { - Ref fm = m->surface_get_material(i); + Ref fm = m->surface_get_material(i); if (fm.is_valid()) { // fm->set_flag(Material::FLAG_UNSHADED,true); // fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); // fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - // fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); } } } @@ -1537,18 +1537,18 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map for(int i=0;iget_surface_count();i++) { - Ref mat = m->surface_get_material(i); + Ref mat = m->surface_get_material(i); if (!mat.is_valid()) continue; if (p_flags&SCENE_FLAG_DETECT_ALPHA && _teststr(mat->get_name(),"alpha")) { - // mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); // mat->set_name(_fixstr(mat->get_name(),"alpha")); } if (p_flags&SCENE_FLAG_DETECT_VCOLOR && _teststr(mat->get_name(),"vcol")) { - //mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); + //mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY,true); //mat->set_name(_fixstr(mat->get_name(),"vcol")); } @@ -1623,12 +1623,12 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref m = mi->get_mesh(); for(int i=0;iget_surface_count();i++) { - Ref fm = m->surface_get_material(i); + Ref fm = m->surface_get_material(i); if (fm.is_valid()) { // fm->set_flag(Material::FLAG_UNSHADED,true); // fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); // fm->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - // fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + // fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); } } } @@ -1671,12 +1671,12 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map Ref m = mi->get_mesh(); for(int i=0;iget_surface_count();i++) { - Ref fm = m->surface_get_material(i); + Ref fm = m->surface_get_material(i); if (fm.is_valid()) { fm->set_flag(Material::FLAG_UNSHADED,true); fm->set_flag(Material::FLAG_DOUBLE_SIDED,true); fm->set_hint(Material::HINT_NO_DEPTH_DRAW,true); - fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); } } }*/ @@ -2044,16 +2044,16 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map for(int i=0;iget_surface_count();i++) { - Ref fm = mesh->surface_get_material(i); + Ref fm = mesh->surface_get_material(i); if (fm.is_valid()) { String name = fm->get_name(); /* if (_teststr(name,"alpha")) { - fm->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); name=_fixstr(name,"alpha"); } if (_teststr(name,"vcol")) { - fm->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); + fm->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY,true); name=_fixstr(name,"vcol"); }*/ fm->set_name(name); diff --git a/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp b/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp index f117182365f..a954db829da 100644 --- a/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp +++ b/tools/editor/io_plugins/editor_scene_importer_fbxconv.cpp @@ -482,29 +482,29 @@ void EditorSceneImporterFBXConv::_parse_materials(State& state) { ERR_CONTINUE(!material.has("id")); String id = _id(material["id"]); - Ref mat = memnew( FixedMaterial ); + Ref mat = memnew( FixedSpatialMaterial ); if (material.has("diffuse")) { - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,_get_color(material["diffuse"])); + mat->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,_get_color(material["diffuse"])); } if (material.has("specular")) { - mat->set_parameter(FixedMaterial::PARAM_SPECULAR,_get_color(material["specular"])); + mat->set_parameter(FixedSpatialMaterial::PARAM_SPECULAR,_get_color(material["specular"])); } if (material.has("emissive")) { - mat->set_parameter(FixedMaterial::PARAM_EMISSION,_get_color(material["emissive"])); + mat->set_parameter(FixedSpatialMaterial::PARAM_EMISSION,_get_color(material["emissive"])); } if (material.has("shininess")) { float exp = material["shininess"]; - mat->set_parameter(FixedMaterial::PARAM_SPECULAR_EXP,exp); + mat->set_parameter(FixedSpatialMaterial::PARAM_SPECULAR_EXP,exp); } if (material.has("opacity")) { - Color c = mat->get_parameter(FixedMaterial::PARAM_DIFFUSE); + Color c = mat->get_parameter(FixedSpatialMaterial::PARAM_DIFFUSE); c.a=material["opacity"]; - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,c); + mat->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,c); } @@ -536,15 +536,15 @@ void EditorSceneImporterFBXConv::_parse_materials(State& state) { String type=texture["type"]; if (type=="DIFFUSE") - mat->set_texture(FixedMaterial::PARAM_DIFFUSE,tex); + mat->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,tex); else if (type=="SPECULAR") - mat->set_texture(FixedMaterial::PARAM_SPECULAR,tex); + mat->set_texture(FixedSpatialMaterial::PARAM_SPECULAR,tex); else if (type=="SHININESS") - mat->set_texture(FixedMaterial::PARAM_SPECULAR_EXP,tex); + mat->set_texture(FixedSpatialMaterial::PARAM_SPECULAR_EXP,tex); else if (type=="NORMAL") - mat->set_texture(FixedMaterial::PARAM_NORMAL,tex); + mat->set_texture(FixedSpatialMaterial::PARAM_NORMAL,tex); else if (type=="EMISSIVE") - mat->set_texture(FixedMaterial::PARAM_EMISSION,tex); + mat->set_texture(FixedSpatialMaterial::PARAM_EMISSION,tex); } } diff --git a/tools/editor/plugins/baked_light_baker.cpp b/tools/editor/plugins/baked_light_baker.cpp index 5cdae6aff3a..f31c8adf861 100644 --- a/tools/editor/plugins/baked_light_baker.cpp +++ b/tools/editor/plugins/baked_light_baker.cpp @@ -143,18 +143,18 @@ void BakedLightBaker::_add_mesh(const Ref& p_mesh,const Ref& p_m MeshMaterial mm; - Ref fm = mat; + Ref fm = mat; if (fm.is_valid()) { //fixed route - mm.diffuse.color=fm->get_parameter(FixedMaterial::PARAM_DIFFUSE); + mm.diffuse.color=fm->get_parameter(FixedSpatialMaterial::PARAM_DIFFUSE); if (linear_color) mm.diffuse.color=mm.diffuse.color.to_linear(); - mm.diffuse.tex=_get_mat_tex(fm->get_texture(FixedMaterial::PARAM_DIFFUSE)); - mm.specular.color=fm->get_parameter(FixedMaterial::PARAM_SPECULAR); + mm.diffuse.tex=_get_mat_tex(fm->get_texture(FixedSpatialMaterial::PARAM_DIFFUSE)); + mm.specular.color=fm->get_parameter(FixedSpatialMaterial::PARAM_SPECULAR); if (linear_color) mm.specular.color=mm.specular.color.to_linear(); - mm.specular.tex=_get_mat_tex(fm->get_texture(FixedMaterial::PARAM_SPECULAR)); + mm.specular.tex=_get_mat_tex(fm->get_texture(FixedSpatialMaterial::PARAM_SPECULAR)); } else { mm.diffuse.color=Color(1,1,1,1); diff --git a/tools/editor/plugins/collision_polygon_editor_plugin.cpp b/tools/editor/plugins/collision_polygon_editor_plugin.cpp index ccec7eaed11..472ada116d2 100644 --- a/tools/editor/plugins/collision_polygon_editor_plugin.cpp +++ b/tools/editor/plugins/collision_polygon_editor_plugin.cpp @@ -570,25 +570,25 @@ CollisionPolygonEditor::CollisionPolygonEditor(EditorNode *p_editor) { imgeom->set_transform(Transform(Matrix3(),Vector3(0,0,0.00001))); - line_material = Ref( memnew( FixedMaterial )); + line_material = Ref( memnew( FixedSpatialMaterial )); line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1)); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1)); - handle_material = Ref( memnew( FixedMaterial )); + handle_material = Ref( memnew( FixedSpatialMaterial )); handle_material->set_flag(Material::FLAG_UNSHADED, true); - handle_material->set_fixed_flag(FixedMaterial::FLAG_USE_POINT_SIZE, true); - handle_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1)); - handle_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - handle_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, false); + handle_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_POINT_SIZE, true); + handle_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1)); + handle_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + handle_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, false); Ref handle=editor->get_gui_base()->get_icon("Editor3DHandle","EditorIcons"); handle_material->set_point_size(handle->get_width()); - handle_material->set_texture(FixedMaterial::PARAM_DIFFUSE,handle); + handle_material->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,handle); pointsm = memnew( MeshInstance ); imgeom->add_child(pointsm); diff --git a/tools/editor/plugins/collision_polygon_editor_plugin.h b/tools/editor/plugins/collision_polygon_editor_plugin.h index 4c1f45eca97..ed371e81b22 100644 --- a/tools/editor/plugins/collision_polygon_editor_plugin.h +++ b/tools/editor/plugins/collision_polygon_editor_plugin.h @@ -62,8 +62,8 @@ class CollisionPolygonEditor : public HBoxContainer { ToolButton *button_edit; - Ref line_material; - Ref handle_material; + Ref line_material; + Ref handle_material; EditorNode *editor; Panel *panel; diff --git a/tools/editor/plugins/path_editor_plugin.cpp b/tools/editor/plugins/path_editor_plugin.cpp index aa7071298ad..c56d526de8c 100644 --- a/tools/editor/plugins/path_editor_plugin.cpp +++ b/tools/editor/plugins/path_editor_plugin.cpp @@ -523,16 +523,16 @@ PathEditorPlugin::PathEditorPlugin(EditorNode *p_node) { editor=p_node; singleton=this; - path_material = Ref( memnew( FixedMaterial )); - path_material->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.5,0.5,1.0,0.8) ); - path_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); + path_material = Ref( memnew( FixedSpatialMaterial )); + path_material->set_parameter( FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.5,0.5,1.0,0.8) ); + path_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); path_material->set_line_width(3); path_material->set_flag(Material::FLAG_DOUBLE_SIDED,true); path_material->set_flag(Material::FLAG_UNSHADED,true); - path_thin_material = Ref( memnew( FixedMaterial )); - path_thin_material->set_parameter( FixedMaterial::PARAM_DIFFUSE,Color(0.5,0.5,1.0,0.4) ); - path_thin_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); + path_thin_material = Ref( memnew( FixedSpatialMaterial )); + path_thin_material->set_parameter( FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.5,0.5,1.0,0.4) ); + path_thin_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); path_thin_material->set_line_width(1); path_thin_material->set_flag(Material::FLAG_DOUBLE_SIDED,true); path_thin_material->set_flag(Material::FLAG_UNSHADED,true); diff --git a/tools/editor/plugins/path_editor_plugin.h b/tools/editor/plugins/path_editor_plugin.h index ee2196b47ff..ab9474f38d6 100644 --- a/tools/editor/plugins/path_editor_plugin.h +++ b/tools/editor/plugins/path_editor_plugin.h @@ -79,8 +79,8 @@ public: Path *get_edited_path() { return path; } static PathEditorPlugin* singleton; - Ref path_material; - Ref path_thin_material; + Ref path_material; + Ref path_thin_material; virtual bool forward_spatial_input_event(Camera* p_camera,const InputEvent& p_event); // virtual bool forward_input_event(const InputEvent& p_event) { return collision_polygon_editor->forward_input_event(p_event); } diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp index 38a7706acc1..7a24ab41802 100644 --- a/tools/editor/plugins/spatial_editor_plugin.cpp +++ b/tools/editor/plugins/spatial_editor_plugin.cpp @@ -2548,11 +2548,11 @@ void SpatialEditor::_generate_selection_box() { } - Ref mat = memnew( FixedMaterial ); - /*mat->set_flag(Material::FLAG_UNSHADED,true); - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1)); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true);*/ + Ref mat = memnew( FixedSpatialMaterial ); + mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); + mat->set_albedo(Color(1,1,1)); + mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT,true); + mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR,true); st->set_material(mat); selection_box = st->commit(); } @@ -3139,11 +3139,11 @@ void SpatialEditor::_init_indicators() { { - indicator_mat = VisualServer::get_singleton()->material_create(); - /*VisualServer::get_singleton()->material_set_flag( indicator_mat, VisualServer::MATERIAL_FLAG_UNSHADED, true ); - VisualServer::get_singleton()->material_set_flag( indicator_mat, VisualServer::MATERIAL_FLAG_ONTOP, false ); - VisualServer::get_singleton()->fixed_material_set_flag(indicator_mat, VisualServer::FIXED_MATERIAL_FLAG_USE_ALPHA,true); - VisualServer::get_singleton()->fixed_material_set_flag(indicator_mat, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true);*/ + indicator_mat.instance();; + indicator_mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); + indicator_mat->set_flag(FixedSpatialMaterial::FLAG_ONTOP,true); + indicator_mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR,true); + indicator_mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT,true); DVector grid_colors[3]; DVector grid_points[3]; @@ -3186,7 +3186,7 @@ void SpatialEditor::_init_indicators() { d[VisualServer::ARRAY_VERTEX]=grid_points[i]; d[VisualServer::ARRAY_COLOR]=grid_colors[i]; VisualServer::get_singleton()->mesh_add_surface_from_arrays(grid[i],VisualServer::PRIMITIVE_LINES,d); - VisualServer::get_singleton()->mesh_surface_set_material(grid[i],0,indicator_mat); + VisualServer::get_singleton()->mesh_surface_set_material(grid[i],0,indicator_mat->get_rid()); grid_instance[i] = VisualServer::get_singleton()->instance_create2(grid[i],get_tree()->get_root()->get_world()->get_scenario()); grid_visible[i]=false; @@ -3205,7 +3205,7 @@ void SpatialEditor::_init_indicators() { d[VisualServer::ARRAY_COLOR]=origin_colors; VisualServer::get_singleton()->mesh_add_surface_from_arrays(origin,VisualServer::PRIMITIVE_LINES,d); - VisualServer::get_singleton()->mesh_surface_set_material(origin,0,indicator_mat); + VisualServer::get_singleton()->mesh_surface_set_material(origin,0,indicator_mat->get_rid()); // origin = VisualServer::get_singleton()->poly_create(); @@ -3236,17 +3236,15 @@ void SpatialEditor::_init_indicators() { cursor_points.push_back(Vector3(0,-cs,0)); cursor_points.push_back(Vector3(0,0,+cs)); cursor_points.push_back(Vector3(0,0,-cs)); - cursor_material=VisualServer::get_singleton()->material_create(); - /*VisualServer::get_singleton()->fixed_material_set_param(cursor_material,VS::FIXED_MATERIAL_PARAM_DIFFUSE,Color(0,1,1)); - VisualServer::get_singleton()->material_set_flag( cursor_material, VisualServer::MATERIAL_FLAG_UNSHADED, true ); - VisualServer::get_singleton()->fixed_material_set_flag(cursor_material, VisualServer::FIXED_MATERIAL_FLAG_USE_ALPHA,true); - VisualServer::get_singleton()->fixed_material_set_flag(cursor_material, VisualServer::FIXED_MATERIAL_FLAG_USE_COLOR_ARRAY,true);*/ + cursor_material.instance(); + cursor_material->set_albedo(Color(0,1,1)); + cursor_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); Array d; d.resize(VS::ARRAY_MAX); d[VS::ARRAY_VERTEX]=cursor_points; VisualServer::get_singleton()->mesh_add_surface_from_arrays(cursor_mesh,VS::PRIMITIVE_LINES,d); - VisualServer::get_singleton()->mesh_surface_set_material(cursor_mesh,0,cursor_material); + VisualServer::get_singleton()->mesh_surface_set_material(cursor_mesh,0,cursor_material->get_rid()); cursor_instance = VisualServer::get_singleton()->instance_create2(cursor_mesh,get_tree()->get_root()->get_world()->get_scenario()); VS::get_singleton()->instance_set_layer_mask(cursor_instance,1<get("3d_editor/manipulator_gizmo_opacity"); - gizmo_hl = Ref( memnew( FixedMaterial ) ); - /* gizmo_hl->set_flag(Material::FLAG_UNSHADED, true); - gizmo_hl->set_flag(Material::FLAG_ONTOP, true); - gizmo_hl->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - gizmo_hl->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,gizmo_alph+0.2f));*/ + gizmo_hl = Ref( memnew( FixedSpatialMaterial ) ); + gizmo_hl->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + gizmo_hl->set_flag(FixedSpatialMaterial::FLAG_ONTOP, true); + gizmo_hl->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + gizmo_hl->set_albedo(Color(1,1,1,gizmo_alph+0.2f)); for(int i=0;i<3;i++) { @@ -3276,14 +3274,14 @@ void SpatialEditor::_init_indicators() { rotate_gizmo[i]=Ref( memnew( Mesh ) ); - Ref mat = memnew( FixedMaterial ); - /* mat->set_flag(Material::FLAG_UNSHADED, true); - mat->set_flag(Material::FLAG_ONTOP, true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); + Ref mat = memnew( FixedSpatialMaterial ); + mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + mat->set_flag(FixedSpatialMaterial::FLAG_ONTOP, true); + mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); Color col; col[i]=1.0; col.a= gizmo_alph; - mat->set_parameter(FixedMaterial::PARAM_DIFFUSE,col);*/ + mat->set_albedo(col); gizmo_color[i]=mat; @@ -3429,8 +3427,6 @@ void SpatialEditor::_finish_indicators() { VisualServer::get_singleton()->free(cursor_instance); VisualServer::get_singleton()->free(cursor_mesh); - VisualServer::get_singleton()->free(indicator_mat); - VisualServer::get_singleton()->free(cursor_material); } void SpatialEditor::_instance_scene() { diff --git a/tools/editor/plugins/spatial_editor_plugin.h b/tools/editor/plugins/spatial_editor_plugin.h index 4751cf70717..8c8a80bc3ae 100644 --- a/tools/editor/plugins/spatial_editor_plugin.h +++ b/tools/editor/plugins/spatial_editor_plugin.h @@ -331,8 +331,8 @@ private: bool grid_enabled; Ref move_gizmo[3], rotate_gizmo[3]; - Ref gizmo_color[3]; - Ref gizmo_hl; + Ref gizmo_color[3]; + Ref gizmo_hl; int over_gizmo_handle; @@ -344,8 +344,8 @@ private: RID indicators_instance; RID cursor_mesh; RID cursor_instance; - RID indicator_mat; - RID cursor_material; + Ref indicator_mat; + Ref cursor_material; /* struct Selected { diff --git a/tools/editor/spatial_editor_gizmos.cpp b/tools/editor/spatial_editor_gizmos.cpp index 716b03b7b7f..bc76f6c20c9 100644 --- a/tools/editor/spatial_editor_gizmos.cpp +++ b/tools/editor/spatial_editor_gizmos.cpp @@ -2976,25 +2976,25 @@ Ref SpatialEditorGizmos::get_gizmo(Spatial *p_spatial) { } -Ref SpatialEditorGizmos::create_line_material(const Color& p_base_color) { +Ref SpatialEditorGizmos::create_line_material(const Color& p_base_color) { - Ref line_material = Ref( memnew( FixedMaterial )); + Ref line_material = Ref( memnew( FixedSpatialMaterial )); line_material->set_flag(Material::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,p_base_color); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,p_base_color); return line_material; } -Ref SpatialEditorGizmos::create_solid_material(const Color& p_base_color) { +Ref SpatialEditorGizmos::create_solid_material(const Color& p_base_color) { - Ref line_material = Ref( memnew( FixedMaterial )); + Ref line_material = Ref( memnew( FixedSpatialMaterial )); line_material->set_flag(Material::FLAG_UNSHADED, true); - line_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - line_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,p_base_color); + line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,p_base_color); return line_material; @@ -3004,50 +3004,50 @@ SpatialEditorGizmos::SpatialEditorGizmos() { singleton=this; - handle_material = Ref( memnew( FixedMaterial )); + handle_material = Ref( memnew( FixedSpatialMaterial )); handle_material->set_flag(Material::FLAG_UNSHADED, true); - handle_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(0.8,0.8,0.8)); + handle_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.8,0.8,0.8)); - handle2_material = Ref( memnew( FixedMaterial )); + handle2_material = Ref( memnew( FixedSpatialMaterial )); handle2_material->set_flag(Material::FLAG_UNSHADED, true); - handle2_material->set_fixed_flag(FixedMaterial::FLAG_USE_POINT_SIZE, true); + handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_POINT_SIZE, true); handle_t = SpatialEditor::get_singleton()->get_icon("Editor3DHandle","EditorIcons"); handle2_material->set_point_size(handle_t->get_width()); - handle2_material->set_texture(FixedMaterial::PARAM_DIFFUSE,handle_t); - handle2_material->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1)); - handle2_material->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - handle2_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); + handle2_material->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,handle_t); + handle2_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1)); + handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); light_material = create_line_material(Color(1,1,0.2)); - light_material_omni_icon = Ref( memnew( FixedMaterial )); + light_material_omni_icon = Ref( memnew( FixedSpatialMaterial )); light_material_omni_icon->set_flag(Material::FLAG_UNSHADED, true); light_material_omni_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); light_material_omni_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - light_material_omni_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - light_material_omni_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - light_material_omni_icon->set_texture(FixedMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoLight","EditorIcons")); + light_material_omni_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + light_material_omni_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); + light_material_omni_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoLight","EditorIcons")); - light_material_directional_icon = Ref( memnew( FixedMaterial )); + light_material_directional_icon = Ref( memnew( FixedSpatialMaterial )); light_material_directional_icon->set_flag(Material::FLAG_UNSHADED, true); light_material_directional_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); light_material_directional_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - light_material_directional_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - light_material_directional_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - light_material_directional_icon->set_texture(FixedMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoDirectionalLight","EditorIcons")); + light_material_directional_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + light_material_directional_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); + light_material_directional_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoDirectionalLight","EditorIcons")); camera_material = create_line_material(Color(1.0,0.5,1.0)); navmesh_edge_material = create_line_material(Color(0.1,0.8,1.0)); navmesh_solid_material = create_solid_material(Color(0.1,0.8,1.0,0.4)); - navmesh_edge_material->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, false); + navmesh_edge_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, false); navmesh_solid_material->set_flag(Material::FLAG_DOUBLE_SIDED,true); navmesh_edge_material_disabled = create_line_material(Color(1.0,0.8,0.1)); navmesh_solid_material_disabled = create_solid_material(Color(1.0,0.8,0.1,0.4)); - navmesh_edge_material_disabled->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, false); + navmesh_edge_material_disabled->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, false); navmesh_solid_material_disabled->set_flag(Material::FLAG_DOUBLE_SIDED,true); skeleton_material = create_line_material(Color(0.6,1.0,0.3)); @@ -3077,10 +3077,10 @@ SpatialEditorGizmos::SpatialEditorGizmos() { cursor_colors.push_back(Color(0.5,0.5,1,0.7)); cursor_colors.push_back(Color(0.5,0.5,1,0.7)); - Ref mat = memnew( FixedMaterial ); + Ref mat = memnew( FixedSpatialMaterial ); mat->set_flag(Material::FLAG_UNSHADED,true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY,true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA,true); + mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY,true); + mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); mat->set_line_width(3); Array d; d.resize(VS::ARRAY_MAX); @@ -3100,10 +3100,10 @@ SpatialEditorGizmos::SpatialEditorGizmos() { cursor_colors.push_back(Color(0.5, 0.5, 0.5, 0.7)); cursor_colors.push_back(Color(0.5, 0.5, 0.5, 0.7)); - Ref mat = memnew(FixedMaterial); + Ref mat = memnew(FixedSpatialMaterial); mat->set_flag(Material::FLAG_UNSHADED, true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_COLOR_ARRAY, true); - mat->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); + mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); mat->set_line_width(3); Array d; d.resize(VS::ARRAY_MAX); @@ -3114,13 +3114,13 @@ SpatialEditorGizmos::SpatialEditorGizmos() { } - sample_player_icon = Ref( memnew( FixedMaterial )); + sample_player_icon = Ref( memnew( FixedSpatialMaterial )); sample_player_icon->set_flag(Material::FLAG_UNSHADED, true); sample_player_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); sample_player_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - sample_player_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - sample_player_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - sample_player_icon->set_texture(FixedMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialSamplePlayer","EditorIcons")); + sample_player_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + sample_player_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); + sample_player_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialSamplePlayer","EditorIcons")); room_material = create_line_material(Color(1.0,0.6,0.9)); portal_material = create_line_material(Color(1.0,0.8,0.6)); @@ -3129,29 +3129,29 @@ SpatialEditorGizmos::SpatialEditorGizmos() { visibility_notifier_material = create_line_material(Color(1.0,0.5,1.0)); joint_material = create_line_material(Color(0.6,0.8,1.0)); - stream_player_icon = Ref( memnew( FixedMaterial )); + stream_player_icon = Ref( memnew( FixedSpatialMaterial )); stream_player_icon->set_flag(Material::FLAG_UNSHADED, true); stream_player_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); stream_player_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - stream_player_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - stream_player_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - stream_player_icon->set_texture(FixedMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialStreamPlayer","EditorIcons")); + stream_player_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + stream_player_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); + stream_player_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialStreamPlayer","EditorIcons")); - visibility_notifier_icon = Ref( memnew( FixedMaterial )); + visibility_notifier_icon = Ref( memnew( FixedSpatialMaterial )); visibility_notifier_icon->set_flag(Material::FLAG_UNSHADED, true); visibility_notifier_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); visibility_notifier_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - visibility_notifier_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - visibility_notifier_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - visibility_notifier_icon->set_texture(FixedMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("Visible","EditorIcons")); + visibility_notifier_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + visibility_notifier_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); + visibility_notifier_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("Visible","EditorIcons")); - listener_icon = Ref(memnew(FixedMaterial)); + listener_icon = Ref(memnew(FixedSpatialMaterial)); listener_icon->set_flag(Material::FLAG_UNSHADED, true); listener_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); listener_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - listener_icon->set_fixed_flag(FixedMaterial::FLAG_USE_ALPHA, true); - listener_icon->set_parameter(FixedMaterial::PARAM_DIFFUSE, Color(1, 1, 1, 0.9)); - listener_icon->set_texture(FixedMaterial::PARAM_DIFFUSE, SpatialEditor::get_singleton()->get_icon("GizmoListener", "EditorIcons")); + listener_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + listener_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE, Color(1, 1, 1, 0.9)); + listener_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE, SpatialEditor::get_singleton()->get_icon("GizmoListener", "EditorIcons")); { diff --git a/tools/editor/spatial_editor_gizmos.h b/tools/editor/spatial_editor_gizmos.h index a7a6af4b181..64c6a6e8249 100644 --- a/tools/editor/spatial_editor_gizmos.h +++ b/tools/editor/spatial_editor_gizmos.h @@ -464,34 +464,34 @@ public: class SpatialEditorGizmos { public: - Ref create_line_material(const Color& p_base_color); - Ref create_solid_material(const Color& p_base_color); - Ref handle2_material; - Ref handle_material; - Ref light_material; - Ref light_material_omni_icon; - Ref light_material_directional_icon; - Ref camera_material; - Ref skeleton_material; - Ref room_material; - Ref portal_material; - Ref raycast_material; - Ref visibility_notifier_material; - Ref car_wheel_material; - Ref joint_material; + Ref create_line_material(const Color& p_base_color); + Ref create_solid_material(const Color& p_base_color); + Ref handle2_material; + Ref handle_material; + Ref light_material; + Ref light_material_omni_icon; + Ref light_material_directional_icon; + Ref camera_material; + Ref skeleton_material; + Ref room_material; + Ref portal_material; + Ref raycast_material; + Ref visibility_notifier_material; + Ref car_wheel_material; + Ref joint_material; - Ref navmesh_edge_material; - Ref navmesh_solid_material; - Ref navmesh_edge_material_disabled; - Ref navmesh_solid_material_disabled; + Ref navmesh_edge_material; + Ref navmesh_solid_material; + Ref navmesh_edge_material_disabled; + Ref navmesh_solid_material_disabled; - Ref listener_icon; + Ref listener_icon; - Ref sample_player_icon; - Ref stream_player_icon; - Ref visibility_notifier_icon; + Ref sample_player_icon; + Ref stream_player_icon; + Ref visibility_notifier_icon; - Ref shape_material; + Ref shape_material; Ref handle_t; Ref pos3d_mesh; From 51ad1c16683589aa1ebc73e29416e1b0fc50d30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Sat, 29 Oct 2016 03:34:53 +0200 Subject: [PATCH 017/223] Adopt simpler strategy for big libs on Windows --- drivers/SCsub | 52 +++------------------------------------------------ methods.py | 10 +++++++++- 2 files changed, 12 insertions(+), 50 deletions(-) diff --git a/drivers/SCsub b/drivers/SCsub index 1f1509efa87..9c561b5c13b 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -35,52 +35,6 @@ if (env["tools"]=="yes"): if env['vsproj']=="yes": env.AddToVSProject(env.drivers_sources) - -# Split drivers, this used to be needed for windows until separate builders for windows were created -# FIXME: Check if still needed now that the drivers were made more lightweight -if (env.split_drivers): - import string - - num = 0 - cur_base = "" - max_src = 64 - list = [] - lib_list = [] - - for f in env.drivers_sources: - fname = "" - if type(f) == type(""): - fname = env.File(f).path - else: - fname = env.File(f)[0].path - fname = fname.replace("\\", "/") - base = string.join(fname.split("/")[:2], "/") - if base != cur_base and len(list) > max_src: - if num > 0: - lib = env.Library("drivers"+str(num), list) - lib_list.append(lib) - list = [] - num = num+1 - cur_base = base - list.append(f) - - lib = env.Library("drivers"+str(num), list) - lib_list.append(lib) - - if len(lib_list) > 0: - import os, sys - if os.name=='posix' and sys.platform=='msys': - env.Replace(ARFLAGS=['rcsT']) - - lib = env.Library("drivers_collated", lib_list) - lib_list = [lib] - - drivers_base=[] - env.add_source_files(drivers_base,"*.cpp") - lib_list.insert(0, env.Library("drivers", drivers_base)) - - env.Prepend(LIBS=lib_list) -else: - env.add_source_files(env.drivers_sources,"*.cpp") - lib = env.Library("drivers",env.drivers_sources) - env.Prepend(LIBS=[lib]) +env.add_source_files(env.drivers_sources,"*.cpp") +lib = env.Library("drivers",env.drivers_sources) +env.Prepend(LIBS=[lib]) diff --git a/methods.py b/methods.py index 477fe4f12f2..26c1c6e68e0 100755 --- a/methods.py +++ b/methods.py @@ -1352,7 +1352,15 @@ def use_windows_spawn_fix(self, platform=None): if (os.name!="nt"): return #not needed, only for windows - self.split_drivers=True + # On Windows, due to the limited command line length, when creating a static library + # from a very high number of objects SCons will invoke "ar" once per object file; + # that makes object files with same names to be overwritten so the last wins and + # the library looses symbols defined by overwritten objects. + # By enabling quick append instead of the default mode (replacing), libraries will + # got built correctly regardless the invokation strategy. + # Furthermore, since SCons will rebuild the library from scratch when an object file + # changes, no multiple versions of the same object file will be present. + self.Replace(ARFLAGS='q') import subprocess From 313b7a50b6a51767fe21d3fce0e782b399bee2e1 Mon Sep 17 00:00:00 2001 From: Ignacio Etcheverry Date: Sat, 29 Oct 2016 22:45:26 +0200 Subject: [PATCH 018/223] Core: Register TriangleMesh type --- core/register_core_types.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/register_core_types.cpp b/core/register_core_types.cpp index c3a127afb9d..4ceda6fc51d 100644 --- a/core/register_core_types.cpp +++ b/core/register_core_types.cpp @@ -34,6 +34,7 @@ #include "os/main_loop.h" #include "io/packet_peer.h" #include "math/a_star.h" +#include "math/triangle_mesh.h" #include "globals.h" #include "object_type_db.h" #include "geometry.h" @@ -134,6 +135,7 @@ void register_core_types() { ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); ObjectTypeDB::register_virtual_type(); From d6567010bf1c65abcbe09b959cde63664778d923 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Sat, 29 Oct 2016 20:48:09 -0300 Subject: [PATCH 019/223] -Many many fixes -Gizmos work again --- drivers/gles3/rasterizer_canvas_gles3.cpp | 23 +- drivers/gles3/rasterizer_gles3.cpp | 6 +- drivers/gles3/rasterizer_scene_gles3.cpp | 316 ++++++++++++++---- drivers/gles3/rasterizer_scene_gles3.h | 17 +- drivers/gles3/rasterizer_storage_gles3.cpp | 216 ++++++------ drivers/gles3/rasterizer_storage_gles3.h | 40 ++- drivers/gles3/shader_compiler_gles3.cpp | 88 ++++- drivers/gles3/shader_compiler_gles3.h | 2 + drivers/gles3/shaders/copy.glsl | 27 +- drivers/gles3/shaders/cubemap_filter.glsl | 2 +- drivers/gles3/shaders/scene.glsl | 76 +++-- methods.py | 1 + scene/3d/visual_instance.h | 1 + scene/resources/environment.cpp | 224 +++++++++++++ scene/resources/environment.h | 69 ++++ scene/resources/material.cpp | 69 +++- scene/resources/material.h | 16 +- servers/visual/rasterizer.h | 9 +- servers/visual/visual_server_raster.h | 11 +- servers/visual/visual_server_scene.cpp | 78 ++++- servers/visual/visual_server_scene.h | 2 +- servers/visual_server.h | 13 +- tools/editor/editor_node.cpp | 2 +- .../editor/plugins/spatial_editor_plugin.cpp | 9 +- tools/editor/plugins/spatial_editor_plugin.h | 2 +- tools/editor/spatial_editor_gizmos.cpp | 173 +++++----- tools/editor/spatial_editor_gizmos.h | 3 +- 27 files changed, 1143 insertions(+), 352 deletions(-) diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index b2228a6cfa0..c10c5fee651 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -914,6 +914,7 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const } + if (shader_ptr && shader_ptr!=shader_cache) { state.canvas_shader.set_custom_shader(shader_ptr->custom_code_id); @@ -925,6 +926,7 @@ void RasterizerCanvasGLES3::canvas_render_items(Item *p_item_list,int p_z,const int tc = material_ptr->textures.size(); RID* textures = material_ptr->textures.ptr(); + ShaderLanguage::ShaderNode::Uniform::Hint* texture_hints = shader_ptr->texture_hints.ptr(); for(int i=0;itexture_owner.getornull( textures[i] ); if (!t) { + + switch(texture_hints[i]) { + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: { + glBindTexture(GL_TEXTURE_2D,storage->resources.black_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL: { + glBindTexture(GL_TEXTURE_2D,storage->resources.normal_tex); + } break; + default: { + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + } break; + } + //check hints - glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + continue; } + if (storage->config.srgb_decode_supported && t->using_srgb) { + //no srgb in 2D + glTexParameteri(t->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + t->using_srgb=false; + } + glBindTexture(t->target,t->tex_id); } diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index 83c40edc1da..e838020c45d 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -253,10 +253,8 @@ void RasterizerGLES3::make_current() { void RasterizerGLES3::register_config() { - GLOBAL_DEF("rendering/gles3/framebuffer_format",RasterizerStorageGLES3::FBO_FORMAT_FLOAT); - Globals::get_singleton()->set_custom_property_info("rendering/gles3/framebuffer_format",PropertyInfo(Variant::INT,"",PROPERTY_HINT_ENUM,"16 Bits,32 Bits,Half Float")); - GLOBAL_DEF("rendering/gles3/lighting_technique",1); - Globals::get_singleton()->set_custom_property_info("rendering/gles3/lighting_technique",PropertyInfo(Variant::INT,"",PROPERTY_HINT_ENUM,"Forward,Deferred")); + GLOBAL_DEF("rendering/gles3/render_architecture",0); + Globals::get_singleton()->set_custom_property_info("rendering/gles3/render_architecture",PropertyInfo(Variant::INT,"",PROPERTY_HINT_ENUM,"Desktop,Mobile")); GLOBAL_DEF("rendering/gles3/use_nearest_mipmap_filter",false); GLOBAL_DEF("rendering/gles3/anisotropic_filter_level",4.0); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index aadf9e63360..f7baf1a30b4 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -150,27 +150,19 @@ void RasterizerSceneGLES3::environment_set_fog(RID p_env,bool p_enable,float p_b } -void RasterizerSceneGLES3::environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper){ - -} -void RasterizerSceneGLES3::environment_set_brightness(RID p_env,bool p_enable,float p_brightness){ - -} -void RasterizerSceneGLES3::environment_set_contrast(RID p_env,bool p_enable,float p_contrast){ - -} -void RasterizerSceneGLES3::environment_set_saturation(RID p_env,bool p_enable,float p_saturation){ - -} -void RasterizerSceneGLES3::environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp){ +void RasterizerSceneGLES3::environment_set_tonemap(RID p_env, bool p_enable, float p_exposure, float p_white, float p_min_luminance, float p_max_luminance, float p_auto_exp_speed, float p_auto_exp_scale, VS::EnvironmentToneMapper p_tone_mapper){ } +void RasterizerSceneGLES3::environment_set_adjustment(RID p_env,bool p_enable,float p_brightness,float p_contrast,float p_saturation,RID p_ramp) { +} + RID RasterizerSceneGLES3::light_instance_create(RID p_light) { + print_line("hello light"); LightInstance *light_instance = memnew( LightInstance ); light_instance->light=p_light; @@ -204,6 +196,29 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m glEnable(GL_CULL_FACE); } + if (state.current_line_width!=p_material->line_width) { + glLineWidth(p_material->line_width); + state.current_line_width=p_material->line_width; + } + + if (state.current_depth_draw!=p_material->shader->spatial.depth_draw_mode) { + switch(p_material->shader->spatial.depth_draw_mode) { + case RasterizerStorageGLES3::Shader::Spatial::DEPTH_DRAW_ALPHA_PREPASS: + case RasterizerStorageGLES3::Shader::Spatial::DEPTH_DRAW_OPAQUE: { + + glDepthMask(!p_alpha_pass); + } break; + case RasterizerStorageGLES3::Shader::Spatial::DEPTH_DRAW_ALWAYS: { + glDepthMask(GL_TRUE); + } break; + case RasterizerStorageGLES3::Shader::Spatial::DEPTH_DRAW_NEVER: { + glDepthMask(GL_FALSE); + } break; + } + + state.current_depth_draw=p_material->shader->spatial.depth_draw_mode; + } + //glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); /* @@ -216,7 +231,7 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m //if (p_material->line_width) // glLineWidth(p_material->line_width); - +#if 0 //blend mode if (state.current_blend_mode!=p_material->shader->spatial.blend_mode) { @@ -256,7 +271,7 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m state.current_blend_mode=p_material->shader->spatial.blend_mode; } - +#endif //material parameters @@ -273,6 +288,7 @@ bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_m int tc = p_material->textures.size(); RID* textures = p_material->textures.ptr(); + ShaderLanguage::ShaderNode::Uniform::Hint* texture_hints = p_material->shader->texture_hints.ptr(); for(int i=0;itexture_owner.getornull( textures[i] ); if (!t) { //check hints + switch(texture_hints[i]) { + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: { + glBindTexture(GL_TEXTURE_2D,storage->resources.black_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL: { + glBindTexture(GL_TEXTURE_2D,storage->resources.normal_tex); + } break; + default: { + glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); + } break; + } glBindTexture(GL_TEXTURE_2D,storage->resources.white_tex); continue; } + if (storage->config.srgb_decode_supported) { + //if SRGB decode extension is present, simply switch the texture to whathever is needed + bool must_srgb=false; + + if (t->srgb && texture_hints[i]==ShaderLanguage::ShaderNode::Uniform::HINT_ALBEDO) { + must_srgb=true; + } + + if (t->using_srgb!=must_srgb) { + if (must_srgb) { + glTexParameteri(t->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); +#ifdef TOOLS_ENABLED + if (!(t->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { + t->flags|=VS::TEXTURE_FLAG_CONVERT_TO_LINEAR; + //notify that texture must be set to linear beforehand, so it works in other platforms when exported + } +#endif + + } else { + glTexParameteri(t->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + } + t->using_srgb=must_srgb; + } + } + + glBindTexture(t->target,t->tex_id); } @@ -348,7 +401,60 @@ void RasterizerSceneGLES3::_setup_light(LightInstance *p_light) { glBindBufferBase(GL_UNIFORM_BUFFER,3,p_light->light_ubo); //bind light uniform } +void RasterizerSceneGLES3::_setup_transform(InstanceBase *p_instance,const Transform& p_view_transform,const CameraMatrix& p_projection) { + if (p_instance->billboard || p_instance->billboard_y || p_instance->depth_scale) { + + Transform xf=p_instance->transform; + if (p_instance->depth_scale) { + + if (p_projection.matrix[3][3]) { + //orthogonal matrix, try to do about the same + //with viewport size + //real_t w = Math::abs( 1.0/(2.0*(p_projection.matrix[0][0])) ); + real_t h = Math::abs( 1.0/(2.0*p_projection.matrix[1][1]) ); + float sc = (h*2.0); //consistent with Y-fov + xf.basis.scale( Vector3(sc,sc,sc)); + } else { + //just scale by depth + real_t sc = Plane(p_view_transform.origin,-p_view_transform.get_basis().get_axis(2)).distance_to(xf.origin); + xf.basis.scale( Vector3(sc,sc,sc)); + } + } + + if (p_instance->billboard) { + + Vector3 scale = xf.basis.get_scale(); + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { + xf.set_look_at(xf.origin, xf.origin + p_view_transform.get_basis().get_axis(2), -p_view_transform.get_basis().get_axis(1)); + } else { + xf.set_look_at(xf.origin, xf.origin + p_view_transform.get_basis().get_axis(2), p_view_transform.get_basis().get_axis(1)); + } + + xf.basis.scale(scale); + } + + if (p_instance->billboard_y) { + + Vector3 scale = xf.basis.get_scale(); + Vector3 look_at = p_view_transform.get_origin(); + look_at.y = 0.0; + Vector3 look_at_norm = look_at.normalized(); + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { + xf.set_look_at(xf.origin,xf.origin + look_at_norm, Vector3(0.0, -1.0, 0.0)); + } else { + xf.set_look_at(xf.origin,xf.origin + look_at_norm, Vector3(0.0, 1.0, 0.0)); + } + xf.basis.scale(scale); + } + state.scene_shader.set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, xf); + + } else { + state.scene_shader.set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, p_instance->transform); + } +} void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_element_count,const Transform& p_view_transform,const CameraMatrix& p_projection,RasterizerStorageGLES3::Texture* p_base_env,bool p_reverse_cull,bool p_alpha_pass) { @@ -372,12 +478,18 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_e glActiveTexture(GL_TEXTURE0+storage->config.max_texture_image_units-2); glBindTexture(p_base_env->target,p_base_env->tex_id); state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_CUBEMAP,true); + } else { + state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_CUBEMAP,false); + } + state.scene_shader.set_conditional(SceneShaderGLES3::USE_SKELETON,false); state.current_blend_mode=-1; + state.current_line_width=-1; + state.current_depth_draw=-1; glDisable(GL_BLEND); @@ -390,6 +502,8 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_e int prev_blend=-1; int current_blend_mode=-1; + bool prev_additive=false; + for (int i=0;imirror,p_reverse_cull); state.scene_shader.set_uniform(SceneShaderGLES3::NORMAL_MULT, e->instance->mirror?-1.0:1.0); - state.scene_shader.set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, e->instance->transform); + + _setup_transform(e->instance,p_view_transform,p_projection); // _render(e->geometry, material, skeleton,e->owner,e->instance->transform); @@ -567,6 +687,10 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements,int p_e prev_material=material; prev_base_type=e->instance->base_type; prev_geometry=e->geometry; + prev_additive=additive; + prev_light_type=light_type; + prev_light_index=light_index; + } //print_line("shaderchanges: "+itos(p_alpha_pass)+": "+itos(_rinfo.shader_change_count)); @@ -824,6 +948,19 @@ void RasterizerSceneGLES3::_draw_skybox(RID p_skybox,CameraMatrix& p_projection, glActiveTexture(GL_TEXTURE0); glBindTexture(tex->target,tex->tex_id); + + if (storage->config.srgb_decode_supported && tex->srgb && !tex->using_srgb) { + + glTexParameteri(tex->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + tex->using_srgb=true; +#ifdef TOOLS_ENABLED + if (!(tex->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { + tex->flags|=VS::TEXTURE_FLAG_CONVERT_TO_LINEAR; + //notify that texture must be set to linear beforehand, so it works in other platforms when exported + } +#endif + } + glDepthMask(GL_TRUE); glEnable(GL_DEPTH_TEST); glDisable(GL_CULL_FACE); @@ -876,6 +1013,7 @@ void RasterizerSceneGLES3::_draw_skybox(RID p_skybox,CameraMatrix& p_projection, glDrawArrays(GL_TRIANGLE_FAN,0,4); glBindVertexArray(0); + glColorMask(1,1,1,1); storage->shaders.copy.set_conditional(CopyShaderGLES3::USE_CUBEMAP,false); @@ -977,9 +1115,10 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu ERR_FAIL_COND( directional_light_instance_count >= RenderList::MAX_LIGHTS); directional_light_instances[directional_light_instance_count++]=li; - li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; - li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; - li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + Color linear_col = li->light_ptr->color.to_linear(); + li->light_ubo_data.light_color_energy[0]=linear_col.r; + li->light_ubo_data.light_color_energy[1]=linear_col.g; + li->light_ubo_data.light_color_energy[2]=linear_col.b; li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; //omni, keep at 0 @@ -1018,9 +1157,10 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu } break; case VS::LIGHT_OMNI: { - li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; - li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; - li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + Color linear_col = li->light_ptr->color.to_linear(); + li->light_ubo_data.light_color_energy[0]=linear_col.r; + li->light_ubo_data.light_color_energy[1]=linear_col.g; + li->light_ubo_data.light_color_energy[2]=linear_col.b; li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; Vector3 pos = p_camera_inverse_transform.xform(li->transform.origin); @@ -1050,9 +1190,10 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu } break; case VS::LIGHT_SPOT: { - li->light_ubo_data.light_color_energy[0]=li->light_ptr->color.r; - li->light_ubo_data.light_color_energy[1]=li->light_ptr->color.g; - li->light_ubo_data.light_color_energy[2]=li->light_ptr->color.b; + Color linear_col = li->light_ptr->color.to_linear(); + li->light_ubo_data.light_color_energy[0]=linear_col.r; + li->light_ubo_data.light_color_energy[1]=linear_col.g; + li->light_ubo_data.light_color_energy[2]=linear_col.b; li->light_ubo_data.light_color_energy[3]=li->light_ptr->param[VS::LIGHT_PARAM_ENERGY]; Vector3 pos = p_camera_inverse_transform.xform(li->transform.origin); @@ -1108,6 +1249,51 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu } +void RasterizerSceneGLES3::_copy_screen() { + + glBindVertexArray(storage->resources.quadie_array); + glDrawArrays(GL_TRIANGLE_FAN,0,4); + glBindVertexArray(0); + +} + +void RasterizerSceneGLES3::_copy_to_front_buffer(Environment *env) { + + //copy to front buffer + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glDepthFunc(GL_LEQUAL); + glColorMask(1,1,1,1); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D,storage->frame.current_rt->buffers.diffuse); + + storage->shaders.copy.set_conditional(CopyShaderGLES3::DISABLE_ALPHA,true); + + if (!env) { + //no environment, simply convert from linear to srgb + storage->shaders.copy.set_conditional(CopyShaderGLES3::LINEAR_TO_SRGB,true); + } else { + storage->shaders.copy.set_conditional(CopyShaderGLES3::LINEAR_TO_SRGB,true); + + } + + storage->shaders.copy.bind(); + + _copy_screen(); + + + //turn off everything used + storage->shaders.copy.set_conditional(CopyShaderGLES3::LINEAR_TO_SRGB,false); + storage->shaders.copy.set_conditional(CopyShaderGLES3::DISABLE_ALPHA,false); + + +} + void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment){ //first of all, make a new render pass @@ -1125,6 +1311,8 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM current_material_index=0; + bool use_mrt=false; + //fill list for(int i=0;iframe.current_rt->front.fbo); - RasterizerStorageGLES3::Texture* env_radiance_tex; + RasterizerStorageGLES3::Texture* env_radiance_tex=NULL; + + if (use_mrt) { + + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->buffers.fbo); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_MULTIPLE_RENDER_TARGETS,true); + + Color black(0,0,0,0); + glClearBufferfv(GL_COLOR,1,black.components); // specular + glClearBufferfv(GL_COLOR,2,black.components); // normal metal rough + + } else { + + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->buffers.alpha_fbo); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_MULTIPLE_RENDER_TARGETS,false); + + } + + + glClearDepth(1.0); + glClear(GL_DEPTH_BUFFER_BIT); + + Color clear_color(0,0,0,0); if (!env || env->bg_mode==VS::ENV_BG_CLEAR_COLOR) { if (storage->frame.clear_request) { - glClearColor( storage->frame.clear_request_color.r, storage->frame.clear_request_color.g, storage->frame.clear_request_color.b, storage->frame.clear_request_color.a ); - glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); + clear_color = storage->frame.clear_request_color.to_linear(); storage->frame.clear_request=false; } } else if (env->bg_mode==VS::ENV_BG_COLOR) { - - glClearColor( env->bg_color.r, env->bg_color.g, env->bg_color.b, env->bg_color.a ); - glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); + clear_color = env->bg_color.to_linear(); storage->frame.clear_request=false; } else if (env->bg_mode==VS::ENV_BG_SKYBOX) { if (env->skybox_radiance.is_valid()) { env_radiance_tex = storage->texture_owner.getornull(env->skybox_radiance); } - glClear(GL_DEPTH_BUFFER_BIT); storage->frame.clear_request=false; } else { - glClear(GL_DEPTH_BUFFER_BIT); storage->frame.clear_request=false; - } - state.current_depth_test=true; - state.current_depth_mask=true; + glClearBufferfv(GL_COLOR,0,clear_color.components); // specular + + state.texscreen_copied=false; glBlendEquation(GL_FUNC_ADD); @@ -1214,43 +1417,33 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM } glDisable(GL_BLEND); - //current_blend_mode=VS::MATERIAL_BLEND_MODE_MIX; - render_list.sort_by_key(false); - //_render_list_forward(&opaque_render_list,camera_transform,camera_transform_inverse,camera_projection,false,fragment_lighting); -/* - if (draw_tex_background) { - - //most 3D vendors recommend drawing a texture bg or skybox here, - //after opaque geometry has been drawn - //so the zbuffer can get rid of most pixels - _draw_tex_bg(); - } -*/ if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); } else { glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); } -// glDisable(GL_BLEND); -// current_blend_mode=VS::MATERIAL_BLEND_MODE_MIX; -// state.scene_shader.set_conditional(SceneShaderGLES3::USE_GLOW,false); -// if (current_env && current_env->fx_enabled[VS::ENV_FX_GLOW]) { -// glColorMask(1,1,1,0); //don't touch alpha -// } - - _render_list(render_list.elements,render_list.element_count,p_cam_transform,p_cam_projection,env_radiance_tex,false,false); + state.scene_shader.set_conditional(SceneShaderGLES3::USE_MULTIPLE_RENDER_TARGETS,false); + if (env && env->bg_mode==VS::ENV_BG_SKYBOX) { + if (use_mrt) { + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->buffers.alpha_fbo); //switch to alpha fbo for skybox, only diffuse/ambient matters + } + _draw_skybox(env->skybox_color,p_cam_projection,p_cam_transform,storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP],env->skybox_scale); } + + + + //_render_list_forward(&alpha_render_list,camera_transform,camera_transform_inverse,camera_projection,false,fragment_lighting,true); //glColorMask(1,1,1,1); @@ -1261,13 +1454,15 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM glDepthMask(GL_TRUE); glEnable(GL_DEPTH_TEST); glDisable(GL_SCISSOR_TEST); - glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->front.fbo); + glBindFramebuffer(GL_FRAMEBUFFER,storage->frame.current_rt->buffers.alpha_fbo); render_list.sort_by_depth(true); - _render_list(&render_list.elements[render_list.max_elements-render_list.alpha_element_count],render_list.alpha_element_count,p_cam_transform,p_cam_projection,env_radiance_tex,false,false); + _render_list(&render_list.elements[render_list.max_elements-render_list.alpha_element_count],render_list.alpha_element_count,p_cam_transform,p_cam_projection,env_radiance_tex,false,true); + _copy_to_front_buffer(env); + #if 0 if (use_fb) { @@ -1418,6 +1613,7 @@ bool RasterizerSceneGLES3::free(RID p_rid) { if (light_instance_owner.owns(p_rid)) { + print_line("bye light"); LightInstance *light_instance = light_instance_owner.getptr(p_rid); glDeleteBuffers(1,&light_instance->light_ubo); light_instance_owner.free(p_rid); diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 4b1b77f13c1..5457b5cde3b 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -18,10 +18,11 @@ public: struct State { - bool current_depth_test; - bool current_depth_mask; + bool texscreen_copied; int current_blend_mode; + float current_line_width; + int current_depth_draw; SceneShaderGLES3 scene_shader; @@ -57,6 +58,8 @@ public: GLuint skybox_verts; GLuint skybox_array; + + } state; @@ -109,11 +112,8 @@ public: virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode); virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture); - virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper); - virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness); - virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast); - virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation); - virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp); + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,float p_auto_exp_scale,VS::EnvironmentToneMapper p_tone_mapper); + virtual void environment_set_adjustment(RID p_env,bool p_enable,float p_brightness,float p_contrast,float p_saturation,RID p_ramp); /* LIGHT INSTANCE */ @@ -314,6 +314,7 @@ public: RenderList render_list; _FORCE_INLINE_ bool _setup_material(RasterizerStorageGLES3::Material* p_material,bool p_alpha_pass); + _FORCE_INLINE_ void _setup_transform(InstanceBase *p_instance,const Transform& p_view_transform,const CameraMatrix& p_projection); _FORCE_INLINE_ void _setup_geometry(RenderList::Element *e); _FORCE_INLINE_ void _render_geometry(RenderList::Element *e); _FORCE_INLINE_ void _setup_light(LightInstance *p_light); @@ -327,6 +328,8 @@ public: void _setup_environment(Environment *env,CameraMatrix& p_cam_projection, const Transform& p_cam_transform); void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform); + void _copy_screen(); + void _copy_to_front_buffer(Environment *env); virtual void render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment); diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 1141a605c5e..a678c460f59 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -57,9 +57,6 @@ #define _EXT_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE -#define _TEXTURE_SRGB_DECODE_EXT 0x8A48 -#define _DECODE_EXT 0x8A49 -#define _SKIP_DECODE_EXT 0x8A4A #define _GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE @@ -679,8 +676,10 @@ void RasterizerStorageGLES3::texture_set_data(RID p_texture,const Image& p_image if (texture->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + texture->using_srgb=true; } else { glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + texture->using_srgb=false; } } @@ -892,8 +891,10 @@ void RasterizerStorageGLES3::texture_set_flags(RID p_texture,uint32_t p_flags) { if (texture->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR) { glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + texture->using_srgb=true; } else { glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_SKIP_DECODE_EXT); + texture->using_srgb=false; } } @@ -1024,6 +1025,19 @@ RID RasterizerStorageGLES3::texture_create_radiance_cubemap(RID p_source,int p_r glActiveTexture(GL_TEXTURE0); glBindTexture(texture->target, texture->tex_id); + if (config.srgb_decode_supported && texture->srgb && !texture->using_srgb) { + + glTexParameteri(texture->target,_TEXTURE_SRGB_DECODE_EXT,_DECODE_EXT); + texture->using_srgb=true; +#ifdef TOOLS_ENABLED + if (!(texture->flags&VS::TEXTURE_FLAG_CONVERT_TO_LINEAR)) { + texture->flags|=VS::TEXTURE_FLAG_CONVERT_TO_LINEAR; + //notify that texture must be set to linear beforehand, so it works in other platforms when exported + } +#endif + } + + glActiveTexture(GL_TEXTURE1); GLuint new_cubemap; glGenTextures(1, &new_cubemap); @@ -1302,6 +1316,7 @@ void RasterizerStorageGLES3::_update_shader(Shader* p_shader) const { p_shader->ubo_size=gen_code.uniform_total_size; p_shader->ubo_offsets=gen_code.uniform_offsets; p_shader->texture_count=gen_code.texture_uniforms.size(); + p_shader->texture_hints=gen_code.texture_hints; //all materials using this shader will have to be invalidated, unfortunately @@ -1510,7 +1525,17 @@ Variant RasterizerStorageGLES3::material_get_param(RID p_material, const StringN return Variant(); } -_FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataType type, const Variant& value, uint8_t *data) { +void RasterizerStorageGLES3::material_set_line_width(RID p_material, float p_width) { + + Material *material = material_owner.get( p_material ); + ERR_FAIL_COND(!material); + + material->line_width=p_width; + + +} + +_FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataType type, const Variant& value, uint8_t *data,bool p_linear_color) { switch(type) { case ShaderLanguage::TYPE_BOOL: { @@ -1683,6 +1708,10 @@ _FORCE_INLINE_ static void _fill_std140_variant_ubo_value(ShaderLanguage::DataTy if (value.get_type()==Variant::COLOR) { Color v=value; + if (p_linear_color) { + v=v.to_linear(); + } + gui[0]=v.r; gui[1]=v.g; gui[2]=v.b; @@ -2019,7 +2048,7 @@ void RasterizerStorageGLES3::_update_material(Material* material) { if (V) { //user provided - _fill_std140_variant_ubo_value(E->get().type,V->get(),data); + _fill_std140_variant_ubo_value(E->get().type,V->get(),data,material->shader->mode==VS::SHADER_SPATIAL); } else if (E->get().default_value.size()){ //default value _fill_std140_ubo_value(E->get().type,E->get().default_value,data); @@ -2641,9 +2670,7 @@ void RasterizerStorageGLES3::mesh_remove_surface(RID p_mesh, int p_surface){ Surface *surface = mesh->surfaces[p_surface]; - ERR_FAIL_COND(surface->index_array_len==0); - - glDeleteBuffers(1,&surface->array_id); + glDeleteBuffers(1,&surface->vertex_id); if (surface->index_id) { glDeleteBuffers(1,&surface->index_id); } @@ -3211,14 +3238,14 @@ void RasterizerStorageGLES3::_render_target_clear(RenderTarget *rt) { rt->back.fbo=0; } - if (rt->deferred.fbo) { - glDeleteFramebuffers(1,&rt->deferred.fbo); - glDeleteFramebuffers(1,&rt->deferred.fbo_color); - glDeleteTextures(1,&rt->deferred.albedo_ao); - glDeleteTextures(1,&rt->deferred.normal_special); - glDeleteTextures(1,&rt->deferred.metal_rough_motion); - rt->deferred.fbo=0; - rt->deferred.fbo_color=0; + if (rt->buffers.fbo) { + glDeleteFramebuffers(1,&rt->buffers.fbo); + glDeleteFramebuffers(1,&rt->buffers.alpha_fbo); + glDeleteTextures(1,&rt->buffers.diffuse); + glDeleteTextures(1,&rt->buffers.specular); + glDeleteTextures(1,&rt->buffers.normal_sr); + rt->buffers.fbo=0; + rt->buffers.alpha_fbo=0; } if (rt->depth) { @@ -3239,26 +3266,6 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ if (rt->width<=0 || rt->height<=0) return; - glActiveTexture(GL_TEXTURE0); - - glGenFramebuffers(1, &rt->front.fbo); - glBindFramebuffer(GL_FRAMEBUFFER, rt->front.fbo); - - - glGenRenderbuffers(1, &rt->depth); - glBindRenderbuffer(GL_RENDERBUFFER, rt->depth ); - if (config.fbo_format==FBO_FORMAT_16_BITS) { - glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH_COMPONENT16, rt->width, rt->height); - } else { - glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH24_STENCIL8, rt->width, rt->height); - } - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); - glBindRenderbuffer(GL_RENDERBUFFER, 0 ); - - - glGenTextures(1, &rt->front.color); - glBindTexture(GL_TEXTURE_2D, rt->front.color); - GLuint color_internal_format; GLuint color_format; @@ -3266,21 +3273,8 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ Image::Format image_format; - if (config.fbo_format==FBO_FORMAT_16_BITS) { - if (rt->flags[RENDER_TARGET_TRANSPARENT]) { - color_internal_format=GL_RGB5_A1; - color_format=GL_RGBA; - color_type=GL_UNSIGNED_SHORT_5_5_5_1; - image_format=Image::FORMAT_RGBA5551; - } else { - color_internal_format=GL_RGB565; - color_format=GL_RGB; - color_type=GL_UNSIGNED_SHORT_5_6_5; - image_format=Image::FORMAT_RGB565; - } - - } else if (config.fbo_format==FBO_FORMAT_32_BITS || (config.fbo_format==FBO_FORMAT_FLOAT && rt->flags[RENDER_TARGET_NO_3D])) { + if (config.render_arch==RENDER_ARCH_MOBILE || rt->flags[RENDER_TARGET_NO_3D]) { if (rt->flags[RENDER_TARGET_TRANSPARENT]) { color_internal_format=GL_RGBA8; @@ -3293,53 +3287,75 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ color_type=GL_UNSIGNED_INT_2_10_10_10_REV; image_format=Image::FORMAT_RGBA8;//todo } - } else if (config.fbo_format==FBO_FORMAT_FLOAT) { - + } else { color_internal_format=GL_RGBA16F; color_format=GL_RGBA; color_type=GL_HALF_FLOAT; image_format=Image::FORMAT_RGBAH; } - glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->front.color, 0); - { + /* FRONT FBO */ + + glActiveTexture(GL_TEXTURE0); + + glGenFramebuffers(1, &rt->front.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->front.fbo); + + + glGenRenderbuffers(1, &rt->depth); + glBindRenderbuffer(GL_RENDERBUFFER, rt->depth ); + + + glRenderbufferStorage(GL_RENDERBUFFER,GL_DEPTH24_STENCIL8, rt->width, rt->height); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); + glBindRenderbuffer(GL_RENDERBUFFER, 0 ); + + + glGenTextures(1, &rt->front.color); + glBindTexture(GL_TEXTURE_2D, rt->front.color); + + glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->front.color, 0); + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); + + Texture *tex = texture_owner.get(rt->texture); + tex->format=image_format; + tex->gl_format_cache=color_format; + tex->gl_type_cache=color_type; + tex->gl_internal_format_cache=color_internal_format; + tex->tex_id=rt->front.color; + tex->width=rt->width; + tex->alloc_width=rt->width; + tex->height=rt->height; + tex->alloc_height=rt->height; + + + texture_set_flags(rt->texture,tex->flags); + } - Texture *tex = texture_owner.get(rt->texture); - tex->format=image_format; - tex->gl_format_cache=color_format; - tex->gl_type_cache=color_type; - tex->gl_internal_format_cache=color_internal_format; - tex->tex_id=rt->front.color; - tex->width=rt->width; - tex->alloc_width=rt->width; - tex->height=rt->height; - tex->alloc_height=rt->height; - - texture_set_flags(rt->texture,tex->flags); + /* BACK FBO */ if (!rt->flags[RENDER_TARGET_NO_SAMPLING]) { glGenFramebuffers(1, &rt->back.fbo); glBindFramebuffer(GL_FRAMEBUFFER, rt->back.fbo); - + glBindRenderbuffer(GL_RENDERBUFFER, rt->depth ); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); glGenTextures(1, &rt->back.color); glBindTexture(GL_TEXTURE_2D, rt->back.color); - glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, rt->width, rt->height, 0, color_format, color_type, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -3357,43 +3373,42 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ } } + if (config.render_arch==RENDER_ARCH_DESKTOP && !rt->flags[RENDER_TARGET_NO_3D]) { - if (config.fbo_deferred && !rt->flags[RENDER_TARGET_NO_3D]) { - //regular fbo - glGenFramebuffers(1, &rt->deferred.fbo); - glBindFramebuffer(GL_FRAMEBUFFER, rt->deferred.fbo); + glGenFramebuffers(1, &rt->buffers.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->buffers.fbo); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); - glGenTextures(1, &rt->deferred.albedo_ao); - glBindTexture(GL_TEXTURE_2D, rt->deferred.albedo_ao); + glGenTextures(1, &rt->buffers.diffuse); + glBindTexture(GL_TEXTURE_2D, rt->buffers.diffuse); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->deferred.albedo_ao, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->buffers.diffuse, 0); - glGenTextures(1, &rt->deferred.metal_rough_motion); - glBindTexture(GL_TEXTURE_2D, rt->deferred.metal_rough_motion); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glGenTextures(1, &rt->buffers.specular); + glBindTexture(GL_TEXTURE_2D, rt->buffers.specular); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, rt->width, rt->height, 0, GL_RGBA, GL_HALF_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, rt->deferred.metal_rough_motion, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, rt->buffers.specular, 0); - glGenTextures(1, &rt->deferred.normal_special); - glBindTexture(GL_TEXTURE_2D, rt->deferred.normal_special); + glGenTextures(1, &rt->buffers.normal_sr); + glBindTexture(GL_TEXTURE_2D, rt->buffers.normal_sr); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, rt->width, rt->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, rt->deferred.normal_special, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, rt->buffers.normal_sr, 0); GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -3402,19 +3417,15 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ if (status != GL_FRAMEBUFFER_COMPLETE) { _render_target_clear(rt); ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); - } + } - //regular fbo with color attachment (needed for emission or objects rendered as forward) - glGenFramebuffers(1, &rt->deferred.fbo_color); - glBindFramebuffer(GL_FRAMEBUFFER, rt->deferred.fbo_color); + //alpha fbo + glGenFramebuffers(1, &rt->buffers.alpha_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rt->buffers.alpha_fbo); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->deferred.albedo_ao, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, rt->deferred.metal_rough_motion, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, rt->deferred.normal_special, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, rt->front.color, 0); - + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt->buffers.diffuse, 0); status = glCheckFramebufferStatus(GL_FRAMEBUFFER); glBindFramebuffer(GL_FRAMEBUFFER, config.system_fbo); @@ -3423,6 +3434,7 @@ void RasterizerStorageGLES3::_render_target_allocate(RenderTarget *rt){ _render_target_clear(rt); ERR_FAIL_COND( status != GL_FRAMEBUFFER_COMPLETE ); } + } @@ -3797,8 +3809,8 @@ bool RasterizerStorageGLES3::free(RID p_rid){ void RasterizerStorageGLES3::initialize() { - config.fbo_format=FBOFormat(int(Globals::get_singleton()->get("rendering/gles3/framebuffer_format"))); - config.fbo_deferred=int(Globals::get_singleton()->get("rendering/gles3/lighting_technique")); + config.render_arch=RENDER_ARCH_DESKTOP; + //config.fbo_deferred=int(Globals::get_singleton()->get("rendering/gles3/lighting_technique")); config.system_fbo=0; @@ -3925,10 +3937,10 @@ void RasterizerStorageGLES3::initialize() { glGenVertexArrays(1,&resources.quadie_array); glBindVertexArray(resources.quadie_array); glBindBuffer(GL_ARRAY_BUFFER,resources.quadie); - glVertexAttribPointer(0,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,0); + glVertexAttribPointer(VS::ARRAY_VERTEX,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,0); glEnableVertexAttribArray(0); - glVertexAttribPointer(1,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,((uint8_t*)NULL)+8); - glEnableVertexAttribArray(1); + glVertexAttribPointer(VS::ARRAY_TEX_UV,2,GL_FLOAT,GL_FALSE,sizeof(float)*4,((uint8_t*)NULL)+8); + glEnableVertexAttribArray(4); glBindVertexArray(0); glBindBuffer(GL_ARRAY_BUFFER,0); //unbind } diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index f052aa8019c..b12c2d93b6f 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -13,6 +13,9 @@ class RasterizerCanvasGLES3; class RasterizerSceneGLES3; +#define _TEXTURE_SRGB_DECODE_EXT 0x8A48 +#define _DECODE_EXT 0x8A49 +#define _SKIP_DECODE_EXT 0x8A4A class RasterizerStorageGLES3 : public RasterizerStorage { public: @@ -20,16 +23,15 @@ public: RasterizerCanvasGLES3 *canvas; RasterizerSceneGLES3 *scene; - enum FBOFormat { - FBO_FORMAT_16_BITS, - FBO_FORMAT_32_BITS, - FBO_FORMAT_FLOAT, + enum RenderArchitecture { + RENDER_ARCH_MOBILE, + RENDER_ARCH_DESKTOP, }; struct Config { - FBOFormat fbo_format; - bool fbo_deferred; + RenderArchitecture render_arch; + GLuint system_fbo; //on some devices, such as apple, screen is rendered to yet another fbo. bool shrink_textures_x2; @@ -135,12 +137,15 @@ public: bool active; GLuint tex_id; + bool using_srgb; + uint16_t stored_cube_sides; RenderTarget *render_target; Texture() { + using_srgb=false; stored_cube_sides=0; ignore_mipmaps=false; render_target=NULL; @@ -218,6 +223,8 @@ public: Map default_textures; + Vector texture_hints; + bool valid; String path; @@ -319,12 +326,14 @@ public: SelfList list; SelfList dirty_list; Vector textures; + float line_width; uint32_t index; uint64_t last_pass; Material() : list(this), dirty_list(this) { shader=NULL; + line_width=1.0; ubo_id=0; ubo_size=0; last_pass=0; @@ -346,6 +355,8 @@ public: virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value); virtual Variant material_get_param(RID p_material, const StringName& p_param) const; + virtual void material_set_line_width(RID p_material, float p_width); + void _update_material(Material* material); void update_dirty_materials(); @@ -655,14 +666,13 @@ public: GLuint depth; - struct Deferred { + struct Buffers { GLuint fbo; - GLuint fbo_color; - - GLuint albedo_ao; - GLuint metal_rough_motion; - GLuint normal_special; - } deferred; + GLuint alpha_fbo; //single buffer, just diffuse (for alpha pass) + GLuint specular; + GLuint diffuse; + GLuint normal_sr; + } buffers; int width,height; @@ -679,8 +689,8 @@ public: depth=0; front.fbo=0; back.fbo=0; - deferred.fbo=0; - deferred.fbo_color=0; + buffers.fbo=0; + buffers.alpha_fbo=0; used_in_frame=false; flags[RENDER_TARGET_VFLIP]=false; diff --git a/drivers/gles3/shader_compiler_gles3.cpp b/drivers/gles3/shader_compiler_gles3.cpp index da8f6689d0e..0dff53bfb90 100644 --- a/drivers/gles3/shader_compiler_gles3.cpp +++ b/drivers/gles3/shader_compiler_gles3.cpp @@ -75,25 +75,81 @@ static String _mkid(const String& p_id) { return "m_"+p_id; } +static String f2sp0(float p_float) { + + if (int(p_float)==p_float) + return itos(p_float)+".0"; + else + return rtoss(p_float); +} + static String get_constant_text(SL::DataType p_type, const Vector& p_values) { switch(p_type) { case SL::TYPE_BOOL: return p_values[0].boolean?"true":"false"; - case SL::TYPE_BVEC2: return String()+"bvec2("+(p_values[0].boolean?"true":"false")+(p_values[1].boolean?"true":"false")+")"; - case SL::TYPE_BVEC3: return String()+"bvec3("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+")"; - case SL::TYPE_BVEC4: return String()+"bvec4("+(p_values[0].boolean?"true":"false")+","+(p_values[1].boolean?"true":"false")+","+(p_values[2].boolean?"true":"false")+","+(p_values[3].boolean?"true":"false")+")"; - case SL::TYPE_INT: return rtos(p_values[0].sint); - case SL::TYPE_IVEC2: return String()+"ivec2("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+")"; - case SL::TYPE_IVEC3: return String()+"ivec3("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+")"; - case SL::TYPE_IVEC4: return String()+"ivec4("+rtos(p_values[0].sint)+","+rtos(p_values[1].sint)+","+rtos(p_values[2].sint)+","+rtos(p_values[3].sint)+")"; - case SL::TYPE_UINT: return rtos(p_values[0].real); - case SL::TYPE_UVEC2: return String()+"uvec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; - case SL::TYPE_UVEC3: return String()+"uvec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; - case SL::TYPE_UVEC4: return String()+"uvec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; - case SL::TYPE_FLOAT: return rtos(p_values[0].real); - case SL::TYPE_VEC2: return String()+"vec2("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+")"; - case SL::TYPE_VEC3: return String()+"vec3("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+")"; - case SL::TYPE_VEC4: return String()+"vec4("+rtos(p_values[0].real)+","+rtos(p_values[1].real)+","+rtos(p_values[2].real)+","+rtos(p_values[3].real)+")"; + case SL::TYPE_BVEC2: + case SL::TYPE_BVEC3: + case SL::TYPE_BVEC4: { + + + String text="bvec"+itos(p_type-SL::TYPE_BOOL+1)+"("; + for(int i=0;i0) + text+=","; + + text+=p_values[i].boolean?"true":"false"; + } + text+=")"; + return text; + } + + case SL::TYPE_INT: return itos(p_values[0].sint); + case SL::TYPE_IVEC2: + case SL::TYPE_IVEC3: + case SL::TYPE_IVEC4: { + + String text="ivec"+itos(p_type-SL::TYPE_INT+1)+"("; + for(int i=0;i0) + text+=","; + + text+=itos(p_values[i].sint); + } + text+=")"; + return text; + + } break; + case SL::TYPE_UINT: return itos(p_values[0].uint)+"u"; + case SL::TYPE_UVEC2: + case SL::TYPE_UVEC3: + case SL::TYPE_UVEC4: { + + String text="uvec"+itos(p_type-SL::TYPE_UINT+1)+"("; + for(int i=0;i0) + text+=","; + + text+=itos(p_values[i].uint)+"u"; + } + text+=")"; + return text; + } break; + case SL::TYPE_FLOAT: return f2sp0(p_values[0].real)+"f"; + case SL::TYPE_VEC2: + case SL::TYPE_VEC3: + case SL::TYPE_VEC4: { + + String text="vec"+itos(p_type-SL::TYPE_FLOAT+1)+"("; + for(int i=0;i0) + text+=","; + + text+=f2sp0(p_values[i].real); + } + text+=")"; + return text; + + } break; default: ERR_FAIL_V(String()); } } @@ -189,6 +245,7 @@ String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, Gener } r_gen_code.texture_uniforms.resize(max_texture_uniforms); + r_gen_code.texture_hints.resize(max_texture_uniforms); Vector uniform_sizes; uniform_sizes.resize(max_uniforms); @@ -209,6 +266,7 @@ String ShaderCompilerGLES3::_dump_node_code(SL::Node *p_node, int p_level, Gener r_gen_code.vertex_global+=ucode; r_gen_code.fragment_global+=ucode; r_gen_code.texture_uniforms[E->get().texture_order]=_mkid(E->key()); + r_gen_code.texture_hints[E->get().texture_order]=E->get().hint; } else { if (r_gen_code.uniforms.empty()) { diff --git a/drivers/gles3/shader_compiler_gles3.h b/drivers/gles3/shader_compiler_gles3.h index ad51b999272..dcea82d7735 100644 --- a/drivers/gles3/shader_compiler_gles3.h +++ b/drivers/gles3/shader_compiler_gles3.h @@ -21,6 +21,8 @@ public: Vector defines; Vector texture_uniforms; + Vector texture_hints; + Vector uniform_offsets; uint32_t uniform_total_size; String uniforms; diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/copy.glsl index eb58d664315..79982ecf25d 100644 --- a/drivers/gles3/shaders/copy.glsl +++ b/drivers/gles3/shaders/copy.glsl @@ -5,9 +5,9 @@ layout(location=0) in highp vec4 vertex_attrib; #ifdef USE_CUBEMAP layout(location=4) in vec3 cube_in; #else -layout(location=4) in vec2 uv_in; // attrib:4 +layout(location=4) in vec2 uv_in; #endif -layout(location=5) in vec2 uv2_in; // attrib:5 +layout(location=5) in vec2 uv2_in; #ifdef USE_CUBEMAP out vec3 cube_interp; @@ -40,6 +40,15 @@ uniform sampler2D source; //texunit:0 #endif +float sRGB_gamma_correct(float c){ + float a = 0.055; + if(c < 0.0031308) + return 12.92*c; + else + return (1.0+a)*pow(c, 1.0/2.4) - a; +} + + uniform float stuff; in vec2 uv2_interp; @@ -57,6 +66,20 @@ void main() { vec4 color = texture( source, uv_interp ); #endif +#ifdef LINEAR_TO_SRGB + //regular Linear -> SRGB conversion + vec3 a = vec3(0.055); + color.rgb = mix( (vec3(1.0)+a)*pow(color.rgb,vec3(1.0/2.4))-a , 12.92*color.rgb , lessThan(color.rgb,vec3(0.0031308))); +#endif + +#ifdef DEBUG_GRADIENT + color.rg=uv_interp; + color.b=0.0; +#endif + +#ifdef DISABLE_ALPHA + color.a=1.0; +#endif frag_color = color; } diff --git a/drivers/gles3/shaders/cubemap_filter.glsl b/drivers/gles3/shaders/cubemap_filter.glsl index d3d4cbd435e..998a59833ec 100644 --- a/drivers/gles3/shaders/cubemap_filter.glsl +++ b/drivers/gles3/shaders/cubemap_filter.glsl @@ -3,7 +3,7 @@ layout(location=0) in highp vec2 vertex; -layout(location=1) in highp vec2 uv; +layout(location=4) in highp vec2 uv; out highp vec2 uv_interp; diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 3f942526068..60ac015a176 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -269,11 +269,16 @@ in vec3 normal_interp; /* PBR CHANNELS */ +//used on forward mainly +uniform bool no_ambient_light; + + #ifdef USE_RADIANCE_CUBEMAP uniform sampler2D brdf_texture; //texunit:-1 uniform samplerCube radiance_cube; //texunit:-2 + layout(std140) uniform Radiance { //ubo:2 mat4 radiance_inverse_xform; @@ -346,8 +351,18 @@ in mediump vec4 forward_shadow_pos4; #endif +#ifdef USE_MULTIPLE_RENDER_TARGETS + +layout(location=0) out vec4 diffuse_buffer; +layout(location=1) out vec4 specular_buffer; +layout(location=2) out vec4 normal_mr_buffer; + +#else + layout(location=0) out vec4 frag_color; +#endif + // GGX Specular // Source: http://www.filmicworlds.com/images/ggx-opt/optimized-ggx.hlsl @@ -470,39 +485,46 @@ FRAGMENT_SHADER_CODE /////////////////////// LIGHTING ////////////////////////////// vec3 specular_light = vec3(0.0,0.0,0.0); - vec3 ambient_light = ambient_light_color.rgb; + vec3 ambient_light; vec3 diffuse_light = vec3(0.0,0.0,0.0); vec3 eye_vec = -normalize( vertex_interp ); #ifdef USE_RADIANCE_CUBEMAP - { + if (no_ambient_light) { + ambient_light=vec3(0.0,0.0,0.0); + } else { + { - float ndotv = clamp(dot(normal,eye_vec),0.0,1.0); - vec2 brdf = texture(brdf_texture, vec2(roughness, ndotv)).xy; + float ndotv = clamp(dot(normal,eye_vec),0.0,1.0); + vec2 brdf = texture(brdf_texture, vec2(roughness, ndotv)).xy; - float lod = roughness * 5.0; - vec3 r = reflect(-eye_vec,normal); //2.0 * ndotv * normal - view; // reflect(v, n); - r=normalize((radiance_inverse_xform * vec4(r,0.0)).xyz); - vec3 radiance = textureLod(radiance_cube, r, lod).xyz * ( brdf.x + brdf.y); + float lod = roughness * 5.0; + vec3 r = reflect(-eye_vec,normal); //2.0 * ndotv * normal - view; // reflect(v, n); + r=normalize((radiance_inverse_xform * vec4(r,0.0)).xyz); + vec3 radiance = textureLod(radiance_cube, r, lod).xyz * ( brdf.x + brdf.y); - specular_light=mix(albedo,radiance,specular); + specular_light=mix(albedo,radiance,specular); + } + + { + + vec3 ambient_dir=normalize((radiance_inverse_xform * vec4(normal,0.0)).xyz); + vec3 env_ambient=textureLod(radiance_cube, ambient_dir, 5.0).xyz; + + ambient_light=mix(ambient_light_color.rgb,env_ambient,radiance_ambient_contribution); + } } - { - - vec3 ambient_dir=normalize((radiance_inverse_xform * vec4(normal,0.0)).xyz); - vec3 env_ambient=textureLod(radiance_cube, ambient_dir, 5.0).xyz; - - ambient_light=mix(ambient_light,env_ambient,radiance_ambient_contribution); - } - - #else - ambient_light=albedo; + if (no_ambient_light){ + ambient_light=vec3(0.0,0.0,0.0); + } else { + ambient_light=ambient_light_color.rgb; + } #endif @@ -538,11 +560,25 @@ LIGHT_SHADER_CODE } #endif -#ifdef SHADELESS +#ifdef USE_MULTIPLE_RENDER_TARGETS + //approximate ambient scale for SSAO, since we will lack full ambient + float max_ambient=max(ambient_light.r,max(ambient_light.g,ambient_light.b)); + float max_diffuse=max(diffuse_light.r,max(diffuse_light.g,diffuse_light.b)); + float total_ambient = max_ambient+max_diffuse; + float ambient_scale = (total_ambient>0.0) ? max_ambient/total_ambient : 0.0; + + diffuse_buffer=vec4(diffuse_light+ambient_light,ambient_scale); + specular_buffer=vec4(specular_light,0.0); + normal_mr_buffer=vec4(normal.x,normal.y,max(specular.r,max(specular.g,specular.b)),roughness); + +#else + +#ifdef SHADELESS frag_color=vec4(albedo,alpha); #else frag_color=vec4(ambient_light+diffuse_light+specular_light,alpha); +#endif #endif diff --git a/methods.py b/methods.py index 7ad42a559a7..f6bae4a64e5 100755 --- a/methods.py +++ b/methods.py @@ -233,6 +233,7 @@ def build_glsl_header( filename ): fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, bool p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value?1:0); }\n\n"); fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); diff --git a/scene/3d/visual_instance.h b/scene/3d/visual_instance.h index eb0587815f7..b168bcbfe5c 100644 --- a/scene/3d/visual_instance.h +++ b/scene/3d/visual_instance.h @@ -97,6 +97,7 @@ public: FLAG_MAX=VS::INSTANCE_FLAG_MAX, }; + enum ShadowCastingSetting { SHADOW_CASTING_SETTING_OFF=VS::SHADOW_CASTING_SETTING_OFF, SHADOW_CASTING_SETTING_ON = VS::SHADOW_CASTING_SETTING_ON, diff --git a/scene/resources/environment.cpp b/scene/resources/environment.cpp index 3897c86d5b2..47f4370cc2d 100644 --- a/scene/resources/environment.cpp +++ b/scene/resources/environment.cpp @@ -134,6 +134,151 @@ float Environment::get_ambient_light_skybox_contribution() const{ +void Environment::set_tonemapper(ToneMapper p_tone_mapper) { + + tone_mapper=p_tone_mapper; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); +} + +Environment::ToneMapper Environment::get_tonemapper() const{ + + return tone_mapper; +} + +void Environment::set_tonemap_exposure(float p_exposure){ + + tonemap_exposure=p_exposure; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); +} + +float Environment::get_tonemap_exposure() const{ + + return get_tonemap_auto_exposure(); +} + +void Environment::set_tonemap_white(float p_white){ + + tonemap_white=p_white; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +float Environment::get_tonemap_white() const { + + return tonemap_white; +} + +void Environment::set_tonemap_auto_exposure(bool p_enabled) { + + tonemap_auto_exposure=p_enabled; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +bool Environment::get_tonemap_auto_exposure() const { + + return tonemap_auto_exposure; +} + +void Environment::set_tonemap_auto_exposure_max(float p_auto_exposure_max) { + + tonemap_auto_exposure_max=p_auto_exposure_max; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +float Environment::get_tonemap_auto_exposure_max() const { + + return tonemap_auto_exposure_max; +} + +void Environment::set_tonemap_auto_exposure_min(float p_auto_exposure_min) { + + tonemap_auto_exposure_min=p_auto_exposure_min; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +float Environment::get_tonemap_auto_exposure_min() const { + + return tonemap_auto_exposure_min; +} + +void Environment::set_tonemap_auto_exposure_speed(float p_auto_exposure_speed) { + + tonemap_auto_exposure_speed=p_auto_exposure_speed; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +float Environment::get_tonemap_auto_exposure_speed() const { + + return tonemap_auto_exposure_speed; +} + +void Environment::set_tonemap_auto_exposure_scale(float p_auto_exposure_scale) { + + tonemap_auto_exposure_scale=p_auto_exposure_scale; + VS::get_singleton()->environment_set_tonemap(environment,tonemap_auto_exposure,tonemap_exposure,tonemap_white,tonemap_auto_exposure_min,tonemap_auto_exposure_max,tonemap_auto_exposure_speed,tonemap_auto_exposure_scale,VS::EnvironmentToneMapper(tone_mapper)); + +} +float Environment::get_tonemap_auto_exposure_scale() const { + + return tonemap_auto_exposure_scale; +} + +void Environment::set_adjustment_enable(bool p_enable) { + + adjustment_enabled=p_enable; + VS::get_singleton()->environment_set_adjustment(environment,adjustment_enabled,adjustment_brightness,adjustment_contrast,adjustment_saturation,adjustment_color_correction.is_valid()?adjustment_color_correction->get_rid():RID()); +} + +bool Environment::is_adjustment_enabled() const { + + return adjustment_enabled; +} + + +void Environment::set_adjustment_brightness(float p_brightness) { + + adjustment_brightness=p_brightness; + VS::get_singleton()->environment_set_adjustment(environment,adjustment_enabled,adjustment_brightness,adjustment_contrast,adjustment_saturation,adjustment_color_correction.is_valid()?adjustment_color_correction->get_rid():RID()); + +} +float Environment::get_adjustment_brightness() const { + + return adjustment_brightness; +} + +void Environment::set_adjustment_contrast(float p_contrast) { + + adjustment_contrast=p_contrast; + VS::get_singleton()->environment_set_adjustment(environment,adjustment_enabled,adjustment_brightness,adjustment_contrast,adjustment_saturation,adjustment_color_correction.is_valid()?adjustment_color_correction->get_rid():RID()); + +} +float Environment::get_adjustment_contrast() const { + + return adjustment_contrast; +} + +void Environment::set_adjustment_saturation(float p_saturation) { + + adjustment_saturation=p_saturation; + VS::get_singleton()->environment_set_adjustment(environment,adjustment_enabled,adjustment_brightness,adjustment_contrast,adjustment_saturation,adjustment_color_correction.is_valid()?adjustment_color_correction->get_rid():RID()); + +} +float Environment::get_adjustment_saturation() const { + + return adjustment_saturation; +} + +void Environment::set_adjustment_color_correction(const Ref& p_ramp) { + + adjustment_color_correction=p_ramp; + VS::get_singleton()->environment_set_adjustment(environment,adjustment_enabled,adjustment_brightness,adjustment_contrast,adjustment_saturation,adjustment_color_correction.is_valid()?adjustment_color_correction->get_rid():RID()); + +} +Ref Environment::get_adjustment_color_correction() const { + + return adjustment_color_correction; +} + + void Environment::_validate_property(PropertyInfo& property) const { if (property.name=="background/skybox" || property.name=="background/skybox_scale" || property.name=="ambient_light/skybox_contribution") { @@ -190,6 +335,61 @@ void Environment::_bind_methods() { ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/energy",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_ambient_light_energy"),_SCS("get_ambient_light_energy") ); ADD_PROPERTY(PropertyInfo(Variant::REAL,"ambient_light/skybox_contribution",PROPERTY_HINT_RANGE,"0,1,0.01"),_SCS("set_ambient_light_skybox_contribution"),_SCS("get_ambient_light_skybox_contribution") ); + ObjectTypeDB::bind_method(_MD("set_tonemapper","mode"),&Environment::set_tonemapper); + ObjectTypeDB::bind_method(_MD("get_tonemapper"),&Environment::get_tonemapper); + + ObjectTypeDB::bind_method(_MD("set_tonemap_exposure","exposure"),&Environment::set_tonemap_exposure); + ObjectTypeDB::bind_method(_MD("get_tonemap_exposure"),&Environment::get_tonemap_exposure); + + ObjectTypeDB::bind_method(_MD("set_tonemap_white","white"),&Environment::set_tonemap_white); + ObjectTypeDB::bind_method(_MD("get_tonemap_white"),&Environment::get_tonemap_white); + + ObjectTypeDB::bind_method(_MD("set_tonemap_auto_exposure","auto_exposure"),&Environment::set_tonemap_auto_exposure); + ObjectTypeDB::bind_method(_MD("get_tonemap_auto_exposure"),&Environment::get_tonemap_auto_exposure); + + ObjectTypeDB::bind_method(_MD("set_tonemap_auto_exposure_max","exposure_max"),&Environment::set_tonemap_auto_exposure_max); + ObjectTypeDB::bind_method(_MD("get_tonemap_auto_exposure_max"),&Environment::get_tonemap_auto_exposure_max); + + ObjectTypeDB::bind_method(_MD("set_tonemap_auto_exposure_min","exposure_min"),&Environment::set_tonemap_auto_exposure_min); + ObjectTypeDB::bind_method(_MD("get_tonemap_auto_exposure_min"),&Environment::get_tonemap_auto_exposure_min); + + ObjectTypeDB::bind_method(_MD("set_tonemap_auto_exposure_speed","exposure_speed"),&Environment::set_tonemap_auto_exposure_speed); + ObjectTypeDB::bind_method(_MD("get_tonemap_auto_exposure_speed"),&Environment::get_tonemap_auto_exposure_speed); + + ObjectTypeDB::bind_method(_MD("set_tonemap_auto_exposure_scale","exposure_scale"),&Environment::set_tonemap_auto_exposure_scale); + ObjectTypeDB::bind_method(_MD("get_tonemap_auto_exposure_scale"),&Environment::get_tonemap_auto_exposure_scale); + + ADD_PROPERTY(PropertyInfo(Variant::INT,"tonemap/mode",PROPERTY_HINT_ENUM,"Linear,Log,Reindhart,Filmic,Aces"),_SCS("set_tonemapper"),_SCS("get_tonemapper") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"tonemap/exposure",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_tonemap_exposure"),_SCS("get_tonemap_exposure") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"tonemap/white",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_tonemap_white"),_SCS("get_tonemap_white") ); + ADD_PROPERTY(PropertyInfo(Variant::BOOL,"auto_exposure/enable"),_SCS("set_tonemap_auto_exposure"),_SCS("get_tonemap_auto_exposure") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"auto_exposure/scale",PROPERTY_HINT_RANGE,"0.01,64,0.01"),_SCS("set_tonemap_auto_exposure_scale"),_SCS("get_tonemap_auto_exposure_scale") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"auto_exposure/min_luma",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_tonemap_auto_exposure_min"),_SCS("get_tonemap_auto_exposure_min") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"auto_exposure/max_luma",PROPERTY_HINT_RANGE,"0,16,0.01"),_SCS("set_tonemap_auto_exposure_max"),_SCS("get_tonemap_auto_exposure_max") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"auto_exposure/speed",PROPERTY_HINT_RANGE,"0.01,64,0.01"),_SCS("set_tonemap_auto_exposure_speed"),_SCS("get_tonemap_auto_exposure_speed") ); + + ObjectTypeDB::bind_method(_MD("set_adjustment_enable","enabled"),&Environment::set_adjustment_enable); + ObjectTypeDB::bind_method(_MD("is_adjustment_enabled"),&Environment::is_adjustment_enabled); + + ObjectTypeDB::bind_method(_MD("set_adjustment_brightness","brightness"),&Environment::set_adjustment_brightness); + ObjectTypeDB::bind_method(_MD("get_adjustment_brightness"),&Environment::get_adjustment_brightness); + + ObjectTypeDB::bind_method(_MD("set_adjustment_contrast","contrast"),&Environment::set_adjustment_contrast); + ObjectTypeDB::bind_method(_MD("get_adjustment_contrast"),&Environment::get_adjustment_contrast); + + ObjectTypeDB::bind_method(_MD("set_adjustment_saturation","saturation"),&Environment::set_adjustment_saturation); + ObjectTypeDB::bind_method(_MD("get_adjustment_saturation"),&Environment::get_adjustment_saturation); + + ObjectTypeDB::bind_method(_MD("set_adjustment_color_correction","color_correction"),&Environment::set_adjustment_color_correction); + ObjectTypeDB::bind_method(_MD("get_adjustment_color_correction"),&Environment::get_adjustment_color_correction); + + ADD_PROPERTY(PropertyInfo(Variant::BOOL,"adjustment/enabled"),_SCS("set_adjustment_enable"),_SCS("is_adjustment_enabled") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"adjustment/brightness",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("set_adjustment_brightness"),_SCS("get_adjustment_brightness") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"adjustment/contrast",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("set_adjustment_contrast"),_SCS("get_adjustment_contrast") ); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"adjustment/saturation",PROPERTY_HINT_RANGE,"0.01,8,0.01"),_SCS("set_adjustment_saturation"),_SCS("get_adjustment_saturation") ); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT,"adjustment/color_correction",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_adjustment_color_correction"),_SCS("get_adjustment_color_correction") ); + + GLOBAL_DEF("rendering/skybox/irradiance_cube_resolution",256); GLOBAL_DEF("rendering/skybox/radiance_cube_resolution",64); @@ -203,6 +403,12 @@ void Environment::_bind_methods() { BIND_CONSTANT(GLOW_BLEND_MODE_SCREEN); BIND_CONSTANT(GLOW_BLEND_MODE_SOFTLIGHT); BIND_CONSTANT(GLOW_BLEND_MODE_DISABLED); + BIND_CONSTANT(TONE_MAPPER_LINEAR); + BIND_CONSTANT(TONE_MAPPER_LOG); + BIND_CONSTANT(TONE_MAPPER_REINHARDT); + BIND_CONSTANT(TONE_MAPPER_FILMIC); + BIND_CONSTANT(TONE_MAPPER_ACES_FILMIC); + } @@ -216,6 +422,24 @@ Environment::Environment() { ambient_skybox_contribution=0; + tone_mapper=TONE_MAPPER_LINEAR; + tonemap_exposure=1.0; + tonemap_white=1.0; + tonemap_auto_exposure=false; + tonemap_auto_exposure_max=8; + tonemap_auto_exposure_min=0.4; + tonemap_auto_exposure_speed=0.5; + tonemap_auto_exposure_scale=0.4; + + set_tonemapper(tone_mapper); //update + + adjustment_enabled=false; + adjustment_contrast=1.0; + adjustment_saturation=1.0; + adjustment_brightness=1.0; + + set_adjustment_enable(adjustment_enabled); //update + environment = VS::get_singleton()->environment_create(); } diff --git a/scene/resources/environment.h b/scene/resources/environment.h index 6478f6420f7..394b67dadff 100644 --- a/scene/resources/environment.h +++ b/scene/resources/environment.h @@ -55,6 +55,16 @@ public: GLOW_BLEND_MODE_DISABLED, }; + enum ToneMapper { + TONE_MAPPER_LINEAR, + TONE_MAPPER_LOG, + TONE_MAPPER_REINHARDT, + TONE_MAPPER_FILMIC, + TONE_MAPPER_ACES_FILMIC + }; + + + private: RID environment; @@ -68,6 +78,21 @@ private: float ambient_energy; float ambient_skybox_contribution; + ToneMapper tone_mapper; + float tonemap_exposure; + float tonemap_white; + bool tonemap_auto_exposure; + float tonemap_auto_exposure_max; + float tonemap_auto_exposure_min; + float tonemap_auto_exposure_speed; + float tonemap_auto_exposure_scale; + + bool adjustment_enabled; + float adjustment_contrast; + float adjustment_saturation; + float adjustment_brightness; + Ref adjustment_color_correction; + protected: static void _bind_methods(); @@ -97,14 +122,58 @@ public: float get_ambient_light_skybox_contribution() const; + void set_tonemapper(ToneMapper p_tone_mapper); + ToneMapper get_tonemapper() const; + + void set_tonemap_exposure(float p_exposure); + float get_tonemap_exposure() const; + + void set_tonemap_white(float p_white); + float get_tonemap_white() const; + + void set_tonemap_auto_exposure(bool p_enabled); + bool get_tonemap_auto_exposure() const; + + void set_tonemap_auto_exposure_max(float p_auto_exposure_max); + float get_tonemap_auto_exposure_max() const; + + void set_tonemap_auto_exposure_min(float p_auto_exposure_min); + float get_tonemap_auto_exposure_min() const; + + void set_tonemap_auto_exposure_speed(float p_auto_exposure_speed); + float get_tonemap_auto_exposure_speed() const; + + void set_tonemap_auto_exposure_scale(float p_auto_exposure_scale); + float get_tonemap_auto_exposure_scale() const; + + void set_adjustment_enable(bool p_enable); + bool is_adjustment_enabled() const; + + void set_adjustment_brightness(float p_brightness); + float get_adjustment_brightness() const; + + void set_adjustment_contrast(float p_contrast); + float get_adjustment_contrast() const; + + void set_adjustment_saturation(float p_saturation); + float get_adjustment_saturation() const; + + void set_adjustment_color_correction(const Ref& p_ramp); + Ref get_adjustment_color_correction() const; + + virtual RID get_rid() const; Environment(); ~Environment(); }; + + + VARIANT_ENUM_CAST(Environment::BGMode) VARIANT_ENUM_CAST(Environment::GlowBlendMode) +VARIANT_ENUM_CAST(Environment::ToneMapper) #endif // ENVIRONMENT_H diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index e3df1461c79..3c7b0fbe6c6 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -74,6 +74,7 @@ void FixedSpatialMaterial::init_shaders() { shader_names->subsurface_scattering="subsurface_scattering"; shader_names->refraction="refraction"; shader_names->refraction_roughness="refraction_roughness"; + shader_names->point_size="point_size"; shader_names->texture_names[TEXTURE_ALBEDO]="texture_albedo"; shader_names->texture_names[TEXTURE_SPECULAR]="texture_specular"; @@ -166,15 +167,33 @@ void FixedSpatialMaterial::_update_shader() { code+="uniform vec4 albedo : hint_color;\n"; - code+="uniform sampler2D albedo_texture : hint_albedo;\n"; + code+="uniform sampler2D texture_albedo : hint_albedo;\n"; code+="uniform vec4 specular : hint_color;\n"; code+="uniform float roughness : hint_range(0,1);\n"; - code+="uniform sampler2D specular_texture : hint_white;\n"; + code+="uniform float point_size : hint_range(0,128);\n"; + code+="uniform sampler2D texture_specular : hint_white;\n"; code+="\n\n"; + code+="void vertex() {\n"; + + if (flags[FLAG_SRGB_VERTEX_COLOR]) { + + code+="\tCOLOR.rgb = mix( pow((COLOR.rgb + vec3(0.055)) * (1.0 / (1.0 + 0.055)), vec3(2.4)), COLOR.rgb* (1.0 / 12.92), lessThan(COLOR.rgb,vec3(0.04045)) );\n"; + } + if (flags[FLAG_USE_POINT_SIZE]) { + + code+="\tPOINT_SIZE=point_size;\n"; + } + + code+="}\n"; code+="\n\n"; code+="void fragment() {\n"; - code+="\tvec4 albedo_tex = texture(albedo_texture,UV);\n"; + + if (flags[FLAG_USE_POINT_SIZE]) { + code+="\tvec4 albedo_tex = texture(texture_albedo,POINT_COORD);\n"; + } else { + code+="\tvec4 albedo_tex = texture(texture_albedo,UV);\n"; + } if (flags[FLAG_ALBEDO_FROM_VERTEX_COLOR]) { code+="\talbedo_tex *= COLOR;\n"; @@ -184,7 +203,7 @@ void FixedSpatialMaterial::_update_shader() { if (features[FEATURE_TRANSPARENT]) { code+="\tALPHA = albedo.a * albedo_tex.a;\n"; } - code+="\tvec4 specular_tex = texture(specular_texture,UV);\n"; + code+="\tvec4 specular_tex = texture(texture_specular,UV);\n"; code+="\tSPECULAR = specular.rgb * specular_tex.rgb;\n"; code+="\tROUGHNESS = specular.a * roughness;\n"; code+="}\n"; @@ -526,7 +545,8 @@ void FixedSpatialMaterial::set_texture(TextureParam p_param, const Ref ERR_FAIL_INDEX(p_param,TEXTURE_MAX); textures[p_param]=p_texture; - VS::get_singleton()->material_set_param(_get_material(),shader_names->texture_names[p_param],p_texture); + RID rid = p_texture.is_valid() ? p_texture->get_rid() : RID(); + VS::get_singleton()->material_set_param(_get_material(),shader_names->texture_names[p_param],rid); } Ref FixedSpatialMaterial::get_texture(TextureParam p_param) const { @@ -540,6 +560,7 @@ void FixedSpatialMaterial::_validate_feature(const String& text, Feature feature if (property.name.begins_with(text) && property.name!=text+"/enabled" && !features[feature]) { property.usage=0; } + } void FixedSpatialMaterial::_validate_property(PropertyInfo& property) const { @@ -555,6 +576,28 @@ void FixedSpatialMaterial::_validate_property(PropertyInfo& property) const { } +void FixedSpatialMaterial::set_line_width(float p_line_width) { + + line_width=p_line_width; + VS::get_singleton()->material_set_line_width(_get_material(),line_width); +} + +float FixedSpatialMaterial::get_line_width() const { + + return line_width; +} + +void FixedSpatialMaterial::set_point_size(float p_point_size) { + + point_size=p_point_size; + VS::get_singleton()->material_set_param(_get_material(),shader_names->point_size,p_point_size); +} + +float FixedSpatialMaterial::get_point_size() const { + + return point_size; +} + void FixedSpatialMaterial::_bind_methods() { @@ -600,6 +643,12 @@ void FixedSpatialMaterial::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_refraction_roughness","refraction_roughness"),&FixedSpatialMaterial::set_refraction_roughness); ObjectTypeDB::bind_method(_MD("get_refraction_roughness"),&FixedSpatialMaterial::get_refraction_roughness); + ObjectTypeDB::bind_method(_MD("set_line_width","line_width"),&FixedSpatialMaterial::set_line_width); + ObjectTypeDB::bind_method(_MD("get_line_width"),&FixedSpatialMaterial::get_line_width); + + ObjectTypeDB::bind_method(_MD("set_point_size","point_size"),&FixedSpatialMaterial::set_point_size); + ObjectTypeDB::bind_method(_MD("get_point_size"),&FixedSpatialMaterial::get_point_size); + ObjectTypeDB::bind_method(_MD("set_detail_uv","detail_uv"),&FixedSpatialMaterial::set_detail_uv); ObjectTypeDB::bind_method(_MD("get_detail_uv"),&FixedSpatialMaterial::get_detail_uv); @@ -630,12 +679,16 @@ void FixedSpatialMaterial::_bind_methods() { ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/transparent"),_SCS("set_feature"),_SCS("get_feature"),FEATURE_TRANSPARENT); ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/unshaded"),_SCS("set_flag"),_SCS("get_flag"),FLAG_UNSHADED); ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/on_top"),_SCS("set_flag"),_SCS("get_flag"),FLAG_ONTOP); - ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/vcol_albedo"),_SCS("set_flag"),_SCS("get_flag"),FLAG_ALBEDO_FROM_VERTEX_COLOR); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"flags/use_point_size"),_SCS("set_flag"),_SCS("get_flag"),FLAG_USE_POINT_SIZE); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"vertex_color/use_as_albedo"),_SCS("set_flag"),_SCS("get_flag"),FLAG_ALBEDO_FROM_VERTEX_COLOR); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL,"vertex_color/is_srgb"),_SCS("set_flag"),_SCS("get_flag"),FLAG_SRGB_VERTEX_COLOR); ADD_PROPERTY(PropertyInfo(Variant::INT,"params/diffuse_mode",PROPERTY_HINT_ENUM,"Labert,Lambert Wrap,Oren Nayar,Burley"),_SCS("set_diffuse_mode"),_SCS("get_diffuse_mode")); ADD_PROPERTY(PropertyInfo(Variant::INT,"params/blend_mode",PROPERTY_HINT_ENUM,"Mix,Add,Sub,Mul"),_SCS("set_blend_mode"),_SCS("get_blend_mode")); ADD_PROPERTY(PropertyInfo(Variant::INT,"params/cull_mode",PROPERTY_HINT_ENUM,"Back,Front,Disabled"),_SCS("set_cull_mode"),_SCS("get_cull_mode")); ADD_PROPERTY(PropertyInfo(Variant::INT,"params/depth_draw_mode",PROPERTY_HINT_ENUM,"Opaque Only,Always,Never,Opaque Pre-Pass"),_SCS("set_depth_draw_mode"),_SCS("get_depth_draw_mode")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"params/line_width",PROPERTY_HINT_RANGE,"0.1,128,0.1"),_SCS("set_line_width"),_SCS("get_line_width")); + ADD_PROPERTY(PropertyInfo(Variant::REAL,"params/point_size",PROPERTY_HINT_RANGE,"0.1,128,0.1"),_SCS("set_point_size"),_SCS("get_point_size")); ADD_PROPERTY(PropertyInfo(Variant::COLOR,"albedo/color"),_SCS("set_albedo"),_SCS("get_albedo")); ADD_PROPERTYI(PropertyInfo(Variant::OBJECT,"albedo/texture",PROPERTY_HINT_RESOURCE_TYPE,"Texture"),_SCS("set_texture"),_SCS("get_texture"),TEXTURE_ALBEDO); @@ -737,6 +790,8 @@ void FixedSpatialMaterial::_bind_methods() { BIND_CONSTANT( FLAG_UNSHADED ); BIND_CONSTANT( FLAG_ONTOP ); BIND_CONSTANT( FLAG_ALBEDO_FROM_VERTEX_COLOR ); + BIND_CONSTANT( FLAG_SRGB_VERTEX_COLOR ) + BIND_CONSTANT( FLAG_USE_POINT_SIZE ) BIND_CONSTANT( FLAG_MAX ); BIND_CONSTANT( DIFFUSE_LAMBERT ); @@ -763,6 +818,8 @@ FixedSpatialMaterial::FixedSpatialMaterial() : element(this) { set_subsurface_scattering(0); set_refraction(0); set_refraction_roughness(0); + set_line_width(1); + set_point_size(1); detail_uv=DETAIL_UV_1; blend_mode=BLEND_MODE_MIX; diff --git a/scene/resources/material.h b/scene/resources/material.h index 0f0bf48025f..35f01d47561 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -132,6 +132,8 @@ public: FLAG_UNSHADED, FLAG_ONTOP, FLAG_ALBEDO_FROM_VERTEX_COLOR, + FLAG_SRGB_VERTEX_COLOR, + FLAG_USE_POINT_SIZE, FLAG_MAX }; @@ -146,12 +148,12 @@ private: union MaterialKey { struct { - uint32_t feature_mask : 16; + uint32_t feature_mask : 15; uint32_t detail_uv : 1; uint32_t blend_mode : 2; uint32_t depth_draw_mode : 2; uint32_t cull_mode : 2; - uint32_t flags : 3; + uint32_t flags : 5; uint32_t detail_blend_mode : 2; uint32_t diffuse_mode : 2; uint32_t invalid_key : 1; @@ -213,7 +215,9 @@ private: StringName subsurface_scattering; StringName refraction; StringName refraction_roughness; + StringName point_size; StringName texture_names[TEXTURE_MAX]; + }; static Mutex *material_mutex; @@ -240,6 +244,8 @@ private: float subsurface_scattering; float refraction; float refraction_roughness; + float line_width; + float point_size; DetailUV detail_uv; @@ -306,6 +312,12 @@ public: void set_refraction_roughness(float p_refraction_roughness); float get_refraction_roughness() const; + void set_line_width(float p_line_width); + float get_line_width() const; + + void set_point_size(float p_point_size); + float get_point_size() const; + void set_detail_uv(DetailUV p_detail_uv); DetailUV get_detail_uv() const; diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 78757c799f5..b61a2d0a464 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -54,11 +54,8 @@ public: virtual void environment_set_glow(RID p_env,bool p_enable,int p_radius,float p_intensity,float p_strength,float p_bloom_treshold,VS::EnvironmentGlowBlendMode p_blend_mode)=0; virtual void environment_set_fog(RID p_env,bool p_enable,float p_begin,float p_end,RID p_gradient_texture)=0; - virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,VS::EnvironmentToneMapper p_tone_mapper)=0; - virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness)=0; - virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast)=0; - virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation)=0; - virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp)=0; + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,float p_auto_exp_scale,VS::EnvironmentToneMapper p_tone_mapper)=0; + virtual void environment_set_adjustment(RID p_env,bool p_enable,float p_brightness,float p_contrast,float p_saturation,RID p_ramp)=0; struct InstanceBase : RID_Data { @@ -176,6 +173,8 @@ public: virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value)=0; virtual Variant material_get_param(RID p_material, const StringName& p_param) const=0; + virtual void material_set_line_width(RID p_material, float p_width)=0; + /* MESH API */ virtual RID mesh_create()=0; diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 655ce410bcb..6c7048bd622 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -658,6 +658,10 @@ public: BIND3(material_set_param,RID, const StringName&, const Variant& ) BIND2RC(Variant,material_get_param,RID, const StringName& ) + BIND2(material_set_line_width,RID, float ) + + + /* MESH API */ BIND0R(RID,mesh_create) @@ -864,11 +868,8 @@ public: BIND7(environment_set_glow,RID,bool ,int ,float ,float ,float ,EnvironmentGlowBlendMode ) BIND5(environment_set_fog,RID,bool ,float ,float ,RID ) - BIND8(environment_set_tonemap,RID,bool ,float ,float ,float ,float ,float ,EnvironmentToneMapper ) - BIND3(environment_set_brightness,RID,bool ,float ) - BIND3(environment_set_contrast,RID,bool ,float ) - BIND3(environment_set_saturation,RID,bool ,float ) - BIND3(environment_set_color_correction,RID,bool ,RID ) + BIND9(environment_set_tonemap,RID,bool ,float ,float ,float ,float ,float,float ,EnvironmentToneMapper ) + BIND6(environment_set_adjustment,RID,bool ,float ,float ,float ,RID ) /* SCENARIO API */ diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index 2f0f8f263f4..d958ea4bdb9 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -692,22 +692,82 @@ void VisualServerScene::instance_set_extra_visibility_margin( RID p_instance, re } -// don't use these in a game! -Vector VisualServerScene::instances_cull_aabb(const AABB& p_aabb, RID p_scenario) const{ +Vector VisualServerScene::instances_cull_aabb(const AABB& p_aabb, RID p_scenario) const { - return Vector(); + + Vector instances; + Scenario *scenario=scenario_owner.get(p_scenario); + ERR_FAIL_COND_V(!scenario,instances); + + const_cast(this)->update_dirty_instances(); // check dirty instances before culling + + int culled=0; + Instance *cull[1024]; + culled=scenario->octree.cull_AABB(p_aabb,cull,1024); + + for (int i=0;iobject_ID==0) + continue; + + instances.push_back(instance->object_ID); + } + + return instances; } - Vector VisualServerScene::instances_cull_ray(const Vector3& p_from, const Vector3& p_to, RID p_scenario) const{ - return Vector(); -} -Vector VisualServerScene::instances_cull_convex(const Vector& p_convex, RID p_scenario) const { + Vector instances; + Scenario *scenario=scenario_owner.get(p_scenario); + ERR_FAIL_COND_V(!scenario,instances); + const_cast(this)->update_dirty_instances(); // check dirty instances before culling - return Vector(); -} + int culled=0; + Instance *cull[1024]; + culled=scenario->octree.cull_segment(p_from,p_to*10000,cull,1024); + for (int i=0;iobject_ID==0) + continue; + + instances.push_back(instance->object_ID); + } + + return instances; + +} +Vector VisualServerScene::instances_cull_convex(const Vector& p_convex, RID p_scenario) const{ + + Vector instances; + Scenario *scenario=scenario_owner.get(p_scenario); + ERR_FAIL_COND_V(!scenario,instances); + const_cast(this)->update_dirty_instances(); // check dirty instances before culling + + int culled=0; + Instance *cull[1024]; + + + culled=scenario->octree.cull_convex(p_convex,cull,1024); + + for (int i=0;iobject_ID==0) + continue; + + instances.push_back(instance->object_ID); + } + + return instances; + +} + void VisualServerScene::instance_geometry_set_flag(RID p_instance,VS::InstanceFlags p_flags,bool p_enabled){ Instance *instance = instance_owner.get( p_instance ); diff --git a/servers/visual/visual_server_scene.h b/servers/visual/visual_server_scene.h index 515d52db7bc..10a159f27a0 100644 --- a/servers/visual/visual_server_scene.h +++ b/servers/visual/visual_server_scene.h @@ -162,7 +162,7 @@ public: Scenario() { debug=VS::SCENARIO_DEBUG_DISABLED; } }; - RID_Owner scenario_owner; + mutable RID_Owner scenario_owner; static void* _instance_pair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int); static void _instance_unpair(void *p_self, OctreeElementID, Instance *p_A,int, OctreeElementID, Instance *p_B,int,void*); diff --git a/servers/visual_server.h b/servers/visual_server.h index bbe6c2c6831..78769c17f7b 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -172,6 +172,8 @@ public: virtual void material_set_param(RID p_material, const StringName& p_param, const Variant& p_value)=0; virtual Variant material_get_param(RID p_material, const StringName& p_param) const=0; + virtual void material_set_line_width(RID p_material, float p_width)=0; + /* MESH API */ enum ArrayType { @@ -512,15 +514,14 @@ public: enum EnvironmentToneMapper { ENV_TONE_MAPPER_LINEAR, + ENV_TONE_MAPPER_LOG, ENV_TONE_MAPPER_REINHARDT, - ENV_TONE_MAPPER_FILMIC + ENV_TONE_MAPPER_FILMIC, + ENV_TONE_MAPPER_ACES_FILMIC }; - virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,EnvironmentToneMapper p_tone_mapper)=0; - virtual void environment_set_brightness(RID p_env,bool p_enable,float p_brightness)=0; - virtual void environment_set_contrast(RID p_env,bool p_enable,float p_contrast)=0; - virtual void environment_set_saturation(RID p_env,bool p_enable,float p_saturation)=0; - virtual void environment_set_color_correction(RID p_env,bool p_enable,RID p_ramp)=0; + virtual void environment_set_tonemap(RID p_env,bool p_enable,float p_exposure,float p_white,float p_min_luminance,float p_max_luminance,float p_auto_exp_speed,float p_auto_exp_scale,EnvironmentToneMapper p_tone_mapper)=0; + virtual void environment_set_adjustment(RID p_env,bool p_enable,float p_brightness,float p_contrast,float p_saturation,RID p_ramp)=0; /* SCENARIO API */ diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp index a4ee102aed5..99c2b468fa9 100644 --- a/tools/editor/editor_node.cpp +++ b/tools/editor/editor_node.cpp @@ -4144,7 +4144,7 @@ void EditorNode::register_editor_types() { ObjectTypeDB::register_type(); //ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); -// ObjectTypeDB::register_type(); + ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); ObjectTypeDB::register_type(); diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp index 7a24ab41802..13c69692c7f 100644 --- a/tools/editor/plugins/spatial_editor_plugin.cpp +++ b/tools/editor/plugins/spatial_editor_plugin.cpp @@ -2553,6 +2553,7 @@ void SpatialEditor::_generate_selection_box() { mat->set_albedo(Color(1,1,1)); mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT,true); mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR,true); + mat->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR,true); st->set_material(mat); selection_box = st->commit(); } @@ -3143,6 +3144,8 @@ void SpatialEditor::_init_indicators() { indicator_mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); indicator_mat->set_flag(FixedSpatialMaterial::FLAG_ONTOP,true); indicator_mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR,true); + indicator_mat->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR,true); + indicator_mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT,true); DVector grid_colors[3]; @@ -3543,7 +3546,7 @@ void SpatialEditor::_notification(int p_what) { if (p_what==NOTIFICATION_ENTER_TREE) { - //gizmos = memnew( SpatialEditorGizmos ); + gizmos = memnew( SpatialEditorGizmos ); _init_indicators(); _update_default_light_angle(); } @@ -3551,7 +3554,7 @@ void SpatialEditor::_notification(int p_what) { if (p_what==NOTIFICATION_EXIT_TREE) { _finish_indicators(); -// memdelete( gizmos ); + memdelete( gizmos ); } } @@ -3597,7 +3600,7 @@ void SpatialEditor::_request_gizmo(Object* p_obj) { } if (!seg.is_valid()) { - // seg = gizmos->get_gizmo(sp); + seg = gizmos->get_gizmo(sp); } if (seg.is_valid()) { diff --git a/tools/editor/plugins/spatial_editor_plugin.h b/tools/editor/plugins/spatial_editor_plugin.h index 8c8a80bc3ae..1617acaefb9 100644 --- a/tools/editor/plugins/spatial_editor_plugin.h +++ b/tools/editor/plugins/spatial_editor_plugin.h @@ -467,7 +467,7 @@ private: static SpatialEditor *singleton; void _node_removed(Node* p_node); - //SpatialEditorGizmos *gizmos; + SpatialEditorGizmos *gizmos; SpatialEditor(); void _update_ambient_light_color(const Color& p_color); diff --git a/tools/editor/spatial_editor_gizmos.cpp b/tools/editor/spatial_editor_gizmos.cpp index bc76f6c20c9..b1accac762b 100644 --- a/tools/editor/spatial_editor_gizmos.cpp +++ b/tools/editor/spatial_editor_gizmos.cpp @@ -41,7 +41,7 @@ // Keep small children away from this file. // It's so ugly it will eat them alive -#if 0 + #define HANDLE_HALF_SIZE 0.05 @@ -84,7 +84,6 @@ void EditorSpatialGizmo::Instance::create_instance(Spatial *p_base) { if (extra_margin) VS::get_singleton()->instance_set_extra_visibility_margin(instance,1); VS::get_singleton()->instance_geometry_set_cast_shadows_setting(instance,VS::SHADOW_CASTING_SETTING_OFF); - VS::get_singleton()->instance_geometry_set_flag(instance,VS::INSTANCE_FLAG_RECEIVE_SHADOWS,false); VS::get_singleton()->instance_set_layer_mask(instance,1<get_parameter(Light::PARAM_RADIUS); + return light->get_param(Light::PARAM_RANGE); if (p_idx==1) - return light->get_parameter(Light::PARAM_SPOT_ANGLE); + return light->get_param(Light::PARAM_SPOT_ANGLE); return Variant(); } @@ -729,7 +728,7 @@ void LightSpatialGizmo::set_handle(int p_idx,Camera *p_camera, const Point2& p_p if (d<0) d=0; - light->set_parameter(Light::PARAM_RADIUS,d); + light->set_param(Light::PARAM_RANGE,d); } else if (light->cast_to()) { Plane cp=Plane( gt.origin, p_camera->get_transform().basis.get_axis(2)); @@ -738,15 +737,15 @@ void LightSpatialGizmo::set_handle(int p_idx,Camera *p_camera, const Point2& p_p if (cp.intersects_ray(ray_from,ray_dir,&inters)) { float r = inters.distance_to(gt.origin); - light->set_parameter(Light::PARAM_RADIUS,r); + light->set_param(Light::PARAM_RANGE,r); } } } else if (p_idx==1) { - float a = _find_closest_angle_to_half_pi_arc(s[0],s[1],light->get_parameter(Light::PARAM_RADIUS),gt); - light->set_parameter(Light::PARAM_SPOT_ANGLE,CLAMP(a,0.01,89.99)); + float a = _find_closest_angle_to_half_pi_arc(s[0],s[1],light->get_param(Light::PARAM_RANGE),gt); + light->set_param(Light::PARAM_SPOT_ANGLE,CLAMP(a,0.01,89.99)); } } @@ -754,21 +753,21 @@ void LightSpatialGizmo::commit_handle(int p_idx,const Variant& p_restore,bool p_ if (p_cancel) { - light->set_parameter(p_idx==0?Light::PARAM_RADIUS:Light::PARAM_SPOT_ANGLE,p_restore); + light->set_param(p_idx==0?Light::PARAM_RANGE:Light::PARAM_SPOT_ANGLE,p_restore); } else if (p_idx==0) { UndoRedo *ur = SpatialEditor::get_singleton()->get_undo_redo(); ur->create_action(TTR("Change Light Radius")); - ur->add_do_method(light,"set_parameter",Light::PARAM_RADIUS,light->get_parameter(Light::PARAM_RADIUS)); - ur->add_undo_method(light,"set_parameter",Light::PARAM_RADIUS,p_restore); + ur->add_do_method(light,"set_param",Light::PARAM_RANGE,light->get_param(Light::PARAM_RANGE)); + ur->add_undo_method(light,"set_param",Light::PARAM_RANGE,p_restore); ur->commit_action(); } else if (p_idx==1) { UndoRedo *ur = SpatialEditor::get_singleton()->get_undo_redo(); ur->create_action(TTR("Change Light Radius")); - ur->add_do_method(light,"set_parameter",Light::PARAM_SPOT_ANGLE,light->get_parameter(Light::PARAM_SPOT_ANGLE)); - ur->add_undo_method(light,"set_parameter",Light::PARAM_SPOT_ANGLE,p_restore); + ur->add_do_method(light,"set_param",Light::PARAM_SPOT_ANGLE,light->get_param(Light::PARAM_SPOT_ANGLE)); + ur->add_undo_method(light,"set_param",Light::PARAM_SPOT_ANGLE,p_restore); ur->commit_action(); } @@ -831,7 +830,7 @@ void LightSpatialGizmo::redraw() { OmniLight *on = light->cast_to(); - float r = on->get_parameter(Light::PARAM_RADIUS); + float r = on->get_param(Light::PARAM_RANGE); Vector points; @@ -871,9 +870,9 @@ void LightSpatialGizmo::redraw() { Vector points; SpotLight *on = light->cast_to(); - float r = on->get_parameter(Light::PARAM_RADIUS); - float w = r*Math::sin(Math::deg2rad(on->get_parameter(Light::PARAM_SPOT_ANGLE))); - float d = r*Math::cos(Math::deg2rad(on->get_parameter(Light::PARAM_SPOT_ANGLE))); + float r = on->get_param(Light::PARAM_RANGE); + float w = r*Math::sin(Math::deg2rad(on->get_param(Light::PARAM_SPOT_ANGLE))); + float d = r*Math::cos(Math::deg2rad(on->get_param(Light::PARAM_SPOT_ANGLE))); @@ -1543,7 +1542,7 @@ void RayCastSpatialGizmo::redraw() { } -RayCastSpatialGizmo::RayCastSpatialGizmo(RayCast* p_raycast){ +RayCastSpatialGizmo::RayCastSpatialGizmo(RayCast* p_raycast) { set_spatial_node(p_raycast); raycast=p_raycast; @@ -2979,11 +2978,12 @@ Ref SpatialEditorGizmos::get_gizmo(Spatial *p_spatial) { Ref SpatialEditorGizmos::create_line_material(const Color& p_base_color) { Ref line_material = Ref( memnew( FixedSpatialMaterial )); - line_material->set_flag(Material::FLAG_UNSHADED, true); + line_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); line_material->set_line_width(3.0); - line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); - line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,p_base_color); + line_material->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + line_material->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR, true); + line_material->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR, true); + line_material->set_albedo(p_base_color); return line_material; @@ -2992,9 +2992,9 @@ Ref SpatialEditorGizmos::create_line_material(const Color& Ref SpatialEditorGizmos::create_solid_material(const Color& p_base_color) { Ref line_material = Ref( memnew( FixedSpatialMaterial )); - line_material->set_flag(Material::FLAG_UNSHADED, true); - line_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - line_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,p_base_color); + line_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + line_material->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + line_material->set_albedo(p_base_color); return line_material; @@ -3005,56 +3005,59 @@ SpatialEditorGizmos::SpatialEditorGizmos() { singleton=this; handle_material = Ref( memnew( FixedSpatialMaterial )); - handle_material->set_flag(Material::FLAG_UNSHADED, true); - handle_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(0.8,0.8,0.8)); + handle_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + handle_material->set_albedo(Color(0.8,0.8,0.8)); handle2_material = Ref( memnew( FixedSpatialMaterial )); - handle2_material->set_flag(Material::FLAG_UNSHADED, true); - handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_POINT_SIZE, true); + handle2_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + handle2_material->set_flag(FixedSpatialMaterial::FLAG_USE_POINT_SIZE, true); handle_t = SpatialEditor::get_singleton()->get_icon("Editor3DHandle","EditorIcons"); handle2_material->set_point_size(handle_t->get_width()); - handle2_material->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,handle_t); - handle2_material->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1)); - handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - handle2_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); + handle2_material->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,handle_t); + handle2_material->set_albedo(Color(1,1,1)); + handle2_material->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + handle2_material->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR, true); + handle2_material->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR, true); light_material = create_line_material(Color(1,1,0.2)); light_material_omni_icon = Ref( memnew( FixedSpatialMaterial )); - light_material_omni_icon->set_flag(Material::FLAG_UNSHADED, true); - light_material_omni_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - light_material_omni_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - light_material_omni_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - light_material_omni_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - light_material_omni_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoLight","EditorIcons")); + light_material_omni_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + light_material_omni_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + light_material_omni_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + light_material_omni_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + light_material_omni_icon->set_albedo(Color(1,1,1,0.9)); + light_material_omni_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,SpatialEditor::get_singleton()->get_icon("GizmoLight","EditorIcons")); light_material_directional_icon = Ref( memnew( FixedSpatialMaterial )); - light_material_directional_icon->set_flag(Material::FLAG_UNSHADED, true); - light_material_directional_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - light_material_directional_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - light_material_directional_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - light_material_directional_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - light_material_directional_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoDirectionalLight","EditorIcons")); + light_material_directional_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + light_material_directional_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + light_material_directional_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + light_material_directional_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + light_material_directional_icon->set_albedo(Color(1,1,1,0.9)); + light_material_directional_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,SpatialEditor::get_singleton()->get_icon("GizmoDirectionalLight","EditorIcons")); camera_material = create_line_material(Color(1.0,0.5,1.0)); navmesh_edge_material = create_line_material(Color(0.1,0.8,1.0)); navmesh_solid_material = create_solid_material(Color(0.1,0.8,1.0,0.4)); - navmesh_edge_material->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, false); - navmesh_solid_material->set_flag(Material::FLAG_DOUBLE_SIDED,true); + navmesh_edge_material->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR, false); + navmesh_edge_material->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR, false); + navmesh_solid_material->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); navmesh_edge_material_disabled = create_line_material(Color(1.0,0.8,0.1)); navmesh_solid_material_disabled = create_solid_material(Color(1.0,0.8,0.1,0.4)); - navmesh_edge_material_disabled->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, false); - navmesh_solid_material_disabled->set_flag(Material::FLAG_DOUBLE_SIDED,true); + navmesh_edge_material_disabled->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR, false); + navmesh_edge_material_disabled->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR, false); + navmesh_solid_material_disabled->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); skeleton_material = create_line_material(Color(0.6,1.0,0.3)); - skeleton_material->set_flag(Material::FLAG_DOUBLE_SIDED,true); - skeleton_material->set_flag(Material::FLAG_UNSHADED,true); - skeleton_material->set_flag(Material::FLAG_ONTOP,true); - skeleton_material->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); + skeleton_material->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + skeleton_material->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); + skeleton_material->set_flag(FixedSpatialMaterial::FLAG_ONTOP,true); + skeleton_material->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); //position 3D Shared mesh @@ -3078,9 +3081,10 @@ SpatialEditorGizmos::SpatialEditorGizmos() { cursor_colors.push_back(Color(0.5,0.5,1,0.7)); Ref mat = memnew( FixedSpatialMaterial ); - mat->set_flag(Material::FLAG_UNSHADED,true); - mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY,true); - mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA,true); + mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED,true); + mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR,true); + mat->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR,true); + mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT,true); mat->set_line_width(3); Array d; d.resize(VS::ARRAY_MAX); @@ -3101,9 +3105,10 @@ SpatialEditorGizmos::SpatialEditorGizmos() { cursor_colors.push_back(Color(0.5, 0.5, 0.5, 0.7)); Ref mat = memnew(FixedSpatialMaterial); - mat->set_flag(Material::FLAG_UNSHADED, true); - mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_COLOR_ARRAY, true); - mat->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); + mat->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + mat->set_flag(FixedSpatialMaterial::FLAG_ALBEDO_FROM_VERTEX_COLOR, true); + mat->set_flag(FixedSpatialMaterial::FLAG_SRGB_VERTEX_COLOR, true); + mat->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); mat->set_line_width(3); Array d; d.resize(VS::ARRAY_MAX); @@ -3115,12 +3120,12 @@ SpatialEditorGizmos::SpatialEditorGizmos() { sample_player_icon = Ref( memnew( FixedSpatialMaterial )); - sample_player_icon->set_flag(Material::FLAG_UNSHADED, true); - sample_player_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - sample_player_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - sample_player_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - sample_player_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - sample_player_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialSamplePlayer","EditorIcons")); + sample_player_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + sample_player_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + sample_player_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + sample_player_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + sample_player_icon->set_albedo(Color(1,1,1,0.9)); + sample_player_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,SpatialEditor::get_singleton()->get_icon("GizmoSpatialSamplePlayer","EditorIcons")); room_material = create_line_material(Color(1.0,0.6,0.9)); portal_material = create_line_material(Color(1.0,0.8,0.6)); @@ -3130,28 +3135,28 @@ SpatialEditorGizmos::SpatialEditorGizmos() { joint_material = create_line_material(Color(0.6,0.8,1.0)); stream_player_icon = Ref( memnew( FixedSpatialMaterial )); - stream_player_icon->set_flag(Material::FLAG_UNSHADED, true); - stream_player_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - stream_player_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - stream_player_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - stream_player_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - stream_player_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("GizmoSpatialStreamPlayer","EditorIcons")); + stream_player_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + stream_player_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + stream_player_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + stream_player_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + stream_player_icon->set_albedo(Color(1,1,1,0.9)); + stream_player_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,SpatialEditor::get_singleton()->get_icon("GizmoSpatialStreamPlayer","EditorIcons")); visibility_notifier_icon = Ref( memnew( FixedSpatialMaterial )); - visibility_notifier_icon->set_flag(Material::FLAG_UNSHADED, true); - visibility_notifier_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - visibility_notifier_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - visibility_notifier_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - visibility_notifier_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE,Color(1,1,1,0.9)); - visibility_notifier_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE,SpatialEditor::get_singleton()->get_icon("Visible","EditorIcons")); + visibility_notifier_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + visibility_notifier_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + visibility_notifier_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + visibility_notifier_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + visibility_notifier_icon->set_albedo(Color(1,1,1,0.9)); + visibility_notifier_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO,SpatialEditor::get_singleton()->get_icon("Visible","EditorIcons")); listener_icon = Ref(memnew(FixedSpatialMaterial)); - listener_icon->set_flag(Material::FLAG_UNSHADED, true); - listener_icon->set_flag(Material::FLAG_DOUBLE_SIDED, true); - listener_icon->set_depth_draw_mode(Material::DEPTH_DRAW_NEVER); - listener_icon->set_fixed_flag(FixedSpatialMaterial::FLAG_USE_ALPHA, true); - listener_icon->set_parameter(FixedSpatialMaterial::PARAM_DIFFUSE, Color(1, 1, 1, 0.9)); - listener_icon->set_texture(FixedSpatialMaterial::PARAM_DIFFUSE, SpatialEditor::get_singleton()->get_icon("GizmoListener", "EditorIcons")); + listener_icon->set_flag(FixedSpatialMaterial::FLAG_UNSHADED, true); + listener_icon->set_cull_mode(FixedSpatialMaterial::CULL_DISABLED); + listener_icon->set_depth_draw_mode(FixedSpatialMaterial::DEPTH_DRAW_DISABLED); + listener_icon->set_feature(FixedSpatialMaterial::FEATURE_TRANSPARENT, true); + listener_icon->set_albedo( Color(1, 1, 1, 0.9)); + listener_icon->set_texture(FixedSpatialMaterial::TEXTURE_ALBEDO, SpatialEditor::get_singleton()->get_icon("GizmoListener", "EditorIcons")); { @@ -3200,4 +3205,4 @@ SpatialEditorGizmos::SpatialEditorGizmos() { } -#endif + diff --git a/tools/editor/spatial_editor_gizmos.h b/tools/editor/spatial_editor_gizmos.h index 64c6a6e8249..2c0033cdcae 100644 --- a/tools/editor/spatial_editor_gizmos.h +++ b/tools/editor/spatial_editor_gizmos.h @@ -52,7 +52,7 @@ class Camera; -#if 0 + class EditorSpatialGizmo : public SpatialEditorGizmo { OBJ_TYPE(EditorSpatialGizmo,SpatialGizmo); @@ -505,5 +505,4 @@ public: SpatialEditorGizmos(); }; -#endif #endif // SPATIAL_EDITOR_GIZMOS_H From acfa606915416a85106817974e5ba2ec3518c203 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Sat, 29 Oct 2016 22:11:05 -0300 Subject: [PATCH 020/223] resolved reflection cubemap blending --- drivers/gles3/rasterizer_storage_gles3.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index a678c460f59..4b2c6072970 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -57,7 +57,7 @@ #define _EXT_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE - +#define _EXT_TEXTURE_CUBE_MAP_SEAMLESS 0x884F #define _GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE #define _GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF @@ -3946,6 +3946,8 @@ void RasterizerStorageGLES3::initialize() { } shaders.cubemap_filter.init(); + + glEnable(_EXT_TEXTURE_CUBE_MAP_SEAMLESS); } void RasterizerStorageGLES3::finalize() { From 0aebddafc1636f3e7628cdac8028c2ce69c5abe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sun, 30 Oct 2016 15:13:44 +0100 Subject: [PATCH 021/223] Revert "make `Container` node aware of `Size Flags`" This reverts commit 6ed15e995dd83c1cf6808f261066580a1f8bc297. Fixes #6974. --- scene/gui/container.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/scene/gui/container.cpp b/scene/gui/container.cpp index 83a4f342827..feaf516f42d 100644 --- a/scene/gui/container.cpp +++ b/scene/gui/container.cpp @@ -151,18 +151,6 @@ void Container::_notification(int p_what) { queue_sort(); } } break; - case NOTIFICATION_SORT_CHILDREN: { - - Size2 s = get_size(); - - for (int i=0; icast_to(); - if (!c || !c->is_visible() || c->is_set_as_toplevel()) - continue; - - fit_child_in_rect(c,Rect2(0, 0, s.width, s.height)); - } - } } } From 696c47d9aba44edb5ca55f8c6a27381170eb4237 Mon Sep 17 00:00:00 2001 From: volzhs Date: Sun, 30 Oct 2016 23:21:59 +0900 Subject: [PATCH 022/223] Fix bug with saving last select language and remove warning --- tools/editor/editor_settings.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/editor/editor_settings.cpp b/tools/editor/editor_settings.cpp index f5741c4a9e8..78bfd7cf259 100644 --- a/tools/editor/editor_settings.cpp +++ b/tools/editor/editor_settings.cpp @@ -1032,7 +1032,6 @@ String EditorSettings::get_last_selected_language() String path = get_project_settings_path().plus_file("project_metadata.cfg"); Error err = cf->load(path); if (err != OK) { - WARN_PRINTS("Can't load config file: " + path); return ""; } Variant last_selected_language = cf->get_value("script_setup", "last_selected_language"); @@ -1045,11 +1044,7 @@ void EditorSettings::set_last_selected_language(String p_language) { Ref cf = memnew( ConfigFile ); String path = get_project_settings_path().plus_file("project_metadata.cfg"); - Error err = cf->load(path); - if (err != OK) { - WARN_PRINTS("Can't load config file: " + path); - return; - } + cf->load(path); cf->set_value("script_setup", "last_selected_language", p_language); cf->save(path); } From 513820ab276e06b7a5f14b3437dc21d9a320c1a2 Mon Sep 17 00:00:00 2001 From: volzhs Date: Mon, 31 Oct 2016 00:07:16 +0900 Subject: [PATCH 023/223] Fix 2 search menus are shown --- tools/editor/plugins/script_editor_plugin.cpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tools/editor/plugins/script_editor_plugin.cpp b/tools/editor/plugins/script_editor_plugin.cpp index 99c50efd2fc..b6ce3e89eb6 100644 --- a/tools/editor/plugins/script_editor_plugin.cpp +++ b/tools/editor/plugins/script_editor_plugin.cpp @@ -1909,19 +1909,14 @@ void ScriptEditor::_update_selected_editor_menu() { se->get_edit_menu()->hide(); } - EditorHelp *eh = tab_container->get_child(i)->cast_to(); - - if (eh) { - - if (current) - script_search_menu->show(); - else - script_search_menu->hide(); - } - - } + EditorHelp *eh=tab_container->get_current_tab_control()->cast_to(); + if (eh) { + script_search_menu->show(); + } else { + script_search_menu->hide(); + } } void ScriptEditor::_update_history_pos(int p_new_pos) { From e34a5324c884960735b3f743956b3a052574d6ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sun, 30 Oct 2016 17:04:07 +0100 Subject: [PATCH 024/223] scons: Move lib splitting method to methods.py Apparently it might still be necessary for some console ports. --- SConstruct | 1 + drivers/SCsub | 9 ++++++--- methods.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/SConstruct b/SConstruct index 72de2e2004a..fb76014e4a5 100644 --- a/SConstruct +++ b/SConstruct @@ -100,6 +100,7 @@ env_base.__class__.disable_module = methods.disable_module env_base.__class__.add_source_files = methods.add_source_files env_base.__class__.use_windows_spawn_fix = methods.use_windows_spawn_fix +env_base.__class__.split_lib = methods.split_lib env_base["x86_libtheora_opt_gcc"]=False env_base["x86_libtheora_opt_vc"]=False diff --git a/drivers/SCsub b/drivers/SCsub index 9c561b5c13b..e9a6e3b983b 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -35,6 +35,9 @@ if (env["tools"]=="yes"): if env['vsproj']=="yes": env.AddToVSProject(env.drivers_sources) -env.add_source_files(env.drivers_sources,"*.cpp") -lib = env.Library("drivers",env.drivers_sources) -env.Prepend(LIBS=[lib]) +if env.split_drivers: + env.split_lib("drivers") +else: + env.add_source_files(env.drivers_sources,"*.cpp") + lib = env.Library("drivers",env.drivers_sources) + env.Prepend(LIBS=[lib]) diff --git a/methods.py b/methods.py index 26c1c6e68e0..8a195067614 100755 --- a/methods.py +++ b/methods.py @@ -1402,6 +1402,50 @@ def use_windows_spawn_fix(self, platform=None): self['SPAWN'] = mySpawn +def split_lib(self, libname): + import string + env = self + + num = 0 + cur_base = "" + max_src = 64 + list = [] + lib_list = [] + + for f in getattr(env, libname + "_sources"): + fname = "" + if type(f) == type(""): + fname = env.File(f).path + else: + fname = env.File(f)[0].path + fname = fname.replace("\\", "/") + base = string.join(fname.split("/")[:2], "/") + if base != cur_base and len(list) > max_src: + if num > 0: + lib = env.Library(libname + str(num), list) + lib_list.append(lib) + list = [] + num = num + 1 + cur_base = base + list.append(f) + + lib = env.Library(libname + str(num), list) + lib_list.append(lib) + + if len(lib_list) > 0: + import os, sys + if os.name == 'posix' and sys.platform == 'msys': + env.Replace(ARFLAGS = ['rcsT']) + lib = env.Library(libname + "_collated", lib_list) + lib_list = [lib] + + lib_base = [] + env.add_source_files(lib_base, "*.cpp") + lib_list.insert(0, env.Library(libname, lib_base)) + + env.Prepend(LIBS = lib_list) + + def save_active_platforms(apnames,ap): for x in ap: From ee69bd81cfa67eb5c642604f0f43d711ab370faf Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Thu, 27 Oct 2016 05:54:42 +0200 Subject: [PATCH 025/223] TCPServer listen now default to IP type ANY (v6 socket with v4 support) --- core/io/packet_peer_udp.h | 2 +- core/io/tcp_server.h | 4 ++-- drivers/unix/tcp_server_posix.h | 2 +- platform/windows/tcp_server_winsock.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/io/packet_peer_udp.h b/core/io/packet_peer_udp.h index c0806a9e6a7..6c79104e43c 100644 --- a/core/io/packet_peer_udp.h +++ b/core/io/packet_peer_udp.h @@ -46,7 +46,7 @@ protected: public: - virtual Error listen(int p_port, IP_Address::AddrType p_address_type = IP_Address::TYPE_IPV4, int p_recv_buffer_size=65536)=0; + virtual Error listen(int p_port, IP_Address::AddrType p_address_type = IP_Address::TYPE_ANY, int p_recv_buffer_size=65536)=0; virtual void close()=0; virtual Error wait()=0; virtual bool is_listening() const=0; diff --git a/core/io/tcp_server.h b/core/io/tcp_server.h index 883a3ef2f63..02678950086 100644 --- a/core/io/tcp_server.h +++ b/core/io/tcp_server.h @@ -41,11 +41,11 @@ protected: static TCP_Server* (*_create)(); //bind helper - Error _listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_IPV4 ,DVector p_accepted_hosts=DVector()); + Error _listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_ANY ,DVector p_accepted_hosts=DVector()); static void _bind_methods(); public: - virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_IPV4, const List *p_accepted_hosts=NULL)=0; + virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_ANY, const List *p_accepted_hosts=NULL)=0; virtual bool is_connection_available() const=0; virtual Ref take_connection()=0; diff --git a/drivers/unix/tcp_server_posix.h b/drivers/unix/tcp_server_posix.h index 134ec494cb2..a700b6ae0e6 100644 --- a/drivers/unix/tcp_server_posix.h +++ b/drivers/unix/tcp_server_posix.h @@ -40,7 +40,7 @@ class TCPServerPosix : public TCP_Server { public: - virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_IPV4, const List *p_accepted_hosts=NULL); + virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type, const List *p_accepted_hosts=NULL); virtual bool is_connection_available() const; virtual Ref take_connection(); diff --git a/platform/windows/tcp_server_winsock.h b/platform/windows/tcp_server_winsock.h index 94601a2e12f..115fdb42346 100644 --- a/platform/windows/tcp_server_winsock.h +++ b/platform/windows/tcp_server_winsock.h @@ -39,7 +39,7 @@ class TCPServerWinsock : public TCP_Server { public: - virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type = IP_Address::TYPE_IPV4,const List *p_accepted_hosts=NULL); + virtual Error listen(uint16_t p_port, IP_Address::AddrType p_type,const List *p_accepted_hosts=NULL); virtual bool is_connection_available() const; virtual Ref take_connection(); From 812908e236e83db368dfef49b8badb9a6182e1de Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Fri, 28 Oct 2016 04:18:17 +0200 Subject: [PATCH 026/223] Fix windows debugger connection problems. Unify network socket creation between platform. Ensure IPV6_V6ONLY flag is not set on sockets (allow IPv4 connection in IPv6 socket, dual-stack). --- drivers/unix/packet_peer_udp_posix.cpp | 8 +------- drivers/unix/socket_helpers.h | 21 ++++++++++++++++++++ drivers/unix/stream_peer_tcp_posix.cpp | 4 ++-- drivers/unix/tcp_server_posix.cpp | 4 ++-- platform/windows/packet_peer_udp_winsock.cpp | 6 +----- platform/windows/stream_peer_winsock.cpp | 4 ++-- platform/windows/tcp_server_winsock.cpp | 2 +- 7 files changed, 30 insertions(+), 19 deletions(-) diff --git a/drivers/unix/packet_peer_udp_posix.cpp b/drivers/unix/packet_peer_udp_posix.cpp index 0b172b6a51a..01c26420a84 100644 --- a/drivers/unix/packet_peer_udp_posix.cpp +++ b/drivers/unix/packet_peer_udp_posix.cpp @@ -121,8 +121,6 @@ int PacketPeerUDPPosix::get_max_packet_size() const{ Error PacketPeerUDPPosix::listen(int p_port, IP_Address::AddrType p_address_type, int p_recv_buffer_size) { - ERR_FAIL_COND_V(p_address_type != IP_Address::TYPE_IPV4 && p_address_type != IP_Address::TYPE_IPV6, ERR_INVALID_PARAMETER); - close(); int sock = _get_socket(p_address_type); if (sock == -1 ) @@ -223,11 +221,7 @@ int PacketPeerUDPPosix::_get_socket(IP_Address::AddrType p_type) { if (sockfd != -1) return sockfd; - int family = p_type == IP_Address::TYPE_IPV6 ? AF_INET6 : AF_INET; - - sockfd = socket(family, SOCK_DGRAM, IPPROTO_UDP); - ERR_FAIL_COND_V( sockfd == -1, -1 ); - //fcntl(sockfd, F_SETFL, O_NONBLOCK); + sockfd = _socket_create(p_type, SOCK_DGRAM, IPPROTO_UDP); return sockfd; } diff --git a/drivers/unix/socket_helpers.h b/drivers/unix/socket_helpers.h index 4ee40e3d3d1..b432a028b3b 100644 --- a/drivers/unix/socket_helpers.h +++ b/drivers/unix/socket_helpers.h @@ -45,6 +45,27 @@ static size_t _set_listen_sockaddr(struct sockaddr_storage* p_addr, int p_port, }; }; +static int _socket_create(IP_Address::AddrType p_type, int type, int protocol) { + + ERR_FAIL_COND_V(p_type > IP_Address::TYPE_ANY || p_type < IP_Address::TYPE_NONE, ERR_INVALID_PARAMETER); + + int family = p_type == IP_Address::TYPE_IPV4 ? AF_INET : AF_INET6; + int sockfd = socket(family, type, protocol); + + ERR_FAIL_COND_V( sockfd == -1, -1 ); + + if(family == AF_INET6) { + // Ensure IPv4 over IPv6 is enabled + int v6_only = 0; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&v6_only, sizeof(v6_only)) != 0) { + WARN_PRINT("Unable to set IPv4 address mapping over IPv6"); + } + } + + return sockfd; +} + + static void _set_ip_addr_port(IP_Address& r_ip, int& r_port, struct sockaddr_storage* p_addr) { if (p_addr->ss_family == AF_INET) { diff --git a/drivers/unix/stream_peer_tcp_posix.cpp b/drivers/unix/stream_peer_tcp_posix.cpp index f2915b0a163..b1636abd692 100644 --- a/drivers/unix/stream_peer_tcp_posix.cpp +++ b/drivers/unix/stream_peer_tcp_posix.cpp @@ -137,8 +137,8 @@ Error StreamPeerTCPPosix::connect(const IP_Address& p_host, uint16_t p_port) { ERR_FAIL_COND_V( p_host.type == IP_Address::TYPE_NONE, ERR_INVALID_PARAMETER); - int family = p_host.type == IP_Address::TYPE_IPV6 ? AF_INET6 : AF_INET; - if ((sockfd = socket(family, SOCK_STREAM, 0)) == -1) { + sockfd = _socket_create(p_host.type, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) { ERR_PRINT("Socket creation failed!"); disconnect(); //perror("socket"); diff --git a/drivers/unix/tcp_server_posix.cpp b/drivers/unix/tcp_server_posix.cpp index 05b739804c0..32fda618b24 100644 --- a/drivers/unix/tcp_server_posix.cpp +++ b/drivers/unix/tcp_server_posix.cpp @@ -71,8 +71,8 @@ void TCPServerPosix::make_default() { Error TCPServerPosix::listen(uint16_t p_port, IP_Address::AddrType p_type, const List *p_accepted_hosts) { int sockfd; - int family = p_type == IP_Address::TYPE_IPV6 ? AF_INET6 : AF_INET; - sockfd = socket(family, SOCK_STREAM, 0); + sockfd = _socket_create(p_type, SOCK_STREAM, IPPROTO_TCP); + ERR_FAIL_COND_V(sockfd == -1, FAILED); #ifndef NO_FCNTL fcntl(sockfd, F_SETFL, O_NONBLOCK); diff --git a/platform/windows/packet_peer_udp_winsock.cpp b/platform/windows/packet_peer_udp_winsock.cpp index b4cd60979c7..6c661db0c74 100644 --- a/platform/windows/packet_peer_udp_winsock.cpp +++ b/platform/windows/packet_peer_udp_winsock.cpp @@ -239,11 +239,7 @@ int PacketPeerUDPWinsock::_get_socket(IP_Address::AddrType p_type) { if (sockfd != -1) return sockfd; - int family = p_type == IP_Address::TYPE_IPV6 ? AF_INET6 : AF_INET; - - sockfd = socket(family, SOCK_DGRAM, IPPROTO_UDP); - ERR_FAIL_COND_V( sockfd == -1, -1 ); - //fcntl(sockfd, F_SETFL, O_NONBLOCK); + sockfd = _socket_create(p_type, SOCK_DGRAM, IPPROTO_UDP); return sockfd; } diff --git a/platform/windows/stream_peer_winsock.cpp b/platform/windows/stream_peer_winsock.cpp index b511e38ecbf..44c17c73e5e 100644 --- a/platform/windows/stream_peer_winsock.cpp +++ b/platform/windows/stream_peer_winsock.cpp @@ -296,8 +296,8 @@ Error StreamPeerWinsock::connect(const IP_Address& p_host, uint16_t p_port) { ERR_FAIL_COND_V( p_host.type == IP_Address::TYPE_NONE, ERR_INVALID_PARAMETER); - int family = p_host.type == IP_Address::TYPE_IPV6 ? AF_INET6 : AF_INET; - if ((sockfd = socket(family, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) { + sockfd = _socket_create(p_host.type, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == INVALID_SOCKET) { ERR_PRINT("Socket creation failed!"); disconnect(); //perror("socket"); diff --git a/platform/windows/tcp_server_winsock.cpp b/platform/windows/tcp_server_winsock.cpp index 1aedc52bb92..a4eb3a769ce 100644 --- a/platform/windows/tcp_server_winsock.cpp +++ b/platform/windows/tcp_server_winsock.cpp @@ -66,7 +66,7 @@ void TCPServerWinsock::cleanup() { Error TCPServerWinsock::listen(uint16_t p_port, IP_Address::AddrType p_type,const List *p_accepted_hosts) { int sockfd; - sockfd = socket(AF_INET, SOCK_STREAM, 0); + sockfd = _socket_create(p_type, SOCK_STREAM, IPPROTO_TCP); ERR_FAIL_COND_V(sockfd == INVALID_SOCKET, FAILED); unsigned long par = 1; From 2f1c8592721eca1e6a71f419208f99977ceab2d9 Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Fri, 28 Oct 2016 20:35:31 +0200 Subject: [PATCH 027/223] Add optional IP type param in TCP/UDP connect/set_send_address --- core/io/packet_peer_udp.cpp | 6 +++--- core/io/packet_peer_udp.h | 2 +- core/io/stream_peer_tcp.cpp | 19 ++++++++++++++++++- core/io/stream_peer_tcp.h | 2 ++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/core/io/packet_peer_udp.cpp b/core/io/packet_peer_udp.cpp index 018dc77d916..3eff128d258 100644 --- a/core/io/packet_peer_udp.cpp +++ b/core/io/packet_peer_udp.cpp @@ -38,13 +38,13 @@ String PacketPeerUDP::_get_packet_ip() const { return get_packet_address(); } -Error PacketPeerUDP::_set_send_address(const String& p_address,int p_port) { +Error PacketPeerUDP::_set_send_address(const String& p_address,int p_port,IP_Address::AddrType p_type) { IP_Address ip; if (p_address.is_valid_ip_address()) { ip=p_address; } else { - ip=IP::get_singleton()->resolve_hostname(p_address); + ip=IP::get_singleton()->resolve_hostname(p_address, p_type); if (ip==IP_Address()) return ERR_CANT_RESOLVE; } @@ -62,7 +62,7 @@ void PacketPeerUDP::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_packet_ip"),&PacketPeerUDP::_get_packet_ip); //ObjectTypeDB::bind_method(_MD("get_packet_address"),&PacketPeerUDP::_get_packet_address); ObjectTypeDB::bind_method(_MD("get_packet_port"),&PacketPeerUDP::get_packet_port); - ObjectTypeDB::bind_method(_MD("set_send_address","host","port"),&PacketPeerUDP::_set_send_address); + ObjectTypeDB::bind_method(_MD("set_send_address","host","port","ip_type"),&PacketPeerUDP::_set_send_address,DEFVAL(IP_Address::TYPE_ANY)); } diff --git a/core/io/packet_peer_udp.h b/core/io/packet_peer_udp.h index 6c79104e43c..37e700cebdf 100644 --- a/core/io/packet_peer_udp.h +++ b/core/io/packet_peer_udp.h @@ -42,7 +42,7 @@ protected: String _get_packet_ip() const; - virtual Error _set_send_address(const String& p_address,int p_port); + virtual Error _set_send_address(const String& p_address,int p_port, IP_Address::AddrType p_address_type = IP_Address::TYPE_ANY); public: diff --git a/core/io/stream_peer_tcp.cpp b/core/io/stream_peer_tcp.cpp index fbb0c69cb78..528f2e8cab4 100644 --- a/core/io/stream_peer_tcp.cpp +++ b/core/io/stream_peer_tcp.cpp @@ -30,9 +30,26 @@ StreamPeerTCP* (*StreamPeerTCP::_create)()=NULL; +VARIANT_ENUM_CAST(IP_Address::AddrType); + +Error StreamPeerTCP::_connect(const String& p_address,int p_port,IP_Address::AddrType p_type) { + + IP_Address ip; + if (p_address.is_valid_ip_address()) { + ip=p_address; + } else { + ip=IP::get_singleton()->resolve_hostname(p_address, p_type); + if (ip==IP_Address()) + return ERR_CANT_RESOLVE; + } + + connect(ip,p_port); + return OK; +} + void StreamPeerTCP::_bind_methods() { - ObjectTypeDB::bind_method(_MD("connect","host","port"),&StreamPeerTCP::connect); + ObjectTypeDB::bind_method(_MD("connect","host","port","ip_type"),&StreamPeerTCP::_connect,DEFVAL(IP_Address::TYPE_ANY)); ObjectTypeDB::bind_method(_MD("is_connected"),&StreamPeerTCP::is_connected); ObjectTypeDB::bind_method(_MD("get_status"),&StreamPeerTCP::get_status); ObjectTypeDB::bind_method(_MD("get_connected_host"),&StreamPeerTCP::get_connected_host); diff --git a/core/io/stream_peer_tcp.h b/core/io/stream_peer_tcp.h index 4c58e7e149b..a151fffad87 100644 --- a/core/io/stream_peer_tcp.h +++ b/core/io/stream_peer_tcp.h @@ -32,6 +32,7 @@ #include "stream_peer.h" #include "ip_address.h" +#include "io/ip.h" class StreamPeerTCP : public StreamPeer { @@ -50,6 +51,7 @@ public: protected: + virtual Error _connect(const String& p_address, int p_port, IP_Address::AddrType p_type = IP_Address::TYPE_ANY); static StreamPeerTCP* (*_create)(); static void _bind_methods(); From eb27e993f0f2fb3de48b7b8aa01c74cc1635a178 Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Fri, 28 Oct 2016 23:11:53 +0200 Subject: [PATCH 028/223] TCP/UDP Listen sockets can now be set to IPv6 only --- drivers/unix/ip_unix.cpp | 9 +++++++++ drivers/unix/packet_peer_udp_posix.cpp | 15 ++++++++++++--- drivers/unix/socket_helpers.h | 4 ++-- drivers/unix/tcp_server_posix.cpp | 8 ++++++++ platform/windows/packet_peer_udp_winsock.cpp | 14 +++++++++++--- platform/windows/tcp_server_winsock.cpp | 8 ++++++++ 6 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/unix/ip_unix.cpp b/drivers/unix/ip_unix.cpp index d6d1be3395b..e3d32618eec 100644 --- a/drivers/unix/ip_unix.cpp +++ b/drivers/unix/ip_unix.cpp @@ -33,6 +33,13 @@ #include #ifdef WINDOWS_ENABLED + // Workaround mingw missing flags! + #ifndef AI_ADDRCONFIG + #define AI_ADDRCONFIG 0x00000400 + #endif + #ifndef AI_V4MAPPED + #define AI_V4MAPPED 0x00000800 + #endif #ifdef WINRT_ENABLED #include #include @@ -90,8 +97,10 @@ IP_Address IP_Unix::_resolve_hostname(const String& p_hostname, IP_Address::Addr hints.ai_family = AF_INET; } else if (p_type == IP_Address::TYPE_IPV6) { hints.ai_family = AF_INET6; + hints.ai_flags = 0; } else { hints.ai_family = AF_UNSPEC; + hints.ai_flags = (AI_V4MAPPED | AI_ADDRCONFIG); }; int s = getaddrinfo(p_hostname.utf8().get_data(), NULL, &hints, &result); diff --git a/drivers/unix/packet_peer_udp_posix.cpp b/drivers/unix/packet_peer_udp_posix.cpp index 01c26420a84..ed295f04599 100644 --- a/drivers/unix/packet_peer_udp_posix.cpp +++ b/drivers/unix/packet_peer_udp_posix.cpp @@ -119,15 +119,24 @@ int PacketPeerUDPPosix::get_max_packet_size() const{ return 512; // uhm maybe not } -Error PacketPeerUDPPosix::listen(int p_port, IP_Address::AddrType p_address_type, int p_recv_buffer_size) { +Error PacketPeerUDPPosix::listen(int p_port, IP_Address::AddrType p_type, int p_recv_buffer_size) { close(); - int sock = _get_socket(p_address_type); + int sock = _get_socket(p_type); + if (sock == -1 ) return ERR_CANT_CREATE; + if(p_type == IP_Address::TYPE_IPV6) { + // Use IPv6 only socket + int yes = 1; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&yes, sizeof(yes)) != 0) { + WARN_PRINT("Unable to unset IPv4 address mapping over IPv6"); + } + } + sockaddr_storage addr = {0}; - size_t addr_size = _set_listen_sockaddr(&addr, p_port, p_address_type, NULL); + size_t addr_size = _set_listen_sockaddr(&addr, p_port, p_type, NULL); if (bind(sock, (struct sockaddr*)&addr, addr_size) == -1 ) { close(); diff --git a/drivers/unix/socket_helpers.h b/drivers/unix/socket_helpers.h index b432a028b3b..80e013ad68a 100644 --- a/drivers/unix/socket_helpers.h +++ b/drivers/unix/socket_helpers.h @@ -56,8 +56,8 @@ static int _socket_create(IP_Address::AddrType p_type, int type, int protocol) { if(family == AF_INET6) { // Ensure IPv4 over IPv6 is enabled - int v6_only = 0; - if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&v6_only, sizeof(v6_only)) != 0) { + int no = 0; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&no, sizeof(no)) != 0) { WARN_PRINT("Unable to set IPv4 address mapping over IPv6"); } } diff --git a/drivers/unix/tcp_server_posix.cpp b/drivers/unix/tcp_server_posix.cpp index 32fda618b24..7028c1a1074 100644 --- a/drivers/unix/tcp_server_posix.cpp +++ b/drivers/unix/tcp_server_posix.cpp @@ -74,6 +74,14 @@ Error TCPServerPosix::listen(uint16_t p_port, IP_Address::AddrType p_type, const sockfd = _socket_create(p_type, SOCK_STREAM, IPPROTO_TCP); ERR_FAIL_COND_V(sockfd == -1, FAILED); + + if(p_type == IP_Address::TYPE_IPV6) { + // Use IPv6 only socket + int yes = 1; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&yes, sizeof(yes)) != 0) { + WARN_PRINT("Unable to unset IPv4 address mapping over IPv6"); + } + } #ifndef NO_FCNTL fcntl(sockfd, F_SETFL, O_NONBLOCK); #else diff --git a/platform/windows/packet_peer_udp_winsock.cpp b/platform/windows/packet_peer_udp_winsock.cpp index 6c661db0c74..73c6116aa3b 100644 --- a/platform/windows/packet_peer_udp_winsock.cpp +++ b/platform/windows/packet_peer_udp_winsock.cpp @@ -112,15 +112,23 @@ void PacketPeerUDPWinsock::_set_blocking(bool p_blocking) { }; } -Error PacketPeerUDPWinsock::listen(int p_port, IP_Address::AddrType p_address_type, int p_recv_buffer_size) { +Error PacketPeerUDPWinsock::listen(int p_port, IP_Address::AddrType p_type, int p_recv_buffer_size) { close(); - int sock = _get_socket(p_address_type); + int sock = _get_socket(p_type); if (sock == -1 ) return ERR_CANT_CREATE; + if(p_type == IP_Address::TYPE_IPV6) { + // Use IPv6 only socket + int yes = 1; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&yes, sizeof(yes)) != 0) { + WARN_PRINT("Unable to unset IPv4 address mapping over IPv6"); + } + } + struct sockaddr_storage addr = {0}; - size_t addr_size = _set_listen_sockaddr(&addr, p_port, p_address_type, NULL); + size_t addr_size = _set_listen_sockaddr(&addr, p_port, p_type, NULL); if (bind(sock, (struct sockaddr*)&addr, addr_size) == -1 ) { close(); diff --git a/platform/windows/tcp_server_winsock.cpp b/platform/windows/tcp_server_winsock.cpp index a4eb3a769ce..38a6b041000 100644 --- a/platform/windows/tcp_server_winsock.cpp +++ b/platform/windows/tcp_server_winsock.cpp @@ -69,6 +69,14 @@ Error TCPServerWinsock::listen(uint16_t p_port, IP_Address::AddrType p_type,cons sockfd = _socket_create(p_type, SOCK_STREAM, IPPROTO_TCP); ERR_FAIL_COND_V(sockfd == INVALID_SOCKET, FAILED); + if(p_type == IP_Address::TYPE_IPV6) { + // Use IPv6 only socket + int yes = 1; + if(setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&yes, sizeof(yes)) != 0) { + WARN_PRINT("Unable to unset IPv4 address mapping over IPv6"); + } + } + unsigned long par = 1; if (ioctlsocket(sockfd, FIONBIO, &par)) { perror("setting non-block mode"); From 7eef15b73460062e4558857969919313e461f1e4 Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Sat, 29 Oct 2016 03:30:59 +0200 Subject: [PATCH 029/223] Set proper ip_type default for listen() and resolve_hostname() --- core/io/ip.cpp | 4 ++-- core/io/packet_peer_udp.cpp | 2 +- core/io/tcp_server.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/io/ip.cpp b/core/io/ip.cpp index 4ee1b281c45..f0f273af1ad 100644 --- a/core/io/ip.cpp +++ b/core/io/ip.cpp @@ -212,8 +212,8 @@ Array IP::_get_local_addresses() const { void IP::_bind_methods() { - ObjectTypeDB::bind_method(_MD("resolve_hostname","host"),&IP::resolve_hostname); - ObjectTypeDB::bind_method(_MD("resolve_hostname_queue_item","host"),&IP::resolve_hostname_queue_item); + ObjectTypeDB::bind_method(_MD("resolve_hostname","host","ip_type"),&IP::resolve_hostname,DEFVAL(IP_Address::TYPE_ANY)); + ObjectTypeDB::bind_method(_MD("resolve_hostname_queue_item","host","ip_type"),&IP::resolve_hostname_queue_item,DEFVAL(IP_Address::TYPE_ANY)); ObjectTypeDB::bind_method(_MD("get_resolve_item_status","id"),&IP::get_resolve_item_status); ObjectTypeDB::bind_method(_MD("get_resolve_item_address","id"),&IP::get_resolve_item_address); ObjectTypeDB::bind_method(_MD("erase_resolve_item","id"),&IP::erase_resolve_item); diff --git a/core/io/packet_peer_udp.cpp b/core/io/packet_peer_udp.cpp index 3eff128d258..925d00a84a7 100644 --- a/core/io/packet_peer_udp.cpp +++ b/core/io/packet_peer_udp.cpp @@ -55,7 +55,7 @@ Error PacketPeerUDP::_set_send_address(const String& p_address,int p_port,IP_Add void PacketPeerUDP::_bind_methods() { - ObjectTypeDB::bind_method(_MD("listen:Error","port","recv_buf_size"),&PacketPeerUDP::listen,DEFVAL(65536)); + ObjectTypeDB::bind_method(_MD("listen:Error","port","ip_type", "recv_buf_size"),&PacketPeerUDP::listen,DEFVAL(IP_Address::TYPE_ANY),DEFVAL(65536)); ObjectTypeDB::bind_method(_MD("close"),&PacketPeerUDP::close); ObjectTypeDB::bind_method(_MD("wait:Error"),&PacketPeerUDP::wait); ObjectTypeDB::bind_method(_MD("is_listening"),&PacketPeerUDP::is_listening); diff --git a/core/io/tcp_server.cpp b/core/io/tcp_server.cpp index 53d6e900f38..894cccf691f 100644 --- a/core/io/tcp_server.cpp +++ b/core/io/tcp_server.cpp @@ -58,7 +58,7 @@ Error TCP_Server::_listen(uint16_t p_port, IP_Address::AddrType p_type, DVector< void TCP_Server::_bind_methods() { - ObjectTypeDB::bind_method(_MD("listen","port","accepted_hosts"),&TCP_Server::_listen,DEFVAL(IP_Address::TYPE_IPV4), DEFVAL(DVector())); + ObjectTypeDB::bind_method(_MD("listen","port","ip_type", "accepted_hosts"),&TCP_Server::_listen,DEFVAL(IP_Address::TYPE_ANY),DEFVAL(DVector())); ObjectTypeDB::bind_method(_MD("is_connection_available"),&TCP_Server::is_connection_available); ObjectTypeDB::bind_method(_MD("take_connection"),&TCP_Server::take_connection); ObjectTypeDB::bind_method(_MD("stop"),&TCP_Server::stop); From 7f42da03300d515c3fb991ba311783619ac90ea2 Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Sat, 29 Oct 2016 03:53:24 +0200 Subject: [PATCH 030/223] Update docs to IPv6 --- doc/base/classes.xml | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/doc/base/classes.xml b/doc/base/classes.xml index 977b44763ed..92b208ff509 100644 --- a/doc/base/classes.xml +++ b/doc/base/classes.xml @@ -16068,8 +16068,10 @@ + + - Resolve a given hostname, blocking. Resolved hostname is returned as an IP. + Resolve a given hostname, blocking. Resolved hostname is returned as an IPv4 or IPv6 depending on "ip_type". @@ -16077,8 +16079,10 @@ + + - Create a queue item for resolving a given hostname. The queue ID is returned, or RESOLVER_INVALID_ID on error. + Create a queue item for resolving a given hostname to an IPv4 or IPv6 depending on "ip_type". The queue ID is returned, or RESOLVER_INVALID_ID on error. @@ -25357,10 +25361,15 @@ - + + + - Make this [PacketPeerUDP] listen on the "port" using a buffer size "recv_buf_size". Listens on all available adresses. + Make this [PacketPeerUDP] listen on the "port" using protocol "ip_type" and a buffer size "recv_buf_size". Listens on all available adresses. + IP_TYPE_IPV4 = IPv4 only + IP_TYPE_IPV6 = IPv6 only + IP_TYPE_ANY = Dual stack (supports both IPv6 and IPv4 connections). @@ -25370,8 +25379,10 @@ + + - Set the destination address and port for sending packets and variables, a hostname will be resolved if valid. + Set the destination address and port for sending packets and variables, a hostname will be resolved using "ip_type" (v4/v6/any) if valid. @@ -39181,8 +39192,10 @@
+ + - Connect to the specified IP:port pair. Returns [OK] on success or [FAILED] on failure. + Connect to the specified host:port pair. A hostname will be resolved using "ip_type" (v4/v6/any) if valid. Returns [OK] on success or [FAILED] on failure. @@ -40505,10 +40518,15 @@ - + + + - Listen on a port, alternatively give a white-list of accepted hosts. + Listen on a port using protocol "ip_type", alternatively give a white-list of accepted hosts. + IP_TYPE_IPV4 = IPv4 only + IP_TYPE_IPV6 = IPv6 only + IP_TYPE_ANY = Dual stack (supports both IPv6 and IPv4 connections). From 3f15a65307c0a3c2c4769af964356996ed367c35 Mon Sep 17 00:00:00 2001 From: volzhs Date: Mon, 31 Oct 2016 03:40:52 +0900 Subject: [PATCH 031/223] Revert "Place child control under label in AcceptDialog." This reverts commit 3ef272290460670b82621727ba2e876fa0a5a01e. --- scene/gui/dialogs.cpp | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/scene/gui/dialogs.cpp b/scene/gui/dialogs.cpp index b8c5f227c60..15ff334c929 100644 --- a/scene/gui/dialogs.cpp +++ b/scene/gui/dialogs.cpp @@ -232,8 +232,6 @@ String AcceptDialog::get_text() const { void AcceptDialog::set_text(String p_text) { label->set_text(p_text); - minimum_size_changed(); - _update_child_rect(); } void AcceptDialog::set_hide_on_ok(bool p_hide) { @@ -255,51 +253,38 @@ void AcceptDialog::register_text_enter(Node *p_line_edit) { void AcceptDialog::_update_child_rect() { - const int margin = get_constant("margin","Dialogs"); - const Size2 size = get_size(); + int margin = get_constant("margin","Dialogs"); + Size2 size = get_size(); Size2 hminsize = hbc->get_combined_minimum_size(); - const Size2 max_csize( - size.width - margin * 2, - size.height - margin * 3 - hminsize.height); - hminsize.width = max_csize.width; - - Point2 cpos(margin, margin); - Size2 csize = label->get_combined_minimum_size(); - if(label->get_text().empty()) - csize.y = 0; - csize.x = MIN(csize.width, max_csize.width); - csize.y = MIN(csize.height, max_csize.height); + Vector2 cpos(margin,margin); + Vector2 csize(size.x-margin*2,size.y-margin*3-hminsize.y); label->set_pos(cpos); label->set_size(csize); - if(child) { - const float child_y_offset = csize.height + (csize.height > 0 ? margin : 0); - cpos.y += child_y_offset; - csize = max_csize; - csize.height -= child_y_offset; + if (child) { child->set_pos(cpos); child->set_size(csize); } - cpos.y += csize.height + margin; + cpos.y+=csize.y+margin; + csize.y=hminsize.y; hbc->set_pos(cpos); - hbc->set_size(hminsize); + hbc->set_size(csize); + } Size2 AcceptDialog::get_minimum_size() const { int margin = get_constant("margin","Dialogs"); Size2 minsize = label->get_combined_minimum_size(); - if(label->get_text().empty()) - minsize.y = 0; if (child) { Size2 cminsize = child->get_combined_minimum_size(); minsize.x=MAX(cminsize.x,minsize.x); - minsize.y += cminsize.y + (minsize.y > 0 ? margin : 0); + minsize.y=MAX(cminsize.y,minsize.y); } Size2 hminsize = hbc->get_combined_minimum_size(); From 8d5644c4b217636994440f698b09ef395e5dfa55 Mon Sep 17 00:00:00 2001 From: volzhs Date: Mon, 31 Oct 2016 03:42:30 +0900 Subject: [PATCH 032/223] Fix Accept/ConfirmationDialog UI broken --- scene/gui/dialogs.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scene/gui/dialogs.cpp b/scene/gui/dialogs.cpp index 15ff334c929..49782bcb75e 100644 --- a/scene/gui/dialogs.cpp +++ b/scene/gui/dialogs.cpp @@ -232,6 +232,8 @@ String AcceptDialog::get_text() const { void AcceptDialog::set_text(String p_text) { label->set_text(p_text); + minimum_size_changed(); + _update_child_rect(); } void AcceptDialog::set_hide_on_ok(bool p_hide) { @@ -252,15 +254,16 @@ void AcceptDialog::register_text_enter(Node *p_line_edit) { } void AcceptDialog::_update_child_rect() { - + Size2 label_size=label->get_minimum_size(); + if (label->get_text().empty()) { + label_size.height = 0; + } int margin = get_constant("margin","Dialogs"); Size2 size = get_size(); Size2 hminsize = hbc->get_combined_minimum_size(); - Vector2 cpos(margin,margin); - Vector2 csize(size.x-margin*2,size.y-margin*3-hminsize.y); - label->set_pos(cpos); - label->set_size(csize); + Vector2 cpos(margin,margin+label_size.height); + Vector2 csize(size.x-margin*2,size.y-margin*3-hminsize.y-label_size.height); if (child) { From 707185d9d8a273c1bf2a70dcba707045295427a9 Mon Sep 17 00:00:00 2001 From: volzhs Date: Mon, 31 Oct 2016 06:20:54 +0900 Subject: [PATCH 033/223] Fix p_index out of size error when closing script --- tools/editor/plugins/script_editor_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/editor/plugins/script_editor_plugin.cpp b/tools/editor/plugins/script_editor_plugin.cpp index b6ce3e89eb6..e7170a23f39 100644 --- a/tools/editor/plugins/script_editor_plugin.cpp +++ b/tools/editor/plugins/script_editor_plugin.cpp @@ -945,7 +945,7 @@ void ScriptEditor::_menu_option(int p_option) { } } - EditorHelp *help = tab_container->get_child(selected)->cast_to(); + EditorHelp *help = tab_container->get_current_tab_control()->cast_to(); if (help) { switch(p_option) { From bdc7ca84cac727f3f94663f23e1229450230bd2e Mon Sep 17 00:00:00 2001 From: Fabio Alessandrelli Date: Sun, 30 Oct 2016 22:58:15 +0100 Subject: [PATCH 034/223] Define IPV6_V6ONLY flag if not defined on windows (old mingw versions) --- drivers/unix/socket_helpers.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/unix/socket_helpers.h b/drivers/unix/socket_helpers.h index 80e013ad68a..9693911acd6 100644 --- a/drivers/unix/socket_helpers.h +++ b/drivers/unix/socket_helpers.h @@ -3,6 +3,13 @@ #include +#ifdef WINDOWS_ENABLED + // Workaround mingw missing flags! + #ifndef IPV6_V6ONLY + #define IPV6_V6ONLY 27 + #endif +#endif + // helpers for sockaddr -> IP_Address and back, should work for posix and winsock. All implementations should use this static size_t _set_sockaddr(struct sockaddr_storage* p_addr, const IP_Address& p_ip, int p_port) { From d6f2862429399844ebdd16b61da3a3c9d14fba36 Mon Sep 17 00:00:00 2001 From: eska Date: Sun, 30 Oct 2016 23:10:17 +0100 Subject: [PATCH 035/223] Add option 'wasm' to compile to WebAssembly in web export WebAssembly is still experimental, so disabled by default. The HTML shell file now uses $GODOT_BASE, a placeholder for the base filename, instead of $GODOT_JS, $GODOT_MEM and $GODOT_FS. --- platform/javascript/SCsub | 18 +++++++++++----- platform/javascript/detect.py | 12 ++++++++--- platform/javascript/export/export.cpp | 11 +++++----- .../javascript/godot_shell.html | 21 ++----------------- 4 files changed, 30 insertions(+), 32 deletions(-) rename tools/dist/html_fs/godot.html => platform/javascript/godot_shell.html (93%) diff --git a/platform/javascript/SCsub b/platform/javascript/SCsub index 22af436470f..59e5aa175d2 100644 --- a/platform/javascript/SCsub +++ b/platform/javascript/SCsub @@ -10,8 +10,6 @@ javascript_files = [ "javascript_eval.cpp" ] -#obj = env.SharedObject('godot_javascript.cpp') - env_javascript = env.Clone() if env['target'] == "profile": env_javascript.Append(CPPFLAGS=['-DPROFILER_ENABLED']) @@ -21,9 +19,19 @@ for x in javascript_files: javascript_objects.append( env_javascript.Object( x ) ) env.Append(LINKFLAGS=["-s","EXPORTED_FUNCTIONS=\"['_main','_audio_server_mix_function','_main_after_fs_sync']\""]) +env.Append(LINKFLAGS=["--shell-file",'"platform/javascript/godot_shell.html"']) -prog = None +build = env.Program('#bin/godot',javascript_objects,PROGSUFFIX=env["PROGSUFFIX"]+".html") -#env_javascript.SharedLibrary("#platform/javascript/libgodot_javascript.so",[javascript_objects]) +def make_html_shell(target, source, env): + html_path = target[0].rstr() + assert html_path[:4] == 'bin/' + assert html_path[-5:] == '.html' + basename = html_path[4:-5] + with open(html_path, 'r+') as html_file: + fixed_html = html_file.read().replace('.html.mem', '.mem').replace(basename, '$GODOT_BASE') + html_file.seek(0) + html_file.truncate() + html_file.write(fixed_html) -env.Program('#bin/godot',javascript_objects,PROGSUFFIX=env["PROGSUFFIX"]+".html") +env.AddPostAction(build, Action(make_html_shell, "Creating HTML shell file")) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index b1277bfc05c..b6a6a453b33 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -18,6 +18,7 @@ def can_build(): def get_opts(): return [ + ['wasm','Compile to WebAssembly','no'], ['javascript_eval','Enable JavaScript eval interface','yes'], ] @@ -42,7 +43,6 @@ def configure(env): em_path=os.environ["EMSCRIPTEN_ROOT"] env['ENV']['PATH'] = em_path+":"+env['ENV']['PATH'] - env['CC'] = em_path+'/emcc' env['CXX'] = em_path+'/emcc' #env['AR'] = em_path+"/emar" @@ -77,14 +77,20 @@ def configure(env): env.Append(CPPFLAGS=['-DJAVASCRIPT_ENABLED', '-DUNIX_ENABLED', '-DPTHREAD_NO_RENAME', '-DNO_FCNTL','-DMPC_FIXED_POINT','-DTYPED_METHOD_BIND','-DNO_THREADS']) env.Append(CPPFLAGS=['-DGLES2_ENABLED']) env.Append(CPPFLAGS=['-DGLES_NO_CLIENT_ARRAYS']) - env.Append(CPPFLAGS=['-s','ASM_JS=1']) env.Append(CPPFLAGS=['-s','FULL_ES2=1']) # env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT']) + if env['wasm'] == 'yes': + env.Append(LINKFLAGS=['-s','BINARYEN=1']) + env.Append(LINKFLAGS=['-s','\'BINARYEN_METHOD="native-wasm"\'']) + env["PROGSUFFIX"]+=".webassembly" + else: + env.Append(CPPFLAGS=['-s','ASM_JS=1']) + env.Append(LINKFLAGS=['-s','ASM_JS=1']) + if env['javascript_eval'] == 'yes': env.Append(CPPFLAGS=['-DJAVASCRIPT_EVAL_ENABLED']) - env.Append(LINKFLAGS=['-s','ASM_JS=1']) env.Append(LINKFLAGS=['-O2']) #env.Append(LINKFLAGS=['-g4']) diff --git a/platform/javascript/export/export.cpp b/platform/javascript/export/export.cpp index f934916aa26..721aef3e505 100644 --- a/platform/javascript/export/export.cpp +++ b/platform/javascript/export/export.cpp @@ -180,9 +180,7 @@ void EditorExportPlatformJavaScript::_fix_html(Vector& p_html, const St String current_line = lines[i]; current_line = current_line.replace("$GODOT_TMEM",itos((1<<(max_memory+5))*1024*1024)); - current_line = current_line.replace("$GODOT_FS",p_name+"fs.js"); - current_line = current_line.replace("$GODOT_MEM",p_name+".mem"); - current_line = current_line.replace("$GODOT_JS",p_name+".js"); + current_line = current_line.replace("$GODOT_BASE",p_name); current_line = current_line.replace("$GODOT_CANVAS_WIDTH",Globals::get_singleton()->get("display/width")); current_line = current_line.replace("$GODOT_CANVAS_HEIGHT",Globals::get_singleton()->get("display/height")); current_line = current_line.replace("$GODOT_HEAD_TITLE",!html_title.empty()?html_title:(String) Globals::get_singleton()->get("application/name")); @@ -319,16 +317,19 @@ Error EditorExportPlatformJavaScript::export_project(const String& p_path, bool } if (file=="godot.js") { - //_fix_godot(data); file=p_path.get_file().basename()+".js"; } if (file=="godot.mem") { - //_fix_godot(data); file=p_path.get_file().basename()+".mem"; } + if (file=="godot.wasm") { + + file=p_path.get_file().basename()+".wasm"; + } + String dst = p_path.get_base_dir().plus_file(file); FileAccess *f=FileAccess::open(dst,FileAccess::WRITE); if (!f) { diff --git a/tools/dist/html_fs/godot.html b/platform/javascript/godot_shell.html similarity index 93% rename from tools/dist/html_fs/godot.html rename to platform/javascript/godot_shell.html index c354826e1f0..3170d2bb9ed 100644 --- a/tools/dist/html_fs/godot.html +++ b/platform/javascript/godot_shell.html @@ -351,24 +351,7 @@ }; }; //]]> - - + + {{{ SCRIPT }}} From a7d492eb53489083181682839c6d7f83a888ad46 Mon Sep 17 00:00:00 2001 From: volzhs Date: Mon, 31 Oct 2016 11:50:34 +0900 Subject: [PATCH 036/223] Fix memory leak with drag & drop on 2D viewport --- tools/editor/plugins/canvas_item_editor_plugin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/editor/plugins/canvas_item_editor_plugin.cpp b/tools/editor/plugins/canvas_item_editor_plugin.cpp index ac39e0687c4..af9fd69ae7c 100644 --- a/tools/editor/plugins/canvas_item_editor_plugin.cpp +++ b/tools/editor/plugins/canvas_item_editor_plugin.cpp @@ -3797,6 +3797,7 @@ bool CanvasItemEditorViewport::_create_instance(Node* parent, String& path, cons if (editor->get_edited_scene()->get_filename()!="") { // cyclical instancing if (_cyclical_dependency_exists(editor->get_edited_scene()->get_filename(), instanced_scene)) { + memdelete(instanced_scene); return false; } } From 6b2a27bbe5fa112365fc88b9b4678a61293bcb53 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Mon, 31 Oct 2016 08:47:46 -0300 Subject: [PATCH 037/223] shadow atlas allocation (work in progress) --- drivers/gles3/rasterizer_scene_gles3.cpp | 433 ++++++++++++++++++++++- drivers/gles3/rasterizer_scene_gles3.h | 62 +++- drivers/gles3/shadow_atlas_gles3.cpp | 2 + drivers/gles3/shadow_atlas_gles3.h | 8 + servers/visual/rasterizer.h | 4 + servers/visual/visual_server_scene.cpp | 4 + 6 files changed, 501 insertions(+), 12 deletions(-) create mode 100644 drivers/gles3/shadow_atlas_gles3.cpp create mode 100644 drivers/gles3/shadow_atlas_gles3.h diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index f7baf1a30b4..93dc01d4738 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1,6 +1,6 @@ #include "rasterizer_scene_gles3.h" #include "globals.h" - +#include "os/os.h" @@ -55,7 +55,351 @@ static _FORCE_INLINE_ void store_camera(const CameraMatrix& p_mtx, float* p_arra } } +/* SHADOW ATLAS API */ +RID RasterizerSceneGLES3::shadow_atlas_create() { + + ShadowAtlas *shadow_atlas = memnew( ShadowAtlas ); + shadow_atlas->fbo=0; + shadow_atlas->depth=0; + shadow_atlas->size=0; + shadow_atlas->smallest_subdiv=0; + + for(int i=0;i<4;i++) { + shadow_atlas->size_order[i]=i; + } + + + return shadow_atlas_owner.make_rid(shadow_atlas); +} + +void RasterizerSceneGLES3::shadow_atlas_set_size(RID p_atlas,int p_size){ + + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND(!shadow_atlas); + ERR_FAIL_COND(p_size<0); + if (p_size==shadow_atlas->size) + return; + + if (shadow_atlas->fbo) { + glDeleteTextures(1,&shadow_atlas->depth); + glDeleteFramebuffers(1,&shadow_atlas->fbo); + + shadow_atlas->depth=0; + shadow_atlas->fbo=0; + } + for(int i=0;i<4;i++) { + //clear subdivisions + shadow_atlas->quadrants[i].shadows.resize(0); + shadow_atlas->quadrants[i].shadows.resize( 1<quadrants[i].subdivision ); + } + + //erase shadow atlas reference from lights + for (Map::Element *E=shadow_atlas->shadow_owners.front();E;E=E->next()) { + LightInstance *li = light_instance_owner.getornull(E->key()); + ERR_CONTINUE(!li); + li->shadow_atlases.erase(p_atlas); + } + + //clear owners + shadow_atlas->shadow_owners.clear(); + + shadow_atlas->size=nearest_power_of_2(p_size); + + if (shadow_atlas->size) { + glGenFramebuffers(1, &shadow_atlas->fbo); + glBindFramebuffer(GL_FRAMEBUFFER, shadow_atlas->fbo); + + // Create a texture for storing the depth + glActiveTexture(GL_TEXTURE0); + glGenTextures(1, &shadow_atlas->depth); + glBindTexture(GL_TEXTURE_2D, shadow_atlas->depth); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, shadow_atlas->size, shadow_atlas->size, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + + //interpola nearest (though nvidia can improve this) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Remove artifact on the edges of the shadowmap + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // We'll use a depth texture to store the depths in the shadow map + // Attach the depth texture to FBO depth attachment point + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, shadow_atlas->depth, 0); + } +} + + +void RasterizerSceneGLES3::shadow_atlas_set_quadrant_subdivision(RID p_atlas,int p_quadrant,int p_subdivision){ + + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND(!shadow_atlas); + ERR_FAIL_INDEX(p_quadrant,4); + ERR_FAIL_INDEX(p_subdivision,16384); + + + uint32_t subdiv = nearest_power_of_2(p_subdivision); + if (subdiv&0xaaaaaaaa) { //sqrt(subdiv) must be integer + subdiv<<=1; + } + + subdiv=int(Math::sqrt(subdiv)); + + //obtain the number that will be x*x + + if (shadow_atlas->quadrants[p_quadrant].subdivision==subdiv) + return; + + //erase all data from quadrant + for(int i=0;iquadrants[p_quadrant].shadows.size();i++) { + + if (shadow_atlas->quadrants[p_quadrant].shadows[i].owner.is_valid()) { + shadow_atlas->shadow_owners.erase(shadow_atlas->quadrants[p_quadrant].shadows[i].owner); + LightInstance *li = light_instance_owner.getornull(shadow_atlas->quadrants[p_quadrant].shadows[i].owner); + ERR_CONTINUE(!li); + li->shadow_atlases.erase(p_atlas); + } + } + + shadow_atlas->quadrants[p_quadrant].shadows.resize(0); + shadow_atlas->quadrants[p_quadrant].shadows.resize(subdiv*subdiv); + shadow_atlas->quadrants[p_quadrant].subdivision=subdiv; + + //cache the smallest subdiv (for faster allocation in light update) + + shadow_atlas->smallest_subdiv=1<<30; + + for(int i=0;i<4;i++) { + if (shadow_atlas->quadrants[i].subdivision) { + shadow_atlas->smallest_subdiv=MIN(shadow_atlas->smallest_subdiv,shadow_atlas->quadrants[i].subdivision); + } + } + + if (shadow_atlas->smallest_subdiv==1<<30) { + shadow_atlas->smallest_subdiv=0; + } + + //resort the size orders, simple bublesort for 4 elements.. + + int swaps=0; + do { + swaps=0; + + for(int i=0;i<3;i++) { + if (shadow_atlas->quadrants[shadow_atlas->size_order[i]].subdivision > shadow_atlas->quadrants[shadow_atlas->size_order[i+1]].subdivision) { + SWAP(shadow_atlas->size_order[i],shadow_atlas->size_order[i+1]); + swaps++; + } + } + } while(swaps>0); + + + + + +} + +bool RasterizerSceneGLES3::_shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas,int *p_in_quadrants,int p_quadrant_count,int p_current_subdiv,uint64_t p_tick,int &r_quadrant,int &r_shadow) { + + + for(int i=p_quadrant_count-1;i>=0;i--) { + + int qidx = p_in_quadrants[i]; + + if (shadow_atlas->quadrants[qidx].subdivision==p_current_subdiv) { + return false; + } + + //look for an empty space + int sc = shadow_atlas->quadrants[qidx].shadows.size(); + ShadowAtlas::Quadrant::Shadow *sarr = shadow_atlas->quadrants[qidx].shadows.ptr(); + + int found_free_idx=-1; //found a free one + int found_used_idx=-1; //found existing one, must steal it + uint64_t min_pass; // pass of the existing one, try to use the least recently used one (LRU fashion) + + for(int j=0;jlast_scene_pass!=scene_pass) { + + //was just allocated, don't kill it so soon, wait a bit.. + if (p_tick-sarr[j].alloc_tick < shadow_atlas_realloc_tolerance_msec) + continue; + + if (found_used_idx==-1 || sli->last_scene_passlast_scene_pass; + } + } + } + + if (found_free_idx==-1 && found_used_idx==-1) + continue; //nothing found + + if (found_free_idx==-1 && found_used_idx!=-1) { + found_free_idx=found_used_idx; + } + + r_quadrant=qidx; + r_shadow=found_free_idx; + + return true; + } + + return false; + +} + + +uint32_t RasterizerSceneGLES3::shadow_atlas_update_light(RID p_atlas, RID p_light_intance, float p_coverage, uint64_t p_light_version){ + + ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_atlas); + ERR_FAIL_COND_V(!shadow_atlas,ShadowAtlas::SHADOW_INVALID); + + LightInstance *li = light_instance_owner.getornull(p_light_intance); + ERR_FAIL_COND_V(!li,ShadowAtlas::SHADOW_INVALID); + + if (shadow_atlas->size==0 || shadow_atlas->smallest_subdiv==0) { + return ShadowAtlas::SHADOW_INVALID; + } + + uint32_t quad_size = shadow_atlas->size>>1; + int desired_fit = MAX(quad_size/shadow_atlas->smallest_subdiv,nearest_power_of_2(quad_size*p_coverage)); + + int valid_quadrants[4]; + int valid_quadrant_count=0; + int best_size=-1; //best size found + int best_subdiv=-1; //subdiv for the best size + + //find the quadrants this fits into, and the best possible size it can fit into + for(int i=0;i<4;i++) { + int q = shadow_atlas->size_order[i]; + int sd = shadow_atlas->quadrants[q].subdivision; + if (sd==0) + continue; //unused + + int max_fit = quad_size / sd; + + if (best_size!=-1 && max_fit>best_size) + break; //too large + + valid_quadrants[valid_quadrant_count++]=q; + best_subdiv=sd; + + if (max_fit>=desired_fit) { + best_size=max_fit; + } + } + + + ERR_FAIL_COND_V(valid_quadrant_count==0,ShadowAtlas::SHADOW_INVALID); + + uint64_t tick = OS::get_singleton()->get_ticks_msec(); + + + //see if it already exists + + if (shadow_atlas->shadow_owners.has(p_light_intance)) { + //it does! + uint32_t key = shadow_atlas->shadow_owners[p_light_intance]; + uint32_t q = (key>>ShadowAtlas::QUADRANT_SHIFT)&0x3; + uint32_t s = key&ShadowAtlas::SHADOW_INDEX_MASK; + + bool should_realloc=shadow_atlas->quadrants[q].subdivision!=best_subdiv && (shadow_atlas->quadrants[q].shadows[s].alloc_tick-tick > shadow_atlas_realloc_tolerance_msec); + bool should_redraw=shadow_atlas->quadrants[q].shadows[s].version!=p_light_version; + + if (!should_realloc) { + //already existing, see if it should redraw or it's just OK + if (should_redraw) { + key|=ShadowAtlas::SHADOW_INDEX_DIRTY_BIT; + } + + return key; + } + + int new_quadrant,new_shadow; + + //find a better place + if (_shadow_atlas_find_shadow(shadow_atlas,valid_quadrants,valid_quadrant_count,shadow_atlas->quadrants[q].subdivision,tick,new_quadrant,new_shadow)) { + //found a better place! + ShadowAtlas::Quadrant::Shadow *sh = &shadow_atlas->quadrants[new_quadrant].shadows[new_shadow]; + if (sh->owner.is_valid()) { + //is taken, but is invalid, erasing it + shadow_atlas->shadow_owners.erase(sh->owner); + LightInstance *sli = light_instance_owner.get(sh->owner); + sli->shadow_atlases.erase(p_atlas); + } + + sh->owner=p_light_intance; + sh->alloc_tick=tick; + sh->version=p_light_version; + + //make new key + key=new_quadrant<shadow_owners[p_light_intance]=key; + //make it dirty, as it should redraw anyway + key|=ShadowAtlas::SHADOW_INDEX_DIRTY_BIT; + + return key; + } + + //no better place for this shadow found, keep current + + //already existing, see if it should redraw or it's just OK + if (should_redraw) { + key|=ShadowAtlas::SHADOW_INDEX_DIRTY_BIT; + } + + return key; + } + + int new_quadrant,new_shadow; + + //find a better place + if (_shadow_atlas_find_shadow(shadow_atlas,valid_quadrants,valid_quadrant_count,-1,tick,new_quadrant,new_shadow)) { + //found a better place! + ShadowAtlas::Quadrant::Shadow *sh = &shadow_atlas->quadrants[new_quadrant].shadows[new_shadow]; + if (sh->owner.is_valid()) { + //is taken, but is invalid, erasing it + shadow_atlas->shadow_owners.erase(sh->owner); + LightInstance *sli = light_instance_owner.get(sh->owner); + sli->shadow_atlases.erase(p_atlas); + } + + sh->owner=p_light_intance; + sh->alloc_tick=tick; + sh->version=p_light_version; + + //make new key + uint32_t key=new_quadrant<shadow_owners[p_light_intance]=key; + //make it dirty, as it should redraw anyway + key|=ShadowAtlas::SHADOW_INDEX_DIRTY_BIT; + + return key; + } + + //no place to allocate this light, apologies + + return ShadowAtlas::SHADOW_INVALID; + + + + +} /* ENVIRONMENT API */ @@ -162,9 +506,12 @@ void RasterizerSceneGLES3::environment_set_adjustment(RID p_env,bool p_enable,fl RID RasterizerSceneGLES3::light_instance_create(RID p_light) { - print_line("hello light"); + LightInstance *light_instance = memnew( LightInstance ); + light_instance->last_pass=0; + light_instance->last_scene_pass=0; + light_instance->light=p_light; light_instance->light_ptr=storage->light_owner.getornull(p_light); @@ -187,6 +534,18 @@ void RasterizerSceneGLES3::light_instance_set_transform(RID p_light_instance,con light_instance->transform=p_transform; } +void RasterizerSceneGLES3::light_instance_mark_visible(RID p_light_instance) { + + LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); + ERR_FAIL_COND(!light_instance); + + light_instance->last_scene_pass=scene_pass; +} + + +//////////////////////////// +//////////////////////////// +//////////////////////////// bool RasterizerSceneGLES3::_setup_material(RasterizerStorageGLES3::Material* p_material,bool p_alpha_pass) { @@ -851,13 +1210,13 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g if (shadow || m->shader->spatial.unshaded /*|| current_debug==VS::SCENARIO_DEBUG_SHADELESS*/) { - e->sort_key=RenderList::SORT_KEY_LIGHT_INDEX_UNSHADED; + e->sort_key|=RenderList::SORT_KEY_LIGHT_INDEX_UNSHADED; e->sort_key|=uint64_t(0xF)<sort_key|=uint64_t(0xFFFF)<additive_ptr=&e->additive; + ec->sort_key&=~RenderList::SORT_KEY_LIGHT_MASK; ec->sort_key|=uint64_t(directional_light_instances[i]->light_index) << RenderList::SORT_KEY_LIGHT_INDEX_SHIFT; ec->sort_key|=uint64_t(VS::LIGHT_DIRECTIONAL) << RenderList::SORT_KEY_LIGHT_TYPE_SHIFT; - lighted=true; + lit=true; } @@ -922,14 +1282,16 @@ void RasterizerSceneGLES3::_add_geometry( RasterizerStorageGLES3::Geometry* p_g ec->additive_ptr=&e->additive; + ec->sort_key&=~RenderList::SORT_KEY_LIGHT_MASK; ec->sort_key|=uint64_t(li->light_index) << RenderList::SORT_KEY_LIGHT_INDEX_SHIFT; ec->sort_key|=uint64_t(li->light_ptr->type) << RenderList::SORT_KEY_LIGHT_TYPE_SHIFT; - lighted=true; + lit=true; } - if (!lighted) { + if (!lit) { + e->sort_key&=~RenderList::SORT_KEY_LIGHT_MASK; e->sort_key|=uint64_t(0xE)<sort_key|=uint64_t(0xFFFF)< lpercent; + for(int i=0;i=RenderList::MAX_LIGHTS ); @@ -1140,6 +1504,7 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu + #if 0 if (li->light_ptr->shadow_enabled) { CameraMatrix bias; @@ -1181,6 +1546,26 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu li->light_ubo_data.light_params[2]=0; li->light_ubo_data.light_params[3]=0; +#if 0 + + Transform ai = p_camera_inverse_transform.affine_inverse(); + float zn = p_camera_projection.get_z_near(); + Plane p (ai.origin + ai.basis.get_axis(2) * -zn, -ai.basis.get_axis(2) ); + + Vector3 point1 = li->transform.origin; + Vector3 point2 = li->transform.origin+p_camera_inverse_transform.affine_inverse().basis.get_axis(1).normalized()*li->light_ptr->param[VS::LIGHT_PARAM_RANGE]; + + p.intersects_segment(ai.origin,point1,&point1); + p.intersects_segment(ai.origin,point2,&point2); + float r = point1.distance_to(point2); + + float vp_w,vp_h; + p_camera_projection.get_viewport_size(vp_w,vp_h); + + lpercent.push_back(r*2/((vp_h+vp_w)*0.5)); + +#endif + #if 0 if (li->light_ptr->shadow_enabled) { li->shadow_projection[0] = Transform(camera_transform_inverse * li->transform).inverse(); @@ -1245,8 +1630,8 @@ void RasterizerSceneGLES3::_setup_lights(RID *p_light_cull_result,int p_light_cu glBindBuffer(GL_UNIFORM_BUFFER, 0); light_instances[i]=li; + light_instance_count++; } - } void RasterizerSceneGLES3::_copy_screen() { @@ -1305,7 +1690,7 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM _setup_environment(env,p_cam_projection,p_cam_transform); - _setup_lights(p_light_cull_result,p_light_cull_count,p_cam_transform.affine_inverse()); + _setup_lights(p_light_cull_result,p_light_cull_count,p_cam_transform.affine_inverse(),p_cam_projection); render_list.clear(); @@ -1609,16 +1994,40 @@ void RasterizerSceneGLES3::render_scene(const Transform& p_cam_transform,CameraM #endif } +void RasterizerSceneGLES3::set_scene_pass(uint64_t p_pass) { + scene_pass=p_pass; +} + bool RasterizerSceneGLES3::free(RID p_rid) { if (light_instance_owner.owns(p_rid)) { - print_line("bye light"); + LightInstance *light_instance = light_instance_owner.getptr(p_rid); + + //remove from shadow atlases.. + for(Set::Element *E=light_instance->shadow_atlases.front();E;E=E->next()) { + ShadowAtlas *shadow_atlas = shadow_atlas_owner.get(E->get()); + ERR_CONTINUE(!shadow_atlas->shadow_owners.has(p_rid)); + uint32_t key = shadow_atlas->shadow_owners[p_rid]; + uint32_t q = (key>>ShadowAtlas::QUADRANT_SHIFT)&0x3; + uint32_t s = key&ShadowAtlas::SHADOW_INDEX_MASK; + + shadow_atlas->quadrants[q].shadows[s].owner=RID(); + shadow_atlas->shadow_owners.erase(p_rid); + } + + glDeleteBuffers(1,&light_instance->light_ubo); light_instance_owner.free(p_rid); memdelete(light_instance); + } else if (shadow_atlas_owner.owns(p_rid)) { + + ShadowAtlas *shadow_atlas = shadow_atlas_owner.get(p_rid); + shadow_atlas_set_size(p_rid,0); + shadow_atlas_owner.free(p_rid); + memdelete(shadow_atlas); } else { return false; @@ -1800,6 +2209,8 @@ void RasterizerSceneGLES3::initialize() { } render_list.init(); _generate_brdf(); + + shadow_atlas_realloc_tolerance_msec=500; } void RasterizerSceneGLES3::finalize(){ diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 5457b5cde3b..9df6315e456 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -7,7 +7,11 @@ class RasterizerSceneGLES3 : public RasterizerScene { public: + uint64_t shadow_atlas_realloc_tolerance_msec; + + uint64_t render_pass; + uint64_t scene_pass; uint32_t current_material_index; uint32_t current_geometry_index; @@ -62,8 +66,58 @@ public: } state; + /* SHADOW ATLAS API */ + struct ShadowAtlas : public RID_Data { + enum { + SHADOW_INDEX_DIRTY_BIT=(1<<31), + QUADRANT_SHIFT=27, + SHADOW_INDEX_MASK=(1< shadows; + + Quadrant() { + subdivision=0; //not in use + } + + } quadrants[4]; + + int size_order[4]; + uint32_t smallest_subdiv; + + int size; + + GLuint fbo; + GLuint depth; + + Map shadow_owners; + }; + + RID_Owner shadow_atlas_owner; + + RID shadow_atlas_create(); + void shadow_atlas_set_size(RID p_atlas,int p_size); + void shadow_atlas_set_quadrant_subdivision(RID p_atlas,int p_quadrant,int p_subdivision); + bool _shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas, int *p_in_quadrants, int p_quadrant_count, int p_current_subdiv, uint64_t p_tick, int &r_quadrant, int &r_shadow); + uint32_t shadow_atlas_update_light(RID p_atlas,RID p_light_intance,float p_coverage,uint64_t p_light_version); /* ENVIRONMENT API */ @@ -159,6 +213,7 @@ public: GLuint light_ubo; uint64_t shadow_pass; + uint64_t last_scene_pass; uint64_t last_pass; uint16_t light_index; @@ -166,6 +221,7 @@ public: CameraMatrix shadow_projection[4]; + Set shadow_atlases; //shadow atlases where this light is registered LightInstance() { } @@ -175,6 +231,7 @@ public: virtual RID light_instance_create(RID p_light); virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform); + virtual void light_instance_mark_visible(RID p_light_instance); /* RENDER LIST */ @@ -190,6 +247,7 @@ public: SORT_KEY_LIGHT_TYPE_SHIFT=54, //type is most important SORT_KEY_LIGHT_INDEX_SHIFT=38, //type is most important SORT_KEY_LIGHT_INDEX_UNSHADED=uint64_t(0xF) << SORT_KEY_LIGHT_TYPE_SHIFT, //type is most important + SORT_KEY_LIGHT_MASK=(uint64_t(0xFFFFF) << SORT_KEY_LIGHT_INDEX_SHIFT), //type is most important SORT_KEY_MATERIAL_INDEX_SHIFT=22, SORT_KEY_GEOMETRY_INDEX_SHIFT=6, SORT_KEY_GEOMETRY_TYPE_SHIFT=2, @@ -327,7 +385,7 @@ public: void _draw_skybox(RID p_skybox, CameraMatrix& p_projection, const Transform& p_transform, bool p_vflip, float p_scale); void _setup_environment(Environment *env,CameraMatrix& p_cam_projection, const Transform& p_cam_transform); - void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform); + void _setup_lights(RID *p_light_cull_result, int p_light_cull_count, const Transform &p_camera_inverse_transform,const CameraMatrix& p_camera_projection); void _copy_screen(); void _copy_to_front_buffer(Environment *env); @@ -337,6 +395,8 @@ public: void _generate_brdf(); + virtual void set_scene_pass(uint64_t p_pass); + void initialize(); void finalize(); RasterizerSceneGLES3(); diff --git a/drivers/gles3/shadow_atlas_gles3.cpp b/drivers/gles3/shadow_atlas_gles3.cpp new file mode 100644 index 00000000000..38dee8bceab --- /dev/null +++ b/drivers/gles3/shadow_atlas_gles3.cpp @@ -0,0 +1,2 @@ +#include "shadow_atlas_gles3.h" + diff --git a/drivers/gles3/shadow_atlas_gles3.h b/drivers/gles3/shadow_atlas_gles3.h new file mode 100644 index 00000000000..46eaf581cb3 --- /dev/null +++ b/drivers/gles3/shadow_atlas_gles3.h @@ -0,0 +1,8 @@ +#ifndef SHADOW_ATLAS_GLES3_H +#define SHADOW_ATLAS_GLES3_H + +#include "rasterizer_storage_gles3.h" + + + +#endif // SHADOW_ATLAS_GLES3_H diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index b61a2d0a464..feaac38acb6 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -109,9 +109,13 @@ public: virtual RID light_instance_create(RID p_light)=0; virtual void light_instance_set_transform(RID p_light_instance,const Transform& p_transform)=0; + virtual void light_instance_mark_visible(RID p_light_instance)=0; + virtual void render_scene(const Transform& p_cam_transform,CameraMatrix& p_cam_projection,bool p_cam_ortogonal,InstanceBase** p_cull_result,int p_cull_count,RID* p_light_cull_result,int p_light_cull_count,RID* p_directional_lights,int p_directional_light_count,RID p_environment)=0; + virtual void set_scene_pass(uint64_t p_pass)=0; + virtual bool free(RID p_rid)=0; virtual ~RasterizerScene() {} diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index d958ea4bdb9..e7900bdcc9f 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -1107,6 +1107,8 @@ void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewp render_pass++; uint32_t camera_layer_mask=camera->visible_layers; + VSG::scene_render->set_scene_pass(render_pass); + /* STEP 1 - SETUP CAMERA */ CameraMatrix camera_matrix; @@ -1266,6 +1268,7 @@ void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewp //do not add this light if no geometry is affected by it.. light_cull_result[light_cull_count]=ins; light_instance_cull_result[light_cull_count]=light->instance; + VSG::scene_render->light_instance_mark_visible(light->instance); //mark it visible for shadow allocation later light_cull_count++; } @@ -1488,6 +1491,7 @@ void VisualServerScene::render_camera(RID p_camera, RID p_scenario,Size2 p_viewp #endif + VSG::scene_render->render_scene(camera->transform, camera_matrix,ortho,(RasterizerScene::InstanceBase**)instance_cull_result,cull_count,light_instance_cull_result,light_cull_count,directional_light_ptr,directional_light_count,environment); } From 70cce6152d32b8a4e5fdfee52e2bb873c92551e5 Mon Sep 17 00:00:00 2001 From: volzhs Date: Tue, 1 Nov 2016 01:51:34 +0900 Subject: [PATCH 038/223] Fix resetting to default value in EditorSettings --- tools/editor/editor_settings.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/editor/editor_settings.cpp b/tools/editor/editor_settings.cpp index 78bfd7cf259..2a355d95f30 100644 --- a/tools/editor/editor_settings.cpp +++ b/tools/editor/editor_settings.cpp @@ -1098,7 +1098,6 @@ EditorSettings::EditorSettings() { } _load_defaults(); - save_changed_setting=false; } From 40ba6d328be82f5b1e87b54e5db450c2f62731ef Mon Sep 17 00:00:00 2001 From: m4nu3lf Date: Mon, 31 Oct 2016 18:26:42 +0000 Subject: [PATCH 039/223] Fixed Mix nodes in Animation Tree Player --- scene/animation/animation_tree_player.cpp | 20 ++++---------------- scene/animation/animation_tree_player.h | 1 - 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/scene/animation/animation_tree_player.cpp b/scene/animation/animation_tree_player.cpp index 9488ae37a8c..6ab1a3e5a1b 100644 --- a/scene/animation/animation_tree_player.cpp +++ b/scene/animation/animation_tree_player.cpp @@ -494,15 +494,8 @@ float AnimationTreePlayer::_process_node(const StringName& p_node,AnimationNode case NODE_OUTPUT: { NodeOut *on = static_cast(nb); - - for(TrackMap::Element *E=track_map.front();E;E=E->next()) { - E->get().total_weight = 0; - } - HashMap weights; - - return _process_node(on->inputs[0].node,r_prev_anim,p_time,p_seek, p_fallback_weight, &weights); } break; @@ -544,15 +537,12 @@ float AnimationTreePlayer::_process_node(const StringName& p_node,AnimationNode NodePath track_path = an->animation->track_get_path(E->get().local_track); if (an->filter.has(track_path) && an->filter[track_path]) { E->get().weight = 0; - E->get().track->total_weight += p_fallback_weight; } else { if (p_weights->has(track_path)) { float weight = (*p_weights)[track_path]; E->get().weight = weight; - E->get().track->total_weight += weight; } else { E->get().weight = p_fallback_weight; - E->get().track->total_weight += p_fallback_weight; } } if (E->get().weight>CMP_EPSILON) @@ -875,8 +865,6 @@ void AnimationTreePlayer::_process_animation(float p_delta) { if (tr.track==NULL || tr.local_track<0 || tr.weight < CMP_EPSILON) continue; - float blend=tr.weight / tr.track->total_weight; - switch(a->track_get_type(tr.local_track)) { case Animation::TYPE_TRANSFORM: { ///< Transform a node or a bone. @@ -885,14 +873,14 @@ void AnimationTreePlayer::_process_animation(float p_delta) { Vector3 scale; a->transform_track_interpolate(tr.local_track,anim_list->time,&loc,&rot,&scale); - tr.track->loc+=loc*blend; + tr.track->loc+=loc*tr.weight; scale.x-=1.0; scale.y-=1.0; scale.z-=1.0; - tr.track->scale+=scale*blend; + tr.track->scale+=scale*tr.weight; - tr.track->rot = tr.track->rot * empty_rot.slerp(rot,blend); + tr.track->rot = tr.track->rot * empty_rot.slerp(rot,tr.weight); } break; @@ -900,7 +888,7 @@ void AnimationTreePlayer::_process_animation(float p_delta) { if (a->value_track_get_update_mode(tr.local_track)==Animation::UPDATE_CONTINUOUS) { Variant value = a->value_track_interpolate(tr.local_track,anim_list->time); - Variant::blend(tr.track->value,value,blend,tr.track->value); + Variant::blend(tr.track->value,value,tr.weight,tr.track->value); } else { int index = a->track_find_key(tr.local_track,anim_list->time); tr.track->value = a->track_get_key_value(tr.local_track, index); diff --git a/scene/animation/animation_tree_player.h b/scene/animation/animation_tree_player.h index 6b5350e9eef..909748924f1 100644 --- a/scene/animation/animation_tree_player.h +++ b/scene/animation/animation_tree_player.h @@ -111,7 +111,6 @@ private: Variant value; bool skip; - float total_weight; }; From 4c9b00b5085c0ab9195842f9a0cf2287d9242bbe Mon Sep 17 00:00:00 2001 From: volzhs Date: Tue, 1 Nov 2016 07:45:37 +0900 Subject: [PATCH 040/223] Fix to fit stylebox with ItemList --- scene/gui/item_list.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scene/gui/item_list.cpp b/scene/gui/item_list.cpp index f69ad8fa7e1..191627cadb5 100644 --- a/scene/gui/item_list.cpp +++ b/scene/gui/item_list.cpp @@ -801,7 +801,10 @@ void ItemList::_notification(int p_what) { Size2 size = get_size(); float page = size.height-bg->get_minimum_size().height; - int width = size.width - mw - bg->get_minimum_size().width; + int width = size.width-bg->get_minimum_size().width; + if (!scroll_bar->is_hidden()){ + width-=mw+bg->get_margin(MARGIN_RIGHT); + } scroll_bar->set_page(page); draw_style_box(bg,Rect2(Point2(),size)); From 97c8508f5e4f57b1048830d44e76e1f4517fd449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sun, 30 Oct 2016 18:44:57 +0100 Subject: [PATCH 041/223] style: Start applying PEP8 to Python files, indentation issues Done with `autopep8 --select=E1`, fixes: - E101 - Reindent all lines. - E112 - Fix under-indented comments. - E113 - Fix over-indented comments. - E115 - Fix under-indented comments. - E116 - Fix over-indented comments. - E121 - Fix a badly indented line. - E122 - Fix a badly indented line. - E123 - Fix a badly indented line. - E124 - Fix a badly indented line. - E125 - Fix indentation undistinguish from the next logical line. - E126 - Fix a badly indented line. - E127 - Fix a badly indented line. - E128 - Fix a badly indented line. - E129 - Fix a badly indented line. --- SConstruct | 398 ++-- core/SCsub | 42 +- core/make_binders.py | 154 +- doc/tools/doc_merge.py | 260 +-- doc/tools/doc_status.py | 520 ++--- doc/tools/makedoku.py | 458 ++-- doc/tools/makehtml.py | 1026 ++++----- doc/tools/makemd.py | 488 ++--- doc/tools/makerst.py | 788 +++---- drivers/SCsub | 18 +- drivers/gl_context/SCsub | 22 +- drivers/gles2/shaders/SCsub | 10 +- drivers/png/SCsub | 70 +- drivers/rtaudio/SCsub | 2 +- drivers/zlib/SCsub | 22 +- methods.py | 1978 +++++++++--------- modules/SCsub | 10 +- modules/chibi/config.py | 4 +- modules/cscript/config.py | 4 +- modules/dds/config.py | 4 +- modules/enet/SCsub | 30 +- modules/enet/config.py | 4 +- modules/etc1/SCsub | 2 +- modules/etc1/config.py | 4 +- modules/freetype/SCsub | 114 +- modules/freetype/config.py | 4 +- modules/gdscript/config.py | 4 +- modules/gridmap/config.py | 4 +- modules/ik/config.py | 14 +- modules/jpg/SCsub | 2 +- modules/jpg/config.py | 4 +- modules/mpc/SCsub | 28 +- modules/mpc/config.py | 4 +- modules/ogg/SCsub | 16 +- modules/ogg/config.py | 4 +- modules/openssl/SCsub | 1322 ++++++------ modules/openssl/config.py | 4 +- modules/opus/SCsub | 392 ++-- modules/opus/config.py | 4 +- modules/pbm/config.py | 4 +- modules/pvr/SCsub | 10 +- modules/pvr/config.py | 4 +- modules/squish/SCsub | 30 +- modules/squish/config.py | 12 +- modules/theora/SCsub | 126 +- modules/theora/config.py | 4 +- modules/visual_script/config.py | 4 +- modules/vorbis/SCsub | 68 +- modules/vorbis/config.py | 4 +- modules/webm/SCsub | 18 +- modules/webm/config.py | 4 +- modules/webm/libvpx/SCsub | 518 ++--- modules/webm/libvpx/yasm_osx_fat.py | 32 +- modules/webp/SCsub | 216 +- modules/webp/config.py | 4 +- platform/android/SCsub | 74 +- platform/android/detect.py | 368 ++-- platform/bb10/SCsub | 14 +- platform/bb10/detect.py | 112 +- platform/haiku/SCsub | 22 +- platform/haiku/detect.py | 78 +- platform/iphone/SCsub | 28 +- platform/iphone/detect.py | 296 +-- platform/javascript/SCsub | 14 +- platform/javascript/detect.py | 132 +- platform/osx/SCsub | 16 +- platform/osx/detect.py | 118 +- platform/server/SCsub | 2 +- platform/server/detect.py | 76 +- platform/windows/SCsub | 22 +- platform/windows/detect.py | 436 ++-- platform/winrt/SCsub | 22 +- platform/winrt/detect.py | 216 +- platform/x11/SCsub | 8 +- platform/x11/detect.py | 326 +-- scene/3d/SCsub | 6 +- scene/resources/default_theme/make_header.py | 52 +- tools/SCsub | 158 +- tools/editor/SCsub | 116 +- tools/editor/icons/SCsub | 120 +- tools/scripts/addheader.py | 62 +- tools/scripts/file-hex-array.py | 56 +- tools/scripts/make_bmfhdr.py | 64 +- tools/scripts/make_glwrapper.py | 100 +- tools/scripts/makeargs.py | 26 +- tools/scripts/memsort.py | 24 +- tools/scripts/svgs_2_pngs.py | 4 +- tools/translations/extract.py | 118 +- 88 files changed, 6291 insertions(+), 6291 deletions(-) diff --git a/SConstruct b/SConstruct index 70816d82b85..32c24fed091 100644 --- a/SConstruct +++ b/SConstruct @@ -24,28 +24,28 @@ platform_exporters=[] global_defaults=[] for x in glob.glob("platform/*"): - if (not os.path.isdir(x) or not os.path.exists(x+"/detect.py")): - continue - tmppath="./"+x + if (not os.path.isdir(x) or not os.path.exists(x+"/detect.py")): + continue + tmppath="./"+x - sys.path.append(tmppath) - import detect + sys.path.append(tmppath) + import detect - if (os.path.exists(x+"/export/export.cpp")): - platform_exporters.append(x[9:]) - if (os.path.exists(x+"/globals/global_defaults.cpp")): - global_defaults.append(x[9:]) - if (detect.is_active()): - active_platforms.append( detect.get_name() ) - active_platform_ids.append(x); - if (detect.can_build()): - x=x.replace("platform/","") # rest of world - x=x.replace("platform\\","") # win32 - platform_list+=[x] - platform_opts[x]=detect.get_opts() - platform_flags[x]=detect.get_flags() - sys.path.remove(tmppath) - sys.modules.pop('detect') + if (os.path.exists(x+"/export/export.cpp")): + platform_exporters.append(x[9:]) + if (os.path.exists(x+"/globals/global_defaults.cpp")): + global_defaults.append(x[9:]) + if (detect.is_active()): + active_platforms.append( detect.get_name() ) + active_platform_ids.append(x); + if (detect.can_build()): + x=x.replace("platform/","") # rest of world + x=x.replace("platform\\","") # win32 + platform_list+=[x] + platform_opts[x]=detect.get_opts() + platform_flags[x]=detect.get_flags() + sys.path.remove(tmppath) + sys.modules.pop('detect') module_list=methods.detect_modules() @@ -59,14 +59,14 @@ custom_tools=['default'] platform_arg = ARGUMENTS.get("platform", ARGUMENTS.get("p", False)) if (os.name=="posix"): - pass + pass elif (os.name=="nt"): - if ( os.getenv("VCINSTALLDIR")==None or platform_arg=="android"): - custom_tools=['mingw'] + if ( os.getenv("VCINSTALLDIR")==None or platform_arg=="android"): + custom_tools=['mingw'] env_base=Environment(tools=custom_tools); if 'TERM' in os.environ: - env_base['ENV']['TERM'] = os.environ['TERM'] + env_base['ENV']['TERM'] = os.environ['TERM'] env_base.AppendENVPath('PATH', os.getenv('PATH')) env_base.AppendENVPath('PKG_CONFIG_PATH', os.getenv('PKG_CONFIG_PATH')) env_base.global_defaults=global_defaults @@ -109,11 +109,11 @@ customs = ['custom.py'] profile = ARGUMENTS.get("profile", False) if profile: - import os.path - if os.path.isfile(profile): - customs.append(profile) - elif os.path.isfile(profile+".py"): - customs.append(profile+".py") + import os.path + if os.path.isfile(profile): + customs.append(profile) + elif os.path.isfile(profile+".py"): + customs.append(profile+".py") opts=Variables(customs, ARGUMENTS) opts.Add('target', 'Compile Target (debug/release_debug/release).', "debug") @@ -155,12 +155,12 @@ opts.Add('vsproj', 'Generate Visual Studio Project. (yes/no)', 'no') # add platform specific options for k in platform_opts.keys(): - opt_list = platform_opts[k] - for o in opt_list: - opts.Add(o[0],o[1],o[2]) + opt_list = platform_opts[k] + for o in opt_list: + opts.Add(o[0],o[1],o[2]) for x in module_list: - opts.Add('module_'+x+'_enabled', "Enable module '"+x+"' (yes/no)", "yes") + opts.Add('module_'+x+'_enabled', "Enable module '"+x+"' (yes/no)", "yes") opts.Update(env_base) # update environment Help(opts.GenerateHelpText(env_base)) # generate help @@ -181,11 +181,11 @@ sys.modules.pop('detect') """ if (env_base['target']=='debug'): - env_base.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']); - env_base.Append(CPPFLAGS=['-DSCI_NAMESPACE']) + env_base.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']); + env_base.Append(CPPFLAGS=['-DSCI_NAMESPACE']) if (env_base['deprecated']!='no'): - env_base.Append(CPPFLAGS=['-DENABLE_DEPRECATED']); + env_base.Append(CPPFLAGS=['-DENABLE_DEPRECATED']); env_base.platforms = {} @@ -193,214 +193,214 @@ env_base.platforms = {} selected_platform ="" if env_base['platform'] != "": - selected_platform=env_base['platform'] + selected_platform=env_base['platform'] elif env_base['p'] != "": - selected_platform=env_base['p'] - env_base["platform"]=selected_platform + selected_platform=env_base['p'] + env_base["platform"]=selected_platform if selected_platform in platform_list: - sys.path.append("./platform/"+selected_platform) - import detect - if "create" in dir(detect): - env = detect.create(env_base) - else: - env = env_base.Clone() + sys.path.append("./platform/"+selected_platform) + import detect + if "create" in dir(detect): + env = detect.create(env_base) + else: + env = env_base.Clone() - if env['vsproj']=="yes": - env.vs_incs = [] - env.vs_srcs = [] + if env['vsproj']=="yes": + env.vs_incs = [] + env.vs_srcs = [] - def AddToVSProject( sources ): - for x in sources: - if type(x) == type(""): - fname = env.File(x).path - else: - fname = env.File(x)[0].path - pieces = fname.split(".") - if len(pieces)>0: - basename = pieces[0] - basename = basename.replace('\\\\','/') - env.vs_srcs = env.vs_srcs + [basename + ".cpp"] - env.vs_incs = env.vs_incs + [basename + ".h"] - #print basename - env.AddToVSProject = AddToVSProject + def AddToVSProject( sources ): + for x in sources: + if type(x) == type(""): + fname = env.File(x).path + else: + fname = env.File(x)[0].path + pieces = fname.split(".") + if len(pieces)>0: + basename = pieces[0] + basename = basename.replace('\\\\','/') + env.vs_srcs = env.vs_srcs + [basename + ".cpp"] + env.vs_incs = env.vs_incs + [basename + ".h"] + #print basename + env.AddToVSProject = AddToVSProject - env.extra_suffix="" + env.extra_suffix="" - if env["extra_suffix"] != '' : - env.extra_suffix += '.'+env["extra_suffix"] + if env["extra_suffix"] != '' : + env.extra_suffix += '.'+env["extra_suffix"] - CCFLAGS = env.get('CCFLAGS', '') - env['CCFLAGS'] = '' + CCFLAGS = env.get('CCFLAGS', '') + env['CCFLAGS'] = '' - env.Append(CCFLAGS=string.split(str(CCFLAGS))) + env.Append(CCFLAGS=string.split(str(CCFLAGS))) - CFLAGS = env.get('CFLAGS', '') - env['CFLAGS'] = '' + CFLAGS = env.get('CFLAGS', '') + env['CFLAGS'] = '' - env.Append(CFLAGS=string.split(str(CFLAGS))) + env.Append(CFLAGS=string.split(str(CFLAGS))) - LINKFLAGS = env.get('LINKFLAGS', '') - env['LINKFLAGS'] = '' + LINKFLAGS = env.get('LINKFLAGS', '') + env['LINKFLAGS'] = '' - env.Append(LINKFLAGS=string.split(str(LINKFLAGS))) + env.Append(LINKFLAGS=string.split(str(LINKFLAGS))) - flag_list = platform_flags[selected_platform] - for f in flag_list: - if not (f[0] in ARGUMENTS): # allow command line to override platform flags - env[f[0]] = f[1] + flag_list = platform_flags[selected_platform] + for f in flag_list: + if not (f[0] in ARGUMENTS): # allow command line to override platform flags + env[f[0]] = f[1] - #must happen after the flags, so when flags are used by configure, stuff happens (ie, ssl on x11) - detect.configure(env) + #must happen after the flags, so when flags are used by configure, stuff happens (ie, ssl on x11) + detect.configure(env) - #env['platform_libsuffix'] = env['LIBSUFFIX'] + #env['platform_libsuffix'] = env['LIBSUFFIX'] - suffix="."+selected_platform + suffix="."+selected_platform - if (env["target"]=="release"): - if (env["tools"]=="yes"): - print("Tools can only be built with targets 'debug' and 'release_debug'.") - sys.exit(255) - suffix+=".opt" + if (env["target"]=="release"): + if (env["tools"]=="yes"): + print("Tools can only be built with targets 'debug' and 'release_debug'.") + sys.exit(255) + suffix+=".opt" - env.Append(CCFLAGS=['-DNDEBUG']); + env.Append(CCFLAGS=['-DNDEBUG']); - elif (env["target"]=="release_debug"): - if (env["tools"]=="yes"): - suffix+=".opt.tools" - else: - suffix+=".opt.debug" - else: - if (env["tools"]=="yes"): - suffix+=".tools" - else: - suffix+=".debug" + elif (env["target"]=="release_debug"): + if (env["tools"]=="yes"): + suffix+=".opt.tools" + else: + suffix+=".opt.debug" + else: + if (env["tools"]=="yes"): + suffix+=".tools" + else: + suffix+=".debug" - if env["arch"] != "": - suffix += "."+env["arch"] - elif (env["bits"]=="32"): - suffix+=".32" - elif (env["bits"]=="64"): - suffix+=".64" - elif (env["bits"]=="fat"): - suffix+=".fat" + if env["arch"] != "": + suffix += "."+env["arch"] + elif (env["bits"]=="32"): + suffix+=".32" + elif (env["bits"]=="64"): + suffix+=".64" + elif (env["bits"]=="fat"): + suffix+=".fat" - suffix+=env.extra_suffix + suffix+=env.extra_suffix - env["PROGSUFFIX"]=suffix+env["PROGSUFFIX"] - env["OBJSUFFIX"]=suffix+env["OBJSUFFIX"] - env["LIBSUFFIX"]=suffix+env["LIBSUFFIX"] - env["SHLIBSUFFIX"]=suffix+env["SHLIBSUFFIX"] + env["PROGSUFFIX"]=suffix+env["PROGSUFFIX"] + env["OBJSUFFIX"]=suffix+env["OBJSUFFIX"] + env["LIBSUFFIX"]=suffix+env["LIBSUFFIX"] + env["SHLIBSUFFIX"]=suffix+env["SHLIBSUFFIX"] - sys.path.remove("./platform/"+selected_platform) - sys.modules.pop('detect') + sys.path.remove("./platform/"+selected_platform) + sys.modules.pop('detect') - env.module_list=[] + env.module_list=[] - for x in module_list: - if env['module_'+x+'_enabled'] != "yes": - continue - tmppath="./modules/"+x - sys.path.append(tmppath) - env.current_module=x - import config - if (config.can_build(selected_platform)): - config.configure(env) - env.module_list.append(x) - sys.path.remove(tmppath) - sys.modules.pop('config') + for x in module_list: + if env['module_'+x+'_enabled'] != "yes": + continue + tmppath="./modules/"+x + sys.path.append(tmppath) + env.current_module=x + import config + if (config.can_build(selected_platform)): + config.configure(env) + env.module_list.append(x) + sys.path.remove(tmppath) + sys.modules.pop('config') - if (env.use_ptrcall): - env.Append(CPPFLAGS=['-DPTRCALL_ENABLED']); + if (env.use_ptrcall): + env.Append(CPPFLAGS=['-DPTRCALL_ENABLED']); - # to test 64 bits compiltion - # env.Append(CPPFLAGS=['-m64']) + # to test 64 bits compiltion + # env.Append(CPPFLAGS=['-m64']) - if (env['tools']=='yes'): - env.Append(CPPFLAGS=['-DTOOLS_ENABLED']) - if (env['disable_3d']=='yes'): - env.Append(CPPFLAGS=['-D_3D_DISABLED']) - if (env['gdscript']=='yes'): - env.Append(CPPFLAGS=['-DGDSCRIPT_ENABLED']) - if (env['disable_advanced_gui']=='yes'): - env.Append(CPPFLAGS=['-DADVANCED_GUI_DISABLED']) + if (env['tools']=='yes'): + env.Append(CPPFLAGS=['-DTOOLS_ENABLED']) + if (env['disable_3d']=='yes'): + env.Append(CPPFLAGS=['-D_3D_DISABLED']) + if (env['gdscript']=='yes'): + env.Append(CPPFLAGS=['-DGDSCRIPT_ENABLED']) + if (env['disable_advanced_gui']=='yes'): + env.Append(CPPFLAGS=['-DADVANCED_GUI_DISABLED']) - if (env['minizip'] == 'yes'): - env.Append(CPPFLAGS=['-DMINIZIP_ENABLED']) + if (env['minizip'] == 'yes'): + env.Append(CPPFLAGS=['-DMINIZIP_ENABLED']) - if (env['xml']=='yes'): - env.Append(CPPFLAGS=['-DXML_ENABLED']) + if (env['xml']=='yes'): + env.Append(CPPFLAGS=['-DXML_ENABLED']) - if (env['verbose']=='no'): - methods.no_verbose(sys,env) + if (env['verbose']=='no'): + methods.no_verbose(sys,env) - Export('env') + Export('env') - #build subdirs, the build order is dependent on link order. + #build subdirs, the build order is dependent on link order. - SConscript("core/SCsub") - SConscript("servers/SCsub") - SConscript("scene/SCsub") - SConscript("tools/SCsub") - SConscript("drivers/SCsub") - SConscript("bin/SCsub") + SConscript("core/SCsub") + SConscript("servers/SCsub") + SConscript("scene/SCsub") + SConscript("tools/SCsub") + SConscript("drivers/SCsub") + SConscript("bin/SCsub") - SConscript("modules/SCsub") - SConscript("main/SCsub") + SConscript("modules/SCsub") + SConscript("main/SCsub") - SConscript("platform/"+selected_platform+"/SCsub"); # build selected platform + SConscript("platform/"+selected_platform+"/SCsub"); # build selected platform - # Microsoft Visual Studio Project Generation - if (env['vsproj'])=="yes": + # Microsoft Visual Studio Project Generation + if (env['vsproj'])=="yes": - AddToVSProject(env.core_sources) - AddToVSProject(env.main_sources) - AddToVSProject(env.modules_sources) - AddToVSProject(env.scene_sources) - AddToVSProject(env.servers_sources) - AddToVSProject(env.tool_sources) + AddToVSProject(env.core_sources) + AddToVSProject(env.main_sources) + AddToVSProject(env.modules_sources) + AddToVSProject(env.scene_sources) + AddToVSProject(env.servers_sources) + AddToVSProject(env.tool_sources) - # this env flag won't work, it needs to be set in env_base=Environment(MSVC_VERSION='9.0') - # Even then, SCons still seems to ignore it and builds with the latest MSVC... - # That said, it's not needed to be set so far but I'm leaving it here so that this comment - # has a purpose. - #env['MSVS_VERSION']='9.0' - - - # Calls a CMD with /C(lose) and /V(delayed environment variable expansion) options. - # And runs vcvarsall bat for the propper arhitecture and scons for propper configuration - env['MSVSBUILDCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) ^& call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons platform=windows target=$(Configuration) tools=!tools! -j2' - env['MSVSREBUILDCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) & call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons platform=windows target=$(Configuration) tools=!tools! vsproj=yes -j2' - env['MSVSCLEANCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) ^& call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons --clean platform=windows target=$(Configuration) tools=!tools! -j2' - - # This version information (Win32, x64, Debug, Release, Release_Debug seems to be - # required for Visual Studio to understand that it needs to generate an NMAKE - # project. Do not modify without knowing what you are doing. - debug_variants = ['debug|Win32']+['debug|x64'] - release_variants = ['release|Win32']+['release|x64'] - release_debug_variants = ['release_debug|Win32']+['release_debug|x64'] - variants = debug_variants + release_variants + release_debug_variants - debug_targets = ['bin\\godot.windows.tools.32.exe']+['bin\\godot.windows.tools.64.exe'] - release_targets = ['bin\\godot.windows.opt.32.exe']+['bin\\godot.windows.opt.64.exe'] - release_debug_targets = ['bin\\godot.windows.opt.tools.32.exe']+['bin\\godot.windows.opt.tools.64.exe'] - targets = debug_targets + release_targets + release_debug_targets - msvproj = env.MSVSProject(target = ['#godot' + env['MSVSPROJECTSUFFIX'] ], - incs = env.vs_incs, - srcs = env.vs_srcs, - runfile = targets, - buildtarget = targets, - auto_build_solution=1, - variant = variants) + # this env flag won't work, it needs to be set in env_base=Environment(MSVC_VERSION='9.0') + # Even then, SCons still seems to ignore it and builds with the latest MSVC... + # That said, it's not needed to be set so far but I'm leaving it here so that this comment + # has a purpose. + #env['MSVS_VERSION']='9.0' + + + # Calls a CMD with /C(lose) and /V(delayed environment variable expansion) options. + # And runs vcvarsall bat for the propper arhitecture and scons for propper configuration + env['MSVSBUILDCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) ^& call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons platform=windows target=$(Configuration) tools=!tools! -j2' + env['MSVSREBUILDCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) & call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons platform=windows target=$(Configuration) tools=!tools! vsproj=yes -j2' + env['MSVSCLEANCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) ^& call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons --clean platform=windows target=$(Configuration) tools=!tools! -j2' + + # This version information (Win32, x64, Debug, Release, Release_Debug seems to be + # required for Visual Studio to understand that it needs to generate an NMAKE + # project. Do not modify without knowing what you are doing. + debug_variants = ['debug|Win32']+['debug|x64'] + release_variants = ['release|Win32']+['release|x64'] + release_debug_variants = ['release_debug|Win32']+['release_debug|x64'] + variants = debug_variants + release_variants + release_debug_variants + debug_targets = ['bin\\godot.windows.tools.32.exe']+['bin\\godot.windows.tools.64.exe'] + release_targets = ['bin\\godot.windows.opt.32.exe']+['bin\\godot.windows.opt.64.exe'] + release_debug_targets = ['bin\\godot.windows.opt.tools.32.exe']+['bin\\godot.windows.opt.tools.64.exe'] + targets = debug_targets + release_targets + release_debug_targets + msvproj = env.MSVSProject(target = ['#godot' + env['MSVSPROJECTSUFFIX'] ], + incs = env.vs_incs, + srcs = env.vs_srcs, + runfile = targets, + buildtarget = targets, + auto_build_solution=1, + variant = variants) else: - print("No valid target platform selected.") - print("The following were detected:") - for x in platform_list: - print("\t"+x) - print("\nPlease run scons again with argument: platform=") + print("No valid target platform selected.") + print("The following were detected:") + for x in platform_list: + print("\t"+x) + print("\nPlease run scons again with argument: platform=") diff --git a/core/SCsub b/core/SCsub index cbed2e4f35c..d3429c57a82 100644 --- a/core/SCsub +++ b/core/SCsub @@ -9,9 +9,9 @@ gd_call="" gd_inc="" for x in env.global_defaults: - env.core_sources.append("#platform/"+x+"/globals/global_defaults.cpp") - gd_inc+='#include "platform/'+x+'/globals/global_defaults.h"\n' - gd_call+="\tregister_"+x+"_global_defaults();\n" + env.core_sources.append("#platform/"+x+"/globals/global_defaults.cpp") + gd_inc+='#include "platform/'+x+'/globals/global_defaults.h"\n' + gd_call+="\tregister_"+x+"_global_defaults();\n" gd_cpp='#include "globals.h"\n' gd_cpp+=gd_inc @@ -24,25 +24,25 @@ f.close() import os txt = "0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0" if ("SCRIPT_AES256_ENCRYPTION_KEY" in os.environ): - e=os.environ["SCRIPT_AES256_ENCRYPTION_KEY"] - txt = "" - ec_valid=True - if (len(e)!=64): - ec_valid=False - else: + e=os.environ["SCRIPT_AES256_ENCRYPTION_KEY"] + txt = "" + ec_valid=True + if (len(e)!=64): + ec_valid=False + else: - for i in range(len(e)>>1): - if (i>0): - txt+="," - txts="0x"+e[i*2:i*2+2] - try: - int(txts,16) - except: - ec_valid=False - txt+=txts - if (not ec_valid): - txt = "0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0" - print("Invalid AES256 encryption key, not 64 bits hex: "+e) + for i in range(len(e)>>1): + if (i>0): + txt+="," + txts="0x"+e[i*2:i*2+2] + try: + int(txts,16) + except: + ec_valid=False + txt+=txts + if (not ec_valid): + txt = "0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0" + print("Invalid AES256 encryption key, not 64 bits hex: "+e) f = open("script_encryption_key.cpp", "wb") f.write("#include \"globals.h\"\nuint8_t script_encryption_key[32]={" + txt + "};\n") diff --git a/core/make_binders.py b/core/make_binders.py index 75847229652..72ca3286fa6 100644 --- a/core/make_binders.py +++ b/core/make_binders.py @@ -168,94 +168,94 @@ MethodBind* create_method_bind($ifret R$ $ifnoret void$ (T::*p_method)($arg, P@$ def make_version(template,nargs,argmax,const,ret): - intext=template - from_pos=0 - outtext="" + intext=template + from_pos=0 + outtext="" - while(True): - to_pos=intext.find("$",from_pos) - if (to_pos==-1): - outtext+=intext[from_pos:] - break - else: - outtext+=intext[from_pos:to_pos] - end=intext.find("$",to_pos+1) - if (end==-1): - break # ignore - macro=intext[to_pos+1:end] - cmd="" - data="" + while(True): + to_pos=intext.find("$",from_pos) + if (to_pos==-1): + outtext+=intext[from_pos:] + break + else: + outtext+=intext[from_pos:to_pos] + end=intext.find("$",to_pos+1) + if (end==-1): + break # ignore + macro=intext[to_pos+1:end] + cmd="" + data="" - if (macro.find(" ")!=-1): - cmd=macro[0:macro.find(" ")] - data=macro[macro.find(" ")+1:] - else: - cmd=macro + if (macro.find(" ")!=-1): + cmd=macro[0:macro.find(" ")] + data=macro[macro.find(" ")+1:] + else: + cmd=macro - if (cmd=="argc"): - outtext+=str(nargs) - if (cmd=="ifret" and ret): - outtext+=data - if (cmd=="ifargs" and nargs): - outtext+=data - if (cmd=="ifretargs" and nargs and ret): - outtext+=data - if (cmd=="ifconst" and const): - outtext+=data - elif (cmd=="ifnoconst" and not const): - outtext+=data - elif (cmd=="ifnoret" and not ret): - outtext+=data - elif (cmd=="iftempl" and (nargs>0 or ret)): - outtext+=data - elif (cmd=="arg,"): - for i in range(1,nargs+1): - if (i>1): - outtext+=", " - outtext+=data.replace("@",str(i)) - elif (cmd=="arg"): - for i in range(1,nargs+1): - outtext+=data.replace("@",str(i)) - elif (cmd=="noarg"): - for i in range(nargs+1,argmax+1): - outtext+=data.replace("@",str(i)) - elif (cmd=="noarg"): - for i in range(nargs+1,argmax+1): - outtext+=data.replace("@",str(i)) + if (cmd=="argc"): + outtext+=str(nargs) + if (cmd=="ifret" and ret): + outtext+=data + if (cmd=="ifargs" and nargs): + outtext+=data + if (cmd=="ifretargs" and nargs and ret): + outtext+=data + if (cmd=="ifconst" and const): + outtext+=data + elif (cmd=="ifnoconst" and not const): + outtext+=data + elif (cmd=="ifnoret" and not ret): + outtext+=data + elif (cmd=="iftempl" and (nargs>0 or ret)): + outtext+=data + elif (cmd=="arg,"): + for i in range(1,nargs+1): + if (i>1): + outtext+=", " + outtext+=data.replace("@",str(i)) + elif (cmd=="arg"): + for i in range(1,nargs+1): + outtext+=data.replace("@",str(i)) + elif (cmd=="noarg"): + for i in range(nargs+1,argmax+1): + outtext+=data.replace("@",str(i)) + elif (cmd=="noarg"): + for i in range(nargs+1,argmax+1): + outtext+=data.replace("@",str(i)) - from_pos=end+1 + from_pos=end+1 - return outtext + return outtext def run(target, source, env): - versions=10 - versions_ext=6 - text="" - text_ext="" + versions=10 + versions_ext=6 + text="" + text_ext="" - for i in range(0,versions+1): + for i in range(0,versions+1): - t="" - t+=make_version(template,i,versions,False,False) - t+=make_version(template_typed,i,versions,False,False) - t+=make_version(template,i,versions,False,True) - t+=make_version(template_typed,i,versions,False,True) - t+=make_version(template,i,versions,True,False) - t+=make_version(template_typed,i,versions,True,False) - t+=make_version(template,i,versions,True,True) - t+=make_version(template_typed,i,versions,True,True) - if (i>=versions_ext): - text_ext+=t - else: - text+=t + t="" + t+=make_version(template,i,versions,False,False) + t+=make_version(template_typed,i,versions,False,False) + t+=make_version(template,i,versions,False,True) + t+=make_version(template_typed,i,versions,False,True) + t+=make_version(template,i,versions,True,False) + t+=make_version(template_typed,i,versions,True,False) + t+=make_version(template,i,versions,True,True) + t+=make_version(template_typed,i,versions,True,True) + if (i>=versions_ext): + text_ext+=t + else: + text+=t - f=open(target[0].path,"w") - f.write(text) - f.close() + f=open(target[0].path,"w") + f.write(text) + f.close() - f=open(target[1].path,"w") - f.write(text_ext) - f.close() + f=open(target[1].path,"w") + f.write(text_ext) + f.close() diff --git a/doc/tools/doc_merge.py b/doc/tools/doc_merge.py index 536770bee44..27f46960beb 100644 --- a/doc/tools/doc_merge.py +++ b/doc/tools/doc_merge.py @@ -17,195 +17,195 @@ tab=0 old_classes={} def write_string(_f, text,newline=True): - for t in range(tab): - _f.write("\t") - _f.write(text) - if (newline): - _f.write("\n") + for t in range(tab): + _f.write("\t") + _f.write(text) + if (newline): + _f.write("\n") def escape(ret): - ret=ret.replace("&","&"); - ret=ret.replace("<",">"); - ret=ret.replace(">","<"); - ret=ret.replace("'","'"); - ret=ret.replace("\"","""); - return ret + ret=ret.replace("&","&"); + ret=ret.replace("<",">"); + ret=ret.replace(">","<"); + ret=ret.replace("'","'"); + ret=ret.replace("\"","""); + return ret def inc_tab(): - global tab - tab+=1 + global tab + tab+=1 def dec_tab(): - global tab - tab-=1 + global tab + tab-=1 write_string(f,'') write_string(f,'') def get_tag(node,name): - tag="" - if (name in node.attrib): - tag=' '+name+'="'+escape(node.attrib[name])+'" ' - return tag + tag="" + if (name in node.attrib): + tag=' '+name+'="'+escape(node.attrib[name])+'" ' + return tag def find_method_descr(old_class,name): - methods = old_class.find("methods") - if(methods!=None and len(list(methods))>0): - for m in list(methods): - if (m.attrib["name"]==name): - description=m.find("description") - if (description!=None and description.text.strip()!=""): - return description.text + methods = old_class.find("methods") + if(methods!=None and len(list(methods))>0): + for m in list(methods): + if (m.attrib["name"]==name): + description=m.find("description") + if (description!=None and description.text.strip()!=""): + return description.text - return None + return None def find_signal_descr(old_class,name): - signals = old_class.find("signals") - if(signals!=None and len(list(signals))>0): - for m in list(signals): - if (m.attrib["name"]==name): - description=m.find("description") - if (description!=None and description.text.strip()!=""): - return description.text + signals = old_class.find("signals") + if(signals!=None and len(list(signals))>0): + for m in list(signals): + if (m.attrib["name"]==name): + description=m.find("description") + if (description!=None and description.text.strip()!=""): + return description.text - return None + return None def find_constant_descr(old_class,name): - if (old_class==None): - return None - constants = old_class.find("constants") - if(constants!=None and len(list(constants))>0): - for m in list(constants): - if (m.attrib["name"]==name): - if (m.text.strip()!=""): - return m.text - return None + if (old_class==None): + return None + constants = old_class.find("constants") + if(constants!=None and len(list(constants))>0): + for m in list(constants): + if (m.attrib["name"]==name): + if (m.text.strip()!=""): + return m.text + return None def write_class(c): - class_name = c.attrib["name"] - print("Parsing Class: "+class_name) - if (class_name in old_classes): - old_class=old_classes[class_name] - else: - old_class=None + class_name = c.attrib["name"] + print("Parsing Class: "+class_name) + if (class_name in old_classes): + old_class=old_classes[class_name] + else: + old_class=None - category=get_tag(c,"category") - inherits=get_tag(c,"inherits") - write_string(f,'') - inc_tab() + category=get_tag(c,"category") + inherits=get_tag(c,"inherits") + write_string(f,'') + inc_tab() - write_string(f,"") + write_string(f,"") - if (old_class!=None): - old_brief_descr=old_class.find("brief_description") - if (old_brief_descr!=None): - write_string(f,escape(old_brief_descr.text.strip())) + if (old_class!=None): + old_brief_descr=old_class.find("brief_description") + if (old_brief_descr!=None): + write_string(f,escape(old_brief_descr.text.strip())) - write_string(f,"") + write_string(f,"") - write_string(f,"") - if (old_class!=None): - old_descr=old_class.find("description") - if (old_descr!=None): - write_string(f,escape(old_descr.text.strip())) + write_string(f,"") + if (old_class!=None): + old_descr=old_class.find("description") + if (old_descr!=None): + write_string(f,escape(old_descr.text.strip())) - write_string(f,"") + write_string(f,"") - methods = c.find("methods") - if(methods!=None and len(list(methods))>0): + methods = c.find("methods") + if(methods!=None and len(list(methods))>0): - write_string(f,"") - inc_tab() + write_string(f,"") + inc_tab() - for m in list(methods): - qualifiers=get_tag(m,"qualifiers") + for m in list(methods): + qualifiers=get_tag(m,"qualifiers") - write_string(f,'') - inc_tab() + write_string(f,'') + inc_tab() - for a in list(m): - if (a.tag=="return"): - typ=get_tag(a,"type") - write_string(f,''); - write_string(f,''); - elif (a.tag=="argument"): + for a in list(m): + if (a.tag=="return"): + typ=get_tag(a,"type") + write_string(f,''); + write_string(f,''); + elif (a.tag=="argument"): - default=get_tag(a,"default") + default=get_tag(a,"default") - write_string(f,''); - write_string(f,''); + write_string(f,''); + write_string(f,''); - write_string(f,''); - if (old_class!=None): - old_method_descr=find_method_descr(old_class,m.attrib["name"]) - if (old_method_descr): - write_string(f,escape(escape(old_method_descr.strip()))) + write_string(f,''); + if (old_class!=None): + old_method_descr=find_method_descr(old_class,m.attrib["name"]) + if (old_method_descr): + write_string(f,escape(escape(old_method_descr.strip()))) - write_string(f,''); - dec_tab() - write_string(f,"") - dec_tab() - write_string(f,"") + write_string(f,''); + dec_tab() + write_string(f,"") + dec_tab() + write_string(f,"") - signals = c.find("signals") - if(signals!=None and len(list(signals))>0): + signals = c.find("signals") + if(signals!=None and len(list(signals))>0): - write_string(f,"") - inc_tab() + write_string(f,"") + inc_tab() - for m in list(signals): + for m in list(signals): - write_string(f,'') - inc_tab() + write_string(f,'') + inc_tab() - for a in list(m): - if (a.tag=="argument"): + for a in list(m): + if (a.tag=="argument"): - write_string(f,''); - write_string(f,''); + write_string(f,''); + write_string(f,''); - write_string(f,''); - if (old_class!=None): - old_signal_descr=find_signal_descr(old_class,m.attrib["name"]) - if (old_signal_descr): - write_string(f,escape(old_signal_descr.strip())) - write_string(f,''); - dec_tab() - write_string(f,"") - dec_tab() - write_string(f,"") + write_string(f,''); + if (old_class!=None): + old_signal_descr=find_signal_descr(old_class,m.attrib["name"]) + if (old_signal_descr): + write_string(f,escape(old_signal_descr.strip())) + write_string(f,''); + dec_tab() + write_string(f,"") + dec_tab() + write_string(f,"") - constants = c.find("constants") - if(constants!=None and len(list(constants))>0): + constants = c.find("constants") + if(constants!=None and len(list(constants))>0): - write_string(f,"") - inc_tab() + write_string(f,"") + inc_tab() - for m in list(constants): + for m in list(constants): - write_string(f,'') - old_constant_descr=find_constant_descr(old_class,m.attrib["name"]) - if (old_constant_descr): - write_string(f,escape(old_constant_descr.strip())) - write_string(f,"") + write_string(f,'') + old_constant_descr=find_constant_descr(old_class,m.attrib["name"]) + if (old_constant_descr): + write_string(f,escape(old_constant_descr.strip())) + write_string(f,"") - dec_tab() - write_string(f,"") + dec_tab() + write_string(f,"") - dec_tab() - write_string(f,"
") + dec_tab() + write_string(f,"
") for c in list(old_doc): - old_classes[c.attrib["name"]]=c + old_classes[c.attrib["name"]]=c for c in list(new_doc): - write_class(c) + write_class(c) write_string(f,'\n') diff --git a/doc/tools/doc_status.py b/doc/tools/doc_status.py index f0ede405ce0..00907bb01da 100755 --- a/doc/tools/doc_status.py +++ b/doc/tools/doc_status.py @@ -11,66 +11,66 @@ import xml.etree.ElementTree as ET ################################################################################ flags = { - 'c': platform.platform() != 'Windows', # Disable by default on windows, since we use ANSI escape codes - 'b': False, - 'g': False, - 's': False, - 'u': False, - 'h': False, - 'p': False, - 'o': True, - 'i': False, + 'c': platform.platform() != 'Windows', # Disable by default on windows, since we use ANSI escape codes + 'b': False, + 'g': False, + 's': False, + 'u': False, + 'h': False, + 'p': False, + 'o': True, + 'i': False, } flag_descriptions = { - 'c': 'Toggle colors when outputting.', - 'b': 'Toggle showing only not fully described classes.', - 'g': 'Toggle showing only completed classes.', - 's': 'Toggle showing comments about the status.', - 'u': 'Toggle URLs to docs.', - 'h': 'Show help and exit.', - 'p': 'Toggle showing percentage as well as counts.', - 'o': 'Toggle overall column.', - 'i': 'Toggle collapse of class items columns.', + 'c': 'Toggle colors when outputting.', + 'b': 'Toggle showing only not fully described classes.', + 'g': 'Toggle showing only completed classes.', + 's': 'Toggle showing comments about the status.', + 'u': 'Toggle URLs to docs.', + 'h': 'Show help and exit.', + 'p': 'Toggle showing percentage as well as counts.', + 'o': 'Toggle overall column.', + 'i': 'Toggle collapse of class items columns.', } long_flags = { - 'colors': 'c', - 'use-colors': 'c', + 'colors': 'c', + 'use-colors': 'c', - 'bad': 'b', - 'only-bad': 'b', + 'bad': 'b', + 'only-bad': 'b', - 'good': 'g', - 'only-good': 'g', + 'good': 'g', + 'only-good': 'g', - 'comments': 's', - 'status': 's', + 'comments': 's', + 'status': 's', - 'urls': 'u', - 'gen-url': 'u', + 'urls': 'u', + 'gen-url': 'u', - 'help': 'h', + 'help': 'h', - 'percent': 'p', - 'use-percentages': 'p', + 'percent': 'p', + 'use-percentages': 'p', - 'overall': 'o', - 'use-overall': 'o', + 'overall': 'o', + 'use-overall': 'o', - 'items': 'i', - 'collapse': 'i', + 'items': 'i', + 'collapse': 'i', } table_columns = ['name', 'brief_description', 'description', 'methods', 'constants', 'members', 'signals'] table_column_names = ['Name', 'Brief Desc.', 'Desc.', 'Methods', 'Constants', 'Members', 'Signals'] colors = { - 'name': [36], # cyan - 'part_big_problem': [4, 31], # underline, red - 'part_problem': [31], # red - 'part_mostly_good': [33], # yellow - 'part_good': [32], # green - 'url': [4, 34], # underline, blue - 'section': [1, 4], # bold, underline - 'state_off': [36], # cyan - 'state_on': [1, 35], # bold, magenta/plum + 'name': [36], # cyan + 'part_big_problem': [4, 31], # underline, red + 'part_problem': [31], # red + 'part_mostly_good': [33], # yellow + 'part_good': [32], # green + 'url': [4, 34], # underline, blue + 'section': [1, 4], # bold, underline + 'state_off': [36], # cyan + 'state_on': [1, 35], # bold, magenta/plum } overall_progress_description_weigth = 10 @@ -81,22 +81,22 @@ overall_progress_description_weigth = 10 ################################################################################ def validate_tag(elem, tag): - if elem.tag != tag: - print('Tag mismatch, expected "' + tag + '", got ' + elem.tag) - sys.exit(255) + if elem.tag != tag: + print('Tag mismatch, expected "' + tag + '", got ' + elem.tag) + sys.exit(255) def color(color, string): - if flags['c']: - color_format = '' - for code in colors[color]: - color_format += '\033[' + str(code) + 'm' - return color_format + string + '\033[0m' - else: - return string + if flags['c']: + color_format = '' + for code in colors[color]: + color_format += '\033[' + str(code) + 'm' + return color_format + string + '\033[0m' + else: + return string ansi_escape = re.compile(r'\x1b[^m]*m') def nonescape_len(s): - return len(ansi_escape.sub('', s)) + return len(ansi_escape.sub('', s)) @@ -105,138 +105,138 @@ def nonescape_len(s): ################################################################################ class ClassStatusProgress: - def __init__(self, described = 0, total = 0): - self.described = described - self.total = total + def __init__(self, described = 0, total = 0): + self.described = described + self.total = total - def __add__(self, other): - return ClassStatusProgress(self.described + other.described, self.total + other.total) + def __add__(self, other): + return ClassStatusProgress(self.described + other.described, self.total + other.total) - def increment(self, described): - if described: - self.described += 1 - self.total += 1 + def increment(self, described): + if described: + self.described += 1 + self.total += 1 - def is_ok(self): - return self.described >= self.total + def is_ok(self): + return self.described >= self.total - def to_configured_colored_string(self): - if flags['p']: - return self.to_colored_string('{percent}% ({has}/{total})', '{pad_percent}{pad_described}{s}{pad_total}') - else: - return self.to_colored_string() + def to_configured_colored_string(self): + if flags['p']: + return self.to_colored_string('{percent}% ({has}/{total})', '{pad_percent}{pad_described}{s}{pad_total}') + else: + return self.to_colored_string() - def to_colored_string(self, format='{has}/{total}', pad_format='{pad_described}{s}{pad_total}'): - ratio = self.described/self.total if self.total != 0 else 1 - percent = round(100*ratio) - s = format.format(has = str(self.described), total = str(self.total), percent = str(percent)) - if self.described >= self.total: - s = color('part_good', s) - elif self.described >= self.total/4*3: - s = color('part_mostly_good', s) - elif self.described > 0: - s = color('part_problem', s) - else: - s = color('part_big_problem', s) - pad_size = max(len(str(self.described)), len(str(self.total))) - pad_described = ''.ljust(pad_size - len(str(self.described))) - pad_percent = ''.ljust(3 - len(str(percent))) - pad_total = ''.ljust(pad_size - len(str(self.total))) - return pad_format.format(pad_described = pad_described, pad_total = pad_total, pad_percent = pad_percent, s = s) + def to_colored_string(self, format='{has}/{total}', pad_format='{pad_described}{s}{pad_total}'): + ratio = self.described/self.total if self.total != 0 else 1 + percent = round(100*ratio) + s = format.format(has = str(self.described), total = str(self.total), percent = str(percent)) + if self.described >= self.total: + s = color('part_good', s) + elif self.described >= self.total/4*3: + s = color('part_mostly_good', s) + elif self.described > 0: + s = color('part_problem', s) + else: + s = color('part_big_problem', s) + pad_size = max(len(str(self.described)), len(str(self.total))) + pad_described = ''.ljust(pad_size - len(str(self.described))) + pad_percent = ''.ljust(3 - len(str(percent))) + pad_total = ''.ljust(pad_size - len(str(self.total))) + return pad_format.format(pad_described = pad_described, pad_total = pad_total, pad_percent = pad_percent, s = s) class ClassStatus: - def __init__(self, name=''): - self.name = name - self.has_brief_description = True - self.has_description = True - self.progresses = { - 'methods': ClassStatusProgress(), - 'constants': ClassStatusProgress(), - 'members': ClassStatusProgress(), - 'signals': ClassStatusProgress() - } + def __init__(self, name=''): + self.name = name + self.has_brief_description = True + self.has_description = True + self.progresses = { + 'methods': ClassStatusProgress(), + 'constants': ClassStatusProgress(), + 'members': ClassStatusProgress(), + 'signals': ClassStatusProgress() + } - def __add__(self, other): - new_status = ClassStatus() - new_status.name = self.name - new_status.has_brief_description = self.has_brief_description and other.has_brief_description - new_status.has_description = self.has_description and other.has_description - for k in self.progresses: - new_status.progresses[k] = self.progresses[k] + other.progresses[k] - return new_status + def __add__(self, other): + new_status = ClassStatus() + new_status.name = self.name + new_status.has_brief_description = self.has_brief_description and other.has_brief_description + new_status.has_description = self.has_description and other.has_description + for k in self.progresses: + new_status.progresses[k] = self.progresses[k] + other.progresses[k] + return new_status - def is_ok(self): - ok = True - ok = ok and self.has_brief_description - ok = ok and self.has_description - for k in self.progresses: - ok = ok and self.progresses[k].is_ok() - return ok + def is_ok(self): + ok = True + ok = ok and self.has_brief_description + ok = ok and self.has_description + for k in self.progresses: + ok = ok and self.progresses[k].is_ok() + return ok - def make_output(self): - output = {} - output['name'] = color('name', self.name) + def make_output(self): + output = {} + output['name'] = color('name', self.name) - ok_string = color('part_good', 'OK') - missing_string = color('part_big_problem', 'MISSING') + ok_string = color('part_good', 'OK') + missing_string = color('part_big_problem', 'MISSING') - output['brief_description'] = ok_string if self.has_brief_description else missing_string - output['description'] = ok_string if self.has_description else missing_string + output['brief_description'] = ok_string if self.has_brief_description else missing_string + output['description'] = ok_string if self.has_description else missing_string - description_progress = ClassStatusProgress( - (self.has_brief_description + self.has_description) * overall_progress_description_weigth, - 2 * overall_progress_description_weigth - ) - items_progress = ClassStatusProgress() + description_progress = ClassStatusProgress( + (self.has_brief_description + self.has_description) * overall_progress_description_weigth, + 2 * overall_progress_description_weigth + ) + items_progress = ClassStatusProgress() - for k in ['methods', 'constants', 'members', 'signals']: - items_progress += self.progresses[k] - output[k] = self.progresses[k].to_configured_colored_string() + for k in ['methods', 'constants', 'members', 'signals']: + items_progress += self.progresses[k] + output[k] = self.progresses[k].to_configured_colored_string() - output['items'] = items_progress.to_configured_colored_string() + output['items'] = items_progress.to_configured_colored_string() - output['overall'] = (description_progress + items_progress).to_colored_string('{percent}%', '{pad_percent}{s}') + output['overall'] = (description_progress + items_progress).to_colored_string('{percent}%', '{pad_percent}{s}') - if self.name.startswith('Total'): - output['url'] = color('url', 'http://docs.godotengine.org/en/latest/classes/_classes.html') - if flags['s']: - output['comment'] = color('part_good', 'ALL OK') - else: - output['url'] = color('url', 'http://docs.godotengine.org/en/latest/classes/class_{name}.html'.format(name=self.name.lower())) + if self.name.startswith('Total'): + output['url'] = color('url', 'http://docs.godotengine.org/en/latest/classes/_classes.html') + if flags['s']: + output['comment'] = color('part_good', 'ALL OK') + else: + output['url'] = color('url', 'http://docs.godotengine.org/en/latest/classes/class_{name}.html'.format(name=self.name.lower())) - if flags['s'] and not flags['g'] and self.is_ok(): - output['comment'] = color('part_good', 'ALL OK') + if flags['s'] and not flags['g'] and self.is_ok(): + output['comment'] = color('part_good', 'ALL OK') - return output + return output - def generate_for_class(c): - status = ClassStatus() - status.name = c.attrib['name'] - for tag in list(c): + def generate_for_class(c): + status = ClassStatus() + status.name = c.attrib['name'] + for tag in list(c): - if tag.tag == 'brief_description': - status.has_brief_description = len(tag.text.strip()) > 0 + if tag.tag == 'brief_description': + status.has_brief_description = len(tag.text.strip()) > 0 - elif tag.tag == 'description': - status.has_description = len(tag.text.strip()) > 0 + elif tag.tag == 'description': + status.has_description = len(tag.text.strip()) > 0 - elif tag.tag in ['methods', 'signals']: - for sub_tag in list(tag): - descr = sub_tag.find('description') - status.progresses[tag.tag].increment(len(descr.text.strip()) > 0) + elif tag.tag in ['methods', 'signals']: + for sub_tag in list(tag): + descr = sub_tag.find('description') + status.progresses[tag.tag].increment(len(descr.text.strip()) > 0) - elif tag.tag in ['constants', 'members']: - for sub_tag in list(tag): - status.progresses[tag.tag].increment(len(sub_tag.text.strip()) > 0) + elif tag.tag in ['constants', 'members']: + for sub_tag in list(tag): + status.progresses[tag.tag].increment(len(sub_tag.text.strip()) > 0) - elif tag.tag in ['theme_items']: - pass #Ignore those tags, since they seem to lack description at all + elif tag.tag in ['theme_items']: + pass #Ignore those tags, since they seem to lack description at all - else: - print(tag.tag, tag.attrib) + else: + print(tag.tag, tag.attrib) - return status + return status @@ -248,31 +248,31 @@ input_file_list = [] input_class_list = [] for arg in sys.argv[1:]: - if arg.startswith('--'): - flags[long_flags[arg[2:]]] = not flags[long_flags[arg[2:]]] - elif arg.startswith('-'): - for f in arg[1:]: - flags[f] = not flags[f] - elif arg.endswith('.xml'): - input_file_list.append(arg) - else: - input_class_list.append(arg) + if arg.startswith('--'): + flags[long_flags[arg[2:]]] = not flags[long_flags[arg[2:]]] + elif arg.startswith('-'): + for f in arg[1:]: + flags[f] = not flags[f] + elif arg.endswith('.xml'): + input_file_list.append(arg) + else: + input_class_list.append(arg) if flags['i']: - for r in ['methods', 'constants', 'members', 'signals']: - index = table_columns.index(r) - del table_column_names[index] - del table_columns[index] - table_column_names.append('Items') - table_columns.append('items') + for r in ['methods', 'constants', 'members', 'signals']: + index = table_columns.index(r) + del table_column_names[index] + del table_columns[index] + table_column_names.append('Items') + table_columns.append('items') if flags['o'] == (not flags['i']): - table_column_names.append('Overall') - table_columns.append('overall') + table_column_names.append('Overall') + table_columns.append('overall') if flags['u']: - table_column_names.append('Docs URL') - table_columns.append('url') + table_column_names.append('Docs URL') + table_columns.append('url') ################################################################################ @@ -280,28 +280,28 @@ if flags['u']: ################################################################################ if len(input_file_list) < 1 or flags['h']: - if not flags['h']: - print(color('section', 'Invalid usage') + ': At least one classes.xml file is required') - print(color('section', 'Usage') + ': doc_status.py [flags] [class names]') - print('\t< and > signify required parameters, while [ and ] signify optional parameters.') - print('\tNote that you can give more than one classes file, in which case they will be merged on-the-fly.') - print(color('section', 'Available flags') + ':') - possible_synonym_list = list(long_flags) - possible_synonym_list.sort() - flag_list = list(flags) - flag_list.sort() - for flag in flag_list: - synonyms = [color('name', '-' + flag)] - for synonym in possible_synonym_list: - if long_flags[synonym] == flag: - synonyms.append(color('name', '--' + synonym)) + if not flags['h']: + print(color('section', 'Invalid usage') + ': At least one classes.xml file is required') + print(color('section', 'Usage') + ': doc_status.py [flags] [class names]') + print('\t< and > signify required parameters, while [ and ] signify optional parameters.') + print('\tNote that you can give more than one classes file, in which case they will be merged on-the-fly.') + print(color('section', 'Available flags') + ':') + possible_synonym_list = list(long_flags) + possible_synonym_list.sort() + flag_list = list(flags) + flag_list.sort() + for flag in flag_list: + synonyms = [color('name', '-' + flag)] + for synonym in possible_synonym_list: + if long_flags[synonym] == flag: + synonyms.append(color('name', '--' + synonym)) - print(('{synonyms} (Currently '+color('state_'+('on' if flags[flag] else 'off'), '{value}')+')\n\t{description}').format( - synonyms = ', '.join(synonyms), - value = ('on' if flags[flag] else 'off'), - description = flag_descriptions[flag] - )) - sys.exit(0) + print(('{synonyms} (Currently '+color('state_'+('on' if flags[flag] else 'off'), '{value}')+')\n\t{description}').format( + synonyms = ', '.join(synonyms), + value = ('on' if flags[flag] else 'off'), + description = flag_descriptions[flag] + )) + sys.exit(0) @@ -313,25 +313,25 @@ class_names = [] classes = {} for file in input_file_list: - tree = ET.parse(file) - doc = tree.getroot() + tree = ET.parse(file) + doc = tree.getroot() - if 'version' not in doc.attrib: - print('Version missing from "doc"') - sys.exit(255) + if 'version' not in doc.attrib: + print('Version missing from "doc"') + sys.exit(255) - version = doc.attrib['version'] + version = doc.attrib['version'] - for c in list(doc): - if c.attrib['name'] in class_names: - continue - class_names.append(c.attrib['name']) - classes[c.attrib['name']] = c + for c in list(doc): + if c.attrib['name'] in class_names: + continue + class_names.append(c.attrib['name']) + classes[c.attrib['name']] = c class_names.sort() if len(input_class_list) < 1: - input_class_list = class_names + input_class_list = class_names @@ -346,32 +346,32 @@ table_column_chars = '|' total_status = ClassStatus('Total') for cn in input_class_list: - if not cn in classes: - print('Cannot find class ' + cn + '!') - sys.exit(255) + if not cn in classes: + print('Cannot find class ' + cn + '!') + sys.exit(255) - c = classes[cn] - validate_tag(c, 'class') - status = ClassStatus.generate_for_class(c) + c = classes[cn] + validate_tag(c, 'class') + status = ClassStatus.generate_for_class(c) - if flags['b'] and status.is_ok(): - continue - if flags['g'] and not status.is_ok(): - continue + if flags['b'] and status.is_ok(): + continue + if flags['g'] and not status.is_ok(): + continue - total_status = total_status + status - out = status.make_output() - row = [] - for column in table_columns: - if column in out: - row.append(out[column]) - else: - row.append('') + total_status = total_status + status + out = status.make_output() + row = [] + for column in table_columns: + if column in out: + row.append(out[column]) + else: + row.append('') - if 'comment' in out and out['comment'] != '': - row.append(out['comment']) + if 'comment' in out and out['comment'] != '': + row.append(out['comment']) - table.append(row) + table.append(row) @@ -381,50 +381,50 @@ for cn in input_class_list: ################################################################################ if len(table) == 1: - print(color('part_big_problem', 'No classes suitable for printing!')) - sys.exit(0) + print(color('part_big_problem', 'No classes suitable for printing!')) + sys.exit(0) if len(table) > 2: - total_status.name = 'Total = {0}'.format(len(table) - 1) - out = total_status.make_output() - row = [] - for column in table_columns: - if column in out: - row.append(out[column]) - else: - row.append('') - table.append(row) + total_status.name = 'Total = {0}'.format(len(table) - 1) + out = total_status.make_output() + row = [] + for column in table_columns: + if column in out: + row.append(out[column]) + else: + row.append('') + table.append(row) table_column_sizes = [] for row in table: - for cell_i, cell in enumerate(row): - if cell_i >= len(table_column_sizes): - table_column_sizes.append(0) + for cell_i, cell in enumerate(row): + if cell_i >= len(table_column_sizes): + table_column_sizes.append(0) - table_column_sizes[cell_i] = max(nonescape_len(cell), table_column_sizes[cell_i]) + table_column_sizes[cell_i] = max(nonescape_len(cell), table_column_sizes[cell_i]) divider_string = table_row_chars[0] for cell_i in range(len(table[0])): - divider_string += table_row_chars[1] * (table_column_sizes[cell_i] + 2) + table_row_chars[0] + divider_string += table_row_chars[1] * (table_column_sizes[cell_i] + 2) + table_row_chars[0] print(divider_string) for row_i, row in enumerate(table): - row_string = table_column_chars - for cell_i, cell in enumerate(row): - padding_needed = table_column_sizes[cell_i] - nonescape_len(cell) + 2 - if cell_i == 0: - row_string += table_row_chars[2] + cell + table_row_chars[2]*(padding_needed-1) - else: - row_string += table_row_chars[2]*math.floor(padding_needed/2) + cell + table_row_chars[2]*math.ceil((padding_needed/2)) - row_string += table_column_chars + row_string = table_column_chars + for cell_i, cell in enumerate(row): + padding_needed = table_column_sizes[cell_i] - nonescape_len(cell) + 2 + if cell_i == 0: + row_string += table_row_chars[2] + cell + table_row_chars[2]*(padding_needed-1) + else: + row_string += table_row_chars[2]*math.floor(padding_needed/2) + cell + table_row_chars[2]*math.ceil((padding_needed/2)) + row_string += table_column_chars - print(row_string) + print(row_string) - if row_i == 0 or row_i == len(table) - 2: - print(divider_string) + if row_i == 0 or row_i == len(table) - 2: + print(divider_string) print(divider_string) if total_status.is_ok() and not flags['g']: - print('All listed classes are ' + color('part_good', 'OK') + '!') + print('All listed classes are ' + color('part_good', 'OK') + '!') diff --git a/doc/tools/makedoku.py b/doc/tools/makedoku.py index 7c3ca29af89..f00a4614d5c 100644 --- a/doc/tools/makedoku.py +++ b/doc/tools/makedoku.py @@ -8,17 +8,17 @@ input_list = [] for arg in sys.argv[1:]: - input_list.append(arg) + input_list.append(arg) if len(input_list) < 1: - print("usage: makedoku.py ") - sys.exit(0) + print("usage: makedoku.py ") + sys.exit(0) def validate_tag(elem,tag): - if (elem.tag != tag): - print("Tag mismatch, expected '"+tag+"', got "+elem.tag); - sys.exit(255) + if (elem.tag != tag): + print("Tag mismatch, expected '"+tag+"', got "+elem.tag); + sys.exit(255) class_names=[] @@ -27,289 +27,289 @@ classes={} def make_class_list(class_list,columns): - f=open("class_list.txt","wb") - prev=0 - col_max = len(class_list) / columns + 1 - print("col max is ", col_max) - col_count = 0 - row_count = 0 - last_initial = "" - fit_columns=[] + f=open("class_list.txt","wb") + prev=0 + col_max = len(class_list) / columns + 1 + print("col max is ", col_max) + col_count = 0 + row_count = 0 + last_initial = "" + fit_columns=[] - for n in range(0,columns): - fit_columns+=[[]] + for n in range(0,columns): + fit_columns+=[[]] - indexers=[] - last_initial="" + indexers=[] + last_initial="" - idx=0 - for n in class_list: - col = idx/col_max - if (col>=columns): - col=columns-1 - fit_columns[col]+=[n] - idx+=1 - if (n[:1]!=last_initial): - indexers+=[n] - last_initial=n[:1] + idx=0 + for n in class_list: + col = idx/col_max + if (col>=columns): + col=columns-1 + fit_columns[col]+=[n] + idx+=1 + if (n[:1]!=last_initial): + indexers+=[n] + last_initial=n[:1] - row_max=0 + row_max=0 - for n in range(0,columns): - if (len(fit_columns[n])>row_max): - row_max=len(fit_columns[n]) + for n in range(0,columns): + if (len(fit_columns[n])>row_max): + row_max=len(fit_columns[n]) - for r in range(0,row_max): - s="|" - for c in range(0,columns): - if (r>=len(fit_columns[c])): - continue + for r in range(0,row_max): + s="|" + for c in range(0,columns): + if (r>=len(fit_columns[c])): + continue - classname = fit_columns[c][r] - initial=classname[0] - if (classname in indexers): - s+="**"+initial+"**|" - else: - s+=" |" + classname = fit_columns[c][r] + initial=classname[0] + if (classname in indexers): + s+="**"+initial+"**|" + else: + s+=" |" - s+="[["+classname.lower()+"|"+classname+"]]|" + s+="[["+classname.lower()+"|"+classname+"]]|" - s+="\n" - f.write(s) + s+="\n" + f.write(s) def dokuize_text(txt): - return txt + return txt def dokuize_text(text): - pos=0 - while(True): - pos = text.find("[",pos) - if (pos==-1): - break + pos=0 + while(True): + pos = text.find("[",pos) + if (pos==-1): + break - endq_pos=text.find("]",pos+1) - if (endq_pos==-1): - break + endq_pos=text.find("]",pos+1) + if (endq_pos==-1): + break - pre_text=text[:pos] - post_text=text[endq_pos+1:] - tag_text=text[pos+1:endq_pos] + pre_text=text[:pos] + post_text=text[endq_pos+1:] + tag_text=text[pos+1:endq_pos] - if (tag_text in class_names): - tag_text="[["+tag_text.lower()+"|"+tag_text+"]]" - else: #command - cmd=tag_text - space_pos=tag_text.find(" ") - if (cmd.find("html")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] - tag_text="<"+param+">" - elif(cmd.find("method")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] + if (tag_text in class_names): + tag_text="[["+tag_text.lower()+"|"+tag_text+"]]" + else: #command + cmd=tag_text + space_pos=tag_text.find(" ") + if (cmd.find("html")==0): + cmd=tag_text[:space_pos] + param=tag_text[space_pos+1:] + tag_text="<"+param+">" + elif(cmd.find("method")==0): + cmd=tag_text[:space_pos] + param=tag_text[space_pos+1:] - if (param.find(".")!=-1): - class_param,method_param=param.split(".") - tag_text="[["+class_param.lower()+"#"+method_param+"|"+class_param+'.'+method_param+"]]" - else: - tag_text="[[#"+param+"|"+param+"]]" - elif (cmd.find("image=")==0): - tag_text="{{"+cmd[6:]+"}}" - elif (cmd.find("url=")==0): - tag_text="[["+cmd[4:]+"|" - elif (cmd=="/url"): - tag_text="]]>" - elif (cmd=="center"): - tag_text="" - elif (cmd=="/center"): - tag_text="" - elif (cmd=="br"): - tag_text="\\\\\n" - elif (cmd=="i" or cmd=="/i"): - tag_text="//" - elif (cmd=="b" or cmd=="/b"): - tag_text="**" - elif (cmd=="u" or cmd=="/u"): - tag_text="__" - else: - tag_text="["+tag_text+"]" + if (param.find(".")!=-1): + class_param,method_param=param.split(".") + tag_text="[["+class_param.lower()+"#"+method_param+"|"+class_param+'.'+method_param+"]]" + else: + tag_text="[[#"+param+"|"+param+"]]" + elif (cmd.find("image=")==0): + tag_text="{{"+cmd[6:]+"}}" + elif (cmd.find("url=")==0): + tag_text="[["+cmd[4:]+"|" + elif (cmd=="/url"): + tag_text="]]>" + elif (cmd=="center"): + tag_text="" + elif (cmd=="/center"): + tag_text="" + elif (cmd=="br"): + tag_text="\\\\\n" + elif (cmd=="i" or cmd=="/i"): + tag_text="//" + elif (cmd=="b" or cmd=="/b"): + tag_text="**" + elif (cmd=="u" or cmd=="/u"): + tag_text="__" + else: + tag_text="["+tag_text+"]" - text=pre_text+tag_text+post_text - pos=len(pre_text)+len(tag_text) + text=pre_text+tag_text+post_text + pos=len(pre_text)+len(tag_text) - #tnode = ET.SubElement(parent,"div") - #tnode.text=text - return text + #tnode = ET.SubElement(parent,"div") + #tnode.text=text + return text def make_type(t): - global class_names - if (t in class_names): - return "[["+t.lower()+"|"+t+"]]" - return t + global class_names + if (t in class_names): + return "[["+t.lower()+"|"+t+"]]" + return t def make_method(f,name,m,declare,event=False): - s=" * " - ret_type="void" - args=list(m) - mdata={} - mdata["argidx"]=[] - for a in args: - if (a.tag=="return"): - idx=-1 - elif (a.tag=="argument"): - idx=int(a.attrib["index"]) - else: - continue + s=" * " + ret_type="void" + args=list(m) + mdata={} + mdata["argidx"]=[] + for a in args: + if (a.tag=="return"): + idx=-1 + elif (a.tag=="argument"): + idx=int(a.attrib["index"]) + else: + continue - mdata["argidx"].append(idx) - mdata[idx]=a + mdata["argidx"].append(idx) + mdata[idx]=a - if (not event): - if (-1 in mdata["argidx"]): - s+=make_type(mdata[-1].attrib["type"]) - else: - s+="void" - s+=" " + if (not event): + if (-1 in mdata["argidx"]): + s+=make_type(mdata[-1].attrib["type"]) + else: + s+="void" + s+=" " - if (declare): + if (declare): - #span.attrib["class"]="funcdecl" - #a=ET.SubElement(span,"a") - #a.attrib["name"]=name+"_"+m.attrib["name"] - #a.text=name+"::"+m.attrib["name"] - s+="**"+m.attrib["name"]+"**" - else: - s+="[[#"+m.attrib["name"]+"|"+m.attrib["name"]+"]]" + #span.attrib["class"]="funcdecl" + #a=ET.SubElement(span,"a") + #a.attrib["name"]=name+"_"+m.attrib["name"] + #a.text=name+"::"+m.attrib["name"] + s+="**"+m.attrib["name"]+"**" + else: + s+="[[#"+m.attrib["name"]+"|"+m.attrib["name"]+"]]" - s+="**(**" - argfound=False - for a in mdata["argidx"]: - arg=mdata[a] - if (a<0): - continue - if (a>0): - s+=", " - else: - s+=" " + s+="**(**" + argfound=False + for a in mdata["argidx"]: + arg=mdata[a] + if (a<0): + continue + if (a>0): + s+=", " + else: + s+=" " - s+=make_type(arg.attrib["type"]) - if ("name" in arg.attrib): - s+=" "+arg.attrib["name"] - else: - s+=" arg"+str(a) + s+=make_type(arg.attrib["type"]) + if ("name" in arg.attrib): + s+=" "+arg.attrib["name"] + else: + s+=" arg"+str(a) - if ("default" in arg.attrib): - s+="="+arg.attrib["default"] + if ("default" in arg.attrib): + s+="="+arg.attrib["default"] - argfound=True + argfound=True - if (argfound): - s+=" " - s+="**)**" + if (argfound): + s+=" " + s+="**)**" - if ("qualifiers" in m.attrib): - s+=" "+m.attrib["qualifiers"] + if ("qualifiers" in m.attrib): + s+=" "+m.attrib["qualifiers"] - f.write(s+"\n") + f.write(s+"\n") def make_doku_class(node): - name = node.attrib["name"] + name = node.attrib["name"] - f=open(name.lower()+".txt","wb") + f=open(name.lower()+".txt","wb") - f.write("====== "+name+" ======\n") + f.write("====== "+name+" ======\n") - if ("inherits" in node.attrib): - inh=node.attrib["inherits"].strip() - f.write("**Inherits:** [["+inh.lower()+"|"+inh+"]]\\\\\n") - if ("category" in node.attrib): - f.write("**Category:** "+node.attrib["category"].strip()+"\\\\\n") + if ("inherits" in node.attrib): + inh=node.attrib["inherits"].strip() + f.write("**Inherits:** [["+inh.lower()+"|"+inh+"]]\\\\\n") + if ("category" in node.attrib): + f.write("**Category:** "+node.attrib["category"].strip()+"\\\\\n") - briefd = node.find("brief_description") - if (briefd!=None): - f.write("===== Brief Description ======\n") - f.write( dokuize_text(briefd.text.strip())+"\n" ) + briefd = node.find("brief_description") + if (briefd!=None): + f.write("===== Brief Description ======\n") + f.write( dokuize_text(briefd.text.strip())+"\n" ) - methods = node.find("methods") + methods = node.find("methods") - if(methods!=None and len(list(methods))>0): - f.write("===== Member Functions ======\n") - for m in list(methods): - make_method(f,node.attrib["name"],m,False) + if(methods!=None and len(list(methods))>0): + f.write("===== Member Functions ======\n") + for m in list(methods): + make_method(f,node.attrib["name"],m,False) - events = node.find("signals") - if(events!=None and len(list(events))>0): - f.write("===== Signals ======\n") - for m in list(events): - make_method(f,node.attrib["name"],m,True,True) + events = node.find("signals") + if(events!=None and len(list(events))>0): + f.write("===== Signals ======\n") + for m in list(events): + make_method(f,node.attrib["name"],m,True,True) - members = node.find("members") + members = node.find("members") - if(members!=None and len(list(members))>0): - f.write("===== Member Variables ======\n") + if(members!=None and len(list(members))>0): + f.write("===== Member Variables ======\n") - for c in list(members): - s = " * " - s+=make_type(c.attrib["type"])+" " - s+="**"+c.attrib["name"]+"**" - if (c.text.strip()!=""): - s+=" - "+c.text.strip() - f.write(s+"\n") + for c in list(members): + s = " * " + s+=make_type(c.attrib["type"])+" " + s+="**"+c.attrib["name"]+"**" + if (c.text.strip()!=""): + s+=" - "+c.text.strip() + f.write(s+"\n") - constants = node.find("constants") - if(constants!=None and len(list(constants))>0): - f.write("===== Numeric Constants ======\n") - for c in list(constants): - s = " * " - s+="**"+c.attrib["name"]+"**" - if ("value" in c.attrib): - s+=" = **"+c.attrib["value"]+"**" - if (c.text.strip()!=""): - s+=" - "+c.text.strip() - f.write(s+"\n") + constants = node.find("constants") + if(constants!=None and len(list(constants))>0): + f.write("===== Numeric Constants ======\n") + for c in list(constants): + s = " * " + s+="**"+c.attrib["name"]+"**" + if ("value" in c.attrib): + s+=" = **"+c.attrib["value"]+"**" + if (c.text.strip()!=""): + s+=" - "+c.text.strip() + f.write(s+"\n") - descr=node.find("description") - if (descr!=None and descr.text.strip()!=""): - f.write("===== Description ======\n") - f.write(dokuize_text(descr.text.strip())+"\n") + descr=node.find("description") + if (descr!=None and descr.text.strip()!=""): + f.write("===== Description ======\n") + f.write(dokuize_text(descr.text.strip())+"\n") - methods = node.find("methods") + methods = node.find("methods") - if(methods!=None and len(list(methods))>0): - f.write("===== Member Function Description ======\n") - for m in list(methods): + if(methods!=None and len(list(methods))>0): + f.write("===== Member Function Description ======\n") + for m in list(methods): - d=m.find("description") - if (d==None or d.text.strip()==""): - continue - f.write("== "+m.attrib["name"]+" ==\n") - make_method(f,node.attrib["name"],m,False) - f.write("\\\\\n") - f.write(dokuize_text(d.text.strip())) - f.write("\n") + d=m.find("description") + if (d==None or d.text.strip()==""): + continue + f.write("== "+m.attrib["name"]+" ==\n") + make_method(f,node.attrib["name"],m,False) + f.write("\\\\\n") + f.write(dokuize_text(d.text.strip())) + f.write("\n") - """ + """ div=ET.Element("div") div.attrib["class"]="class"; @@ -487,20 +487,20 @@ def make_doku_class(node): return div """ for file in input_list: - tree = ET.parse(file) - doc=tree.getroot() + tree = ET.parse(file) + doc=tree.getroot() - if ("version" not in doc.attrib): - print("Version missing from 'doc'") - sys.exit(255) + if ("version" not in doc.attrib): + print("Version missing from 'doc'") + sys.exit(255) - version=doc.attrib["version"] + version=doc.attrib["version"] - for c in list(doc): - if (c.attrib["name"] in class_names): - continue - class_names.append(c.attrib["name"]) - classes[c.attrib["name"]]=c + for c in list(doc): + if (c.attrib["name"] in class_names): + continue + class_names.append(c.attrib["name"]) + classes[c.attrib["name"]]=c class_names.sort() @@ -508,7 +508,7 @@ class_names.sort() make_class_list(class_names,4) for cn in class_names: - c=classes[cn] - make_doku_class(c) + c=classes[cn] + make_doku_class(c) diff --git a/doc/tools/makehtml.py b/doc/tools/makehtml.py index b0a8cbda88c..12f92883a4f 100644 --- a/doc/tools/makehtml.py +++ b/doc/tools/makehtml.py @@ -6,676 +6,676 @@ import xml.etree.ElementTree as ET from xml.sax.saxutils import escape, unescape html_escape_table = { - '"': """, - "'": "'" + '"': """, + "'": "'" } html_unescape_table = {v:k for k, v in html_escape_table.items()} def html_escape(text): - return escape(text, html_escape_table) + return escape(text, html_escape_table) def html_unescape(text): - return unescape(text, html_unescape_table) + return unescape(text, html_unescape_table) input_list = [] single_page=True for arg in sys.argv[1:]: - if arg[:1] == "-": - if arg[1:] == "multipage": - single_page = False - if arg[1:] == "singlepage": - single_page = True - else: - input_list.append(arg) + if arg[:1] == "-": + if arg[1:] == "multipage": + single_page = False + if arg[1:] == "singlepage": + single_page = True + else: + input_list.append(arg) if len(input_list) < 1: - print("usage: makehtml.py ") - sys.exit(0) + print("usage: makehtml.py ") + sys.exit(0) def validate_tag(elem,tag): - if (elem.tag != tag): - print("Tag mismatch, expected '"+tag+"', got "+elem.tag); - sys.exit(255) + if (elem.tag != tag): + print("Tag mismatch, expected '"+tag+"', got "+elem.tag); + sys.exit(255) def make_html_bottom(body): - #make_html_top(body,True) - ET.SubElement(body,"hr") - copyright = ET.SubElement(body,"span") - copyright.text = "Copyright 2008-2010 Codenix SRL" + #make_html_top(body,True) + ET.SubElement(body,"hr") + copyright = ET.SubElement(body,"span") + copyright.text = "Copyright 2008-2010 Codenix SRL" def make_html_top(body,bottom=False): - if (bottom): - ET.SubElement(body,"hr") + if (bottom): + ET.SubElement(body,"hr") - table = ET.SubElement(body,"table") - table.attrib["class"]="top_table" - tr = ET.SubElement(table,"tr") - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" + table = ET.SubElement(body,"table") + table.attrib["class"]="top_table" + tr = ET.SubElement(table,"tr") + td = ET.SubElement(tr,"td") + td.attrib["class"]="top_table" - img = ET.SubElement(td,"image") - img.attrib["src"]="images/logo.png" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="index.html" - a.text="Index" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="alphabetical.html" - a.text="Classes" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="category.html" - a.text="Categories" - td = ET.SubElement(tr,"td") - a = ET.SubElement(td,"a") - a.attrib["href"]="inheritance.html" - a.text="Inheritance" - if (not bottom): - ET.SubElement(body,"hr") + img = ET.SubElement(td,"image") + img.attrib["src"]="images/logo.png" + td = ET.SubElement(tr,"td") + td.attrib["class"]="top_table" + a = ET.SubElement(td,"a") + a.attrib["href"]="index.html" + a.text="Index" + td = ET.SubElement(tr,"td") + td.attrib["class"]="top_table" + a = ET.SubElement(td,"a") + a.attrib["href"]="alphabetical.html" + a.text="Classes" + td = ET.SubElement(tr,"td") + td.attrib["class"]="top_table" + a = ET.SubElement(td,"a") + a.attrib["href"]="category.html" + a.text="Categories" + td = ET.SubElement(tr,"td") + a = ET.SubElement(td,"a") + a.attrib["href"]="inheritance.html" + a.text="Inheritance" + if (not bottom): + ET.SubElement(body,"hr") def make_html_class_list(class_list,columns): - div=ET.Element("div") - div.attrib["class"]="ClassList"; + div=ET.Element("div") + div.attrib["class"]="ClassList"; - h1=ET.SubElement(div,"h2") - h1.text="Alphabetical Class List" + h1=ET.SubElement(div,"h2") + h1.text="Alphabetical Class List" - table=ET.SubElement(div,"table") - table.attrib["class"]="class_table" - table.attrib["width"]="100%" - prev=0 + table=ET.SubElement(div,"table") + table.attrib["class"]="class_table" + table.attrib["width"]="100%" + prev=0 - col_max = len(class_list) / columns + 1 - print("col max is ", col_max) - col_count = 0 - row_count = 0 - last_initial = "" - fit_columns=[] + col_max = len(class_list) / columns + 1 + print("col max is ", col_max) + col_count = 0 + row_count = 0 + last_initial = "" + fit_columns=[] - for n in range(0,columns): - fit_columns+=[[]] + for n in range(0,columns): + fit_columns+=[[]] - indexers=[] - last_initial="" + indexers=[] + last_initial="" - idx=0 - for n in class_list: - col = int(idx/col_max) - if (col>=columns): - col=columns-1 - fit_columns[col]+=[n] - idx+=1 - if (n[:1]!=last_initial): - indexers+=[n] - last_initial=n[:1] + idx=0 + for n in class_list: + col = int(idx/col_max) + if (col>=columns): + col=columns-1 + fit_columns[col]+=[n] + idx+=1 + if (n[:1]!=last_initial): + indexers+=[n] + last_initial=n[:1] - row_max=0 + row_max=0 - for n in range(0,columns): - if (len(fit_columns[n])>row_max): - row_max=len(fit_columns[n]) + for n in range(0,columns): + if (len(fit_columns[n])>row_max): + row_max=len(fit_columns[n]) - for r in range(0,row_max): - tr = ET.SubElement(table,"tr") - for c in range(0,columns): - tdi = ET.SubElement(tr,"td") - tdi.attrib["align"]="right" - td = ET.SubElement(tr,"td") - if (r>=len(fit_columns[c])): - continue + for r in range(0,row_max): + tr = ET.SubElement(table,"tr") + for c in range(0,columns): + tdi = ET.SubElement(tr,"td") + tdi.attrib["align"]="right" + td = ET.SubElement(tr,"td") + if (r>=len(fit_columns[c])): + continue - classname = fit_columns[c][r] - print(classname) - if (classname in indexers): + classname = fit_columns[c][r] + print(classname) + if (classname in indexers): - span = ET.SubElement(tdi, "span") - span.attrib["class"] = "class_index_letter" - span.text = classname[:1].upper() + span = ET.SubElement(tdi, "span") + span.attrib["class"] = "class_index_letter" + span.text = classname[:1].upper() - if (single_page): - link="#"+classname + if (single_page): + link="#"+classname + else: + link=classname+".html" + + a=ET.SubElement(td,"a") + a.attrib["href"]=link + a.text=classname + + + if (not single_page): + cat_class_list=ET.Element("html") + csscc = ET.SubElement(cat_class_list, "link") + csscc.attrib["href"] = "main.css" + csscc.attrib["rel"] = "stylesheet" + csscc.attrib["type"] = "text/css" + bodycc = ET.SubElement(cat_class_list, "body") + make_html_top(bodycc) + + cat_class_parent=bodycc else: - link=classname+".html" - - a=ET.SubElement(td,"a") - a.attrib["href"]=link - a.text=classname - - - if (not single_page): - cat_class_list=ET.Element("html") - csscc = ET.SubElement(cat_class_list, "link") - csscc.attrib["href"] = "main.css" - csscc.attrib["rel"] = "stylesheet" - csscc.attrib["type"] = "text/css" - bodycc = ET.SubElement(cat_class_list, "body") - make_html_top(bodycc) - - cat_class_parent=bodycc - else: - cat_class_parent=div + cat_class_parent=div - h1=ET.SubElement(cat_class_parent,"h2") - h1.text="Class List By Category" + h1=ET.SubElement(cat_class_parent,"h2") + h1.text="Class List By Category" - class_cat_table={} - class_cat_list=[] + class_cat_table={} + class_cat_list=[] - for c in class_list: - clss = classes[c] - if ("category" in clss.attrib): - class_cat=clss.attrib["category"] - else: - class_cat="Core" - if (class_cat.find("/")!=-1): - class_cat=class_cat[class_cat.rfind("/")+1:] - if (not class_cat in class_cat_list): - class_cat_list.append(class_cat) - class_cat_table[class_cat]=[] - class_cat_table[class_cat].append(c) + for c in class_list: + clss = classes[c] + if ("category" in clss.attrib): + class_cat=clss.attrib["category"] + else: + class_cat="Core" + if (class_cat.find("/")!=-1): + class_cat=class_cat[class_cat.rfind("/")+1:] + if (not class_cat in class_cat_list): + class_cat_list.append(class_cat) + class_cat_table[class_cat]=[] + class_cat_table[class_cat].append(c) - class_cat_list.sort() + class_cat_list.sort() - ct = ET.SubElement(cat_class_parent,"table") - for cl in class_cat_list: - l = class_cat_table[cl] - l.sort() - tr = ET.SubElement(ct,"tr") - tr.attrib["class"]="category_title" - td = ET.SubElement(ct,"td") - td.attrib["class"]="category_title" + ct = ET.SubElement(cat_class_parent,"table") + for cl in class_cat_list: + l = class_cat_table[cl] + l.sort() + tr = ET.SubElement(ct,"tr") + tr.attrib["class"]="category_title" + td = ET.SubElement(ct,"td") + td.attrib["class"]="category_title" - a = ET.SubElement(td,"a") - a.attrib["class"]="category_title" - a.text=cl - a.attrib["name"]="CATEGORY_"+cl + a = ET.SubElement(td,"a") + a.attrib["class"]="category_title" + a.text=cl + a.attrib["name"]="CATEGORY_"+cl - td = ET.SubElement(ct,"td") - td.attrib["class"]="category_title" + td = ET.SubElement(ct,"td") + td.attrib["class"]="category_title" - for clt in l: - tr = ET.SubElement(ct,"tr") - td = ET.SubElement(ct,"td") - make_type(clt,td) - clss=classes[clt] - bd = clss.find("brief_description") - bdtext="" - if (bd!=None): - bdtext=bd.text - td = ET.SubElement(ct,"td") - td.text=bdtext + for clt in l: + tr = ET.SubElement(ct,"tr") + td = ET.SubElement(ct,"td") + make_type(clt,td) + clss=classes[clt] + bd = clss.find("brief_description") + bdtext="" + if (bd!=None): + bdtext=bd.text + td = ET.SubElement(ct,"td") + td.text=bdtext - if (not single_page): - make_html_bottom(bodycc) - catet_out = ET.ElementTree(cat_class_list) - catet_out.write("category.html") + if (not single_page): + make_html_bottom(bodycc) + catet_out = ET.ElementTree(cat_class_list) + catet_out.write("category.html") - if (not single_page): - inh_class_list=ET.Element("html") - cssic = ET.SubElement(inh_class_list, "link") - cssic.attrib["href"] = "main.css" - cssic.attrib["rel"] = "stylesheet" - cssic.attrib["type"] = "text/css" - bodyic = ET.SubElement(inh_class_list, "body") - make_html_top(bodyic) - inh_class_parent=bodyic - else: - inh_class_parent=div - - - - - h1=ET.SubElement(inh_class_parent,"h2") - h1.text="Class List By Inheritance" - - itemlist = ET.SubElement(inh_class_parent,"list") - - class_inh_table={} - - def add_class(clss): - if (clss.attrib["name"] in class_inh_table): - return #already added - parent_list=None - - if ("inherits" in clss.attrib): - inhc = clss.attrib["inherits"] - if (not (inhc in class_inh_table)): - add_class(classes[inhc]) - - parent_list = class_inh_table[inhc].find("div") - if (parent_list == None): - parent_div = ET.SubElement(class_inh_table[inhc],"div") - parent_list = ET.SubElement(parent_div,"list") - parent_div.attrib["class"]="inh_class_list" + if (not single_page): + inh_class_list=ET.Element("html") + cssic = ET.SubElement(inh_class_list, "link") + cssic.attrib["href"] = "main.css" + cssic.attrib["rel"] = "stylesheet" + cssic.attrib["type"] = "text/css" + bodyic = ET.SubElement(inh_class_list, "body") + make_html_top(bodyic) + inh_class_parent=bodyic else: - parent_list = parent_list.find("list") + inh_class_parent=div - else: - parent_list=itemlist - item = ET.SubElement(parent_list,"li") + + h1=ET.SubElement(inh_class_parent,"h2") + h1.text="Class List By Inheritance" + + itemlist = ET.SubElement(inh_class_parent,"list") + + class_inh_table={} + + def add_class(clss): + if (clss.attrib["name"] in class_inh_table): + return #already added + parent_list=None + + if ("inherits" in clss.attrib): + inhc = clss.attrib["inherits"] + if (not (inhc in class_inh_table)): + add_class(classes[inhc]) + + parent_list = class_inh_table[inhc].find("div") + if (parent_list == None): + parent_div = ET.SubElement(class_inh_table[inhc],"div") + parent_list = ET.SubElement(parent_div,"list") + parent_div.attrib["class"]="inh_class_list" + else: + parent_list = parent_list.find("list") + + + else: + parent_list=itemlist + + item = ET.SubElement(parent_list,"li") # item.attrib["class"]="inh_class_list" - class_inh_table[clss.attrib["name"]]=item - make_type(clss.attrib["name"],item) + class_inh_table[clss.attrib["name"]]=item + make_type(clss.attrib["name"],item) - for c in class_list: - add_class(classes[c]) + for c in class_list: + add_class(classes[c]) - if (not single_page): - make_html_bottom(bodyic) - catet_out = ET.ElementTree(inh_class_list) - catet_out.write("inheritance.html") + if (not single_page): + make_html_bottom(bodyic) + catet_out = ET.ElementTree(inh_class_list) + catet_out.write("inheritance.html") - #h1=ET.SubElement(div,"h2") - #h1.text="Class List By Inheritance" + #h1=ET.SubElement(div,"h2") + #h1.text="Class List By Inheritance" - return div + return div def make_type(p_type,p_parent): - if (p_type=="RefPtr"): - p_type="Resource" + if (p_type=="RefPtr"): + p_type="Resource" - if (p_type in class_names): - a=ET.SubElement(p_parent,"a") - a.attrib["class"]="datatype_existing" - a.text=p_type+" " - if (single_page): - a.attrib["href"]="#"+p_type - else: - a.attrib["href"]=p_type+".html" - else: - span=ET.SubElement(p_parent,"span") - span.attrib["class"]="datatype" - span.text=p_type+" " + if (p_type in class_names): + a=ET.SubElement(p_parent,"a") + a.attrib["class"]="datatype_existing" + a.text=p_type+" " + if (single_page): + a.attrib["href"]="#"+p_type + else: + a.attrib["href"]=p_type+".html" + else: + span=ET.SubElement(p_parent,"span") + span.attrib["class"]="datatype" + span.text=p_type+" " def make_text_def(class_name,parent,text): - text = html_escape(text) - pos=0 - while(True): - pos = text.find("[",pos) - if (pos==-1): - break + text = html_escape(text) + pos=0 + while(True): + pos = text.find("[",pos) + if (pos==-1): + break - endq_pos=text.find("]",pos+1) - if (endq_pos==-1): - break + endq_pos=text.find("]",pos+1) + if (endq_pos==-1): + break - pre_text=text[:pos] - post_text=text[endq_pos+1:] - tag_text=text[pos+1:endq_pos] + pre_text=text[:pos] + post_text=text[endq_pos+1:] + tag_text=text[pos+1:endq_pos] - if (tag_text in class_names): - if (single_page): - tag_text=''+tag_text+'' - else: - tag_text=''+tag_text+'' - else: #command - cmd=tag_text - space_pos=tag_text.find(" ") - if (cmd.find("html")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] - tag_text="<"+param+">" - elif(cmd.find("method")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] + if (tag_text in class_names): + if (single_page): + tag_text=''+tag_text+'' + else: + tag_text=''+tag_text+'' + else: #command + cmd=tag_text + space_pos=tag_text.find(" ") + if (cmd.find("html")==0): + cmd=tag_text[:space_pos] + param=tag_text[space_pos+1:] + tag_text="<"+param+">" + elif(cmd.find("method")==0): + cmd=tag_text[:space_pos] + param=tag_text[space_pos+1:] - if (not single_page and param.find(".")!=-1): - class_param,method_param=param.split(".") - tag_text=tag_text=''+class_param+'.'+method_param+'()' - else: - tag_text=tag_text=''+class_name+'.'+param+'()' - elif (cmd.find("image=")==0): - print("found image: "+cmd) - tag_text="" - elif (cmd.find("url=")==0): - tag_text="" - elif (cmd=="/url"): - tag_text="" - elif (cmd=="center"): - tag_text="
" - elif (cmd=="/center"): - tag_text="
" - elif (cmd=="br"): - tag_text="
" - elif (cmd=="i" or cmd=="/i" or cmd=="b" or cmd=="/b" or cmd=="u" or cmd=="/u"): - tag_text="<"+tag_text+">" #html direct mapping - else: - tag_text="["+tag_text+"]" + if (not single_page and param.find(".")!=-1): + class_param,method_param=param.split(".") + tag_text=tag_text=''+class_param+'.'+method_param+'()' + else: + tag_text=tag_text=''+class_name+'.'+param+'()' + elif (cmd.find("image=")==0): + print("found image: "+cmd) + tag_text="" + elif (cmd.find("url=")==0): + tag_text="" + elif (cmd=="/url"): + tag_text="" + elif (cmd=="center"): + tag_text="
" + elif (cmd=="/center"): + tag_text="
" + elif (cmd=="br"): + tag_text="
" + elif (cmd=="i" or cmd=="/i" or cmd=="b" or cmd=="/b" or cmd=="u" or cmd=="/u"): + tag_text="<"+tag_text+">" #html direct mapping + else: + tag_text="["+tag_text+"]" - text=pre_text+tag_text+post_text - pos=len(pre_text)+len(tag_text) + text=pre_text+tag_text+post_text + pos=len(pre_text)+len(tag_text) - #tnode = ET.SubElement(parent,"div") - #tnode.text=text - text="
"+text+"
" - try: - tnode=ET.XML(text) - parent.append(tnode) - except: - print("Error parsing description text: '"+text+"'") - sys.exit(255) + #tnode = ET.SubElement(parent,"div") + #tnode.text=text + text="
"+text+"
" + try: + tnode=ET.XML(text) + parent.append(tnode) + except: + print("Error parsing description text: '"+text+"'") + sys.exit(255) - return tnode + return tnode def make_method_def(name,m,declare,event=False): - mdata={} + mdata={} - if (not declare): - div=ET.Element("tr") - div.attrib["class"]="method" - ret_parent=ET.SubElement(div,"td") - ret_parent.attrib["align"]="right" - func_parent=ET.SubElement(div,"td") - else: - div=ET.Element("div") - div.attrib["class"]="method" - ret_parent=div - func_parent=div + if (not declare): + div=ET.Element("tr") + div.attrib["class"]="method" + ret_parent=ET.SubElement(div,"td") + ret_parent.attrib["align"]="right" + func_parent=ET.SubElement(div,"td") + else: + div=ET.Element("div") + div.attrib["class"]="method" + ret_parent=div + func_parent=div - mdata["argidx"]=[] - mdata["name"]=m.attrib["name"] - qualifiers="" - if ("qualifiers" in m.attrib): - qualifiers=m.attrib["qualifiers"] + mdata["argidx"]=[] + mdata["name"]=m.attrib["name"] + qualifiers="" + if ("qualifiers" in m.attrib): + qualifiers=m.attrib["qualifiers"] - args=list(m) - for a in args: - if (a.tag=="return"): - idx=-1 - elif (a.tag=="argument"): - idx=int(a.attrib["index"]) - else: - continue + args=list(m) + for a in args: + if (a.tag=="return"): + idx=-1 + elif (a.tag=="argument"): + idx=int(a.attrib["index"]) + else: + continue - mdata["argidx"].append(idx) - mdata[idx]=a + mdata["argidx"].append(idx) + mdata[idx]=a - if (not event): - if (-1 in mdata["argidx"]): - make_type(mdata[-1].attrib["type"],ret_parent) - mdata["argidx"].remove(-1) - else: - make_type("void",ret_parent) + if (not event): + if (-1 in mdata["argidx"]): + make_type(mdata[-1].attrib["type"],ret_parent) + mdata["argidx"].remove(-1) + else: + make_type("void",ret_parent) - span=ET.SubElement(func_parent,"span") - if (declare): - span.attrib["class"]="funcdecl" - a=ET.SubElement(span,"a") - a.attrib["name"]=name+"_"+m.attrib["name"] - a.text=name+"::"+m.attrib["name"] - else: - span.attrib["class"]="identifier funcdef" - a=ET.SubElement(span,"a") - a.attrib["href"]="#"+name+"_"+m.attrib["name"] - a.text=m.attrib["name"] + span=ET.SubElement(func_parent,"span") + if (declare): + span.attrib["class"]="funcdecl" + a=ET.SubElement(span,"a") + a.attrib["name"]=name+"_"+m.attrib["name"] + a.text=name+"::"+m.attrib["name"] + else: + span.attrib["class"]="identifier funcdef" + a=ET.SubElement(span,"a") + a.attrib["href"]="#"+name+"_"+m.attrib["name"] + a.text=m.attrib["name"] - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="symbol" - span.text=" (" + span=ET.SubElement(func_parent,"span") + span.attrib["class"]="symbol" + span.text=" (" - for a in mdata["argidx"]: - arg=mdata[a] - if (a>0): - span=ET.SubElement(func_parent,"span") - span.text=", " - else: - span=ET.SubElement(func_parent,"span") - span.text=" " + for a in mdata["argidx"]: + arg=mdata[a] + if (a>0): + span=ET.SubElement(func_parent,"span") + span.text=", " + else: + span=ET.SubElement(func_parent,"span") + span.text=" " - make_type(arg.attrib["type"],func_parent) + make_type(arg.attrib["type"],func_parent) - span=ET.SubElement(func_parent,"span") - span.text=arg.attrib["name"] - if ("default" in arg.attrib): - span.text=span.text+"="+arg.attrib["default"] + span=ET.SubElement(func_parent,"span") + span.text=arg.attrib["name"] + if ("default" in arg.attrib): + span.text=span.text+"="+arg.attrib["default"] - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="symbol" - if (len(mdata["argidx"])): - span.text=" )" - else: - span.text=")" + span=ET.SubElement(func_parent,"span") + span.attrib["class"]="symbol" + if (len(mdata["argidx"])): + span.text=" )" + else: + span.text=")" - if (qualifiers): - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="qualifier" - span.text=" "+qualifiers + if (qualifiers): + span=ET.SubElement(func_parent,"span") + span.attrib["class"]="qualifier" + span.text=" "+qualifiers - return div + return div def make_html_class(node): - div=ET.Element("div") - div.attrib["class"]="class"; + div=ET.Element("div") + div.attrib["class"]="class"; - a=ET.SubElement(div,"a") - a.attrib["name"]=node.attrib["name"] + a=ET.SubElement(div,"a") + a.attrib["name"]=node.attrib["name"] - h3=ET.SubElement(a,"h3") - h3.attrib["class"]="title class_title" - h3.text=node.attrib["name"] + h3=ET.SubElement(a,"h3") + h3.attrib["class"]="title class_title" + h3.text=node.attrib["name"] - briefd = node.find("brief_description") - if (briefd!=None): - div2=ET.SubElement(div,"div") - div2.attrib["class"]="description class_description" - div2.text=briefd.text + briefd = node.find("brief_description") + if (briefd!=None): + div2=ET.SubElement(div,"div") + div2.attrib["class"]="description class_description" + div2.text=briefd.text - if ("inherits" in node.attrib): - ET.SubElement(div,"br") + if ("inherits" in node.attrib): + ET.SubElement(div,"br") - div2=ET.SubElement(div,"div") - div2.attrib["class"]="inheritance"; + div2=ET.SubElement(div,"div") + div2.attrib["class"]="inheritance"; - span=ET.SubElement(div2,"span") - span.text="Inherits: " + span=ET.SubElement(div2,"span") + span.text="Inherits: " - make_type(node.attrib["inherits"],div2) + make_type(node.attrib["inherits"],div2) - if ("category" in node.attrib): - ET.SubElement(div,"br") + if ("category" in node.attrib): + ET.SubElement(div,"br") - div3=ET.SubElement(div,"div") - div3.attrib["class"]="category"; + div3=ET.SubElement(div,"div") + div3.attrib["class"]="category"; - span=ET.SubElement(div3,"span") - span.attrib["class"]="category" - span.text="Category: " + span=ET.SubElement(div3,"span") + span.attrib["class"]="category" + span.text="Category: " - a = ET.SubElement(div3,"a") - a.attrib["class"]="category_ref" - a.text=node.attrib["category"] - catname=a.text - if (catname.rfind("/")!=-1): - catname=catname[catname.rfind("/"):] - catname="CATEGORY_"+catname + a = ET.SubElement(div3,"a") + a.attrib["class"]="category_ref" + a.text=node.attrib["category"] + catname=a.text + if (catname.rfind("/")!=-1): + catname=catname[catname.rfind("/"):] + catname="CATEGORY_"+catname - if (single_page): - a.attrib["href"]="#"+catname - else: - a.attrib["href"]="category.html#"+catname + if (single_page): + a.attrib["href"]="#"+catname + else: + a.attrib["href"]="category.html#"+catname - methods = node.find("methods") + methods = node.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods!=None and len(list(methods))>0): - h4=ET.SubElement(div,"h4") - h4.text="Public Methods:" + h4=ET.SubElement(div,"h4") + h4.text="Public Methods:" - method_table=ET.SubElement(div,"table") - method_table.attrib["class"]="method_list"; + method_table=ET.SubElement(div,"table") + method_table.attrib["class"]="method_list"; - for m in list(methods): -# li = ET.SubElement(div2, "li") - method_table.append( make_method_def(node.attrib["name"],m,False) ) + for m in list(methods): + #li = ET.SubElement(div2, "li") + method_table.append( make_method_def(node.attrib["name"],m,False) ) - events = node.find("signals") + events = node.find("signals") - if(events!=None and len(list(events))>0): - h4=ET.SubElement(div,"h4") - h4.text="Events:" + if(events!=None and len(list(events))>0): + h4=ET.SubElement(div,"h4") + h4.text="Events:" - event_table=ET.SubElement(div,"table") - event_table.attrib["class"]="method_list"; + event_table=ET.SubElement(div,"table") + event_table.attrib["class"]="method_list"; - for m in list(events): -# li = ET.SubElement(div2, "li") - event_table.append( make_method_def(node.attrib["name"],m,False,True) ) + for m in list(events): + #li = ET.SubElement(div2, "li") + event_table.append( make_method_def(node.attrib["name"],m,False,True) ) - members = node.find("members") - if(members!=None and len(list(members))>0): + members = node.find("members") + if(members!=None and len(list(members))>0): - h4=ET.SubElement(div,"h4") - h4.text="Public Variables:" - div2=ET.SubElement(div,"div") - div2.attrib["class"]="member_list"; + h4=ET.SubElement(div,"h4") + h4.text="Public Variables:" + div2=ET.SubElement(div,"div") + div2.attrib["class"]="member_list"; - for c in list(members): + for c in list(members): - li = ET.SubElement(div2, "li") - div3=ET.SubElement(li,"div") - div3.attrib["class"]="member"; - make_type(c.attrib["type"],div3) - span=ET.SubElement(div3,"span") - span.attrib["class"]="identifier member_name" - span.text=" "+c.attrib["name"]+" " - span=ET.SubElement(div3,"span") - span.attrib["class"]="member_description" - span.text=c.text + li = ET.SubElement(div2, "li") + div3=ET.SubElement(li,"div") + div3.attrib["class"]="member"; + make_type(c.attrib["type"],div3) + span=ET.SubElement(div3,"span") + span.attrib["class"]="identifier member_name" + span.text=" "+c.attrib["name"]+" " + span=ET.SubElement(div3,"span") + span.attrib["class"]="member_description" + span.text=c.text - constants = node.find("constants") - if(constants!=None and len(list(constants))>0): + constants = node.find("constants") + if(constants!=None and len(list(constants))>0): - h4=ET.SubElement(div,"h4") - h4.text="Constants:" - div2=ET.SubElement(div,"div") - div2.attrib["class"]="constant_list"; + h4=ET.SubElement(div,"h4") + h4.text="Constants:" + div2=ET.SubElement(div,"div") + div2.attrib["class"]="constant_list"; - for c in list(constants): - li = ET.SubElement(div2, "li") - div3=ET.SubElement(li,"div") - div3.attrib["class"]="constant"; + for c in list(constants): + li = ET.SubElement(div2, "li") + div3=ET.SubElement(li,"div") + div3.attrib["class"]="constant"; - span=ET.SubElement(div3,"span") - span.attrib["class"]="identifier constant_name" - span.text=c.attrib["name"]+" " - if ("value" in c.attrib): - span=ET.SubElement(div3,"span") - span.attrib["class"]="symbol" - span.text="= " - span=ET.SubElement(div3,"span") - span.attrib["class"]="constant_value" - span.text=c.attrib["value"]+" " - span=ET.SubElement(div3,"span") - span.attrib["class"]="constant_description" - span.text=c.text + span=ET.SubElement(div3,"span") + span.attrib["class"]="identifier constant_name" + span.text=c.attrib["name"]+" " + if ("value" in c.attrib): + span=ET.SubElement(div3,"span") + span.attrib["class"]="symbol" + span.text="= " + span=ET.SubElement(div3,"span") + span.attrib["class"]="constant_value" + span.text=c.attrib["value"]+" " + span=ET.SubElement(div3,"span") + span.attrib["class"]="constant_description" + span.text=c.text # ET.SubElement(div,"br") - descr=node.find("description") - if (descr!=None and descr.text.strip()!=""): - h4=ET.SubElement(div,"h4") - h4.text="Description:" + descr=node.find("description") + if (descr!=None and descr.text.strip()!=""): + h4=ET.SubElement(div,"h4") + h4.text="Description:" - make_text_def(node.attrib["name"],div,descr.text) + make_text_def(node.attrib["name"],div,descr.text) # div2=ET.SubElement(div,"div") # div2.attrib["class"]="description"; # div2.text=descr.text - if(methods!=None or events!=None): + if(methods!=None or events!=None): - h4=ET.SubElement(div,"h4") - h4.text="Method Documentation:" - iter_list = [] - if (methods!=None): - iter_list+=list(methods) - if (events!=None): - iter_list+=list(events) + h4=ET.SubElement(div,"h4") + h4.text="Method Documentation:" + iter_list = [] + if (methods!=None): + iter_list+=list(methods) + if (events!=None): + iter_list+=list(events) - for m in iter_list: + for m in iter_list: - descr=m.find("description") + descr=m.find("description") - if (descr==None or descr.text.strip()==""): - continue; + if (descr==None or descr.text.strip()==""): + continue; - div2=ET.SubElement(div,"div") - div2.attrib["class"]="method_doc"; + div2=ET.SubElement(div,"div") + div2.attrib["class"]="method_doc"; - div2.append( make_method_def(node.attrib["name"],m,True) ) - #anchor = ET.SubElement(div2, "a") - #anchor.attrib["name"] = - make_text_def(node.attrib["name"],div2,descr.text) - #div3=ET.SubElement(div2,"div") - #div3.attrib["class"]="description"; - #div3.text=descr.text + div2.append( make_method_def(node.attrib["name"],m,True) ) + #anchor = ET.SubElement(div2, "a") + #anchor.attrib["name"] = + make_text_def(node.attrib["name"],div2,descr.text) + #div3=ET.SubElement(div2,"div") + #div3.attrib["class"]="description"; + #div3.text=descr.text - return div + return div class_names=[] classes={} for file in input_list: - tree = ET.parse(file) - doc=tree.getroot() + tree = ET.parse(file) + doc=tree.getroot() - if ("version" not in doc.attrib): - print("Version missing from 'doc'") - sys.exit(255) + if ("version" not in doc.attrib): + print("Version missing from 'doc'") + sys.exit(255) - version=doc.attrib["version"] + version=doc.attrib["version"] - for c in list(doc): - if (c.attrib["name"] in class_names): - continue - class_names.append(c.attrib["name"]) - classes[c.attrib["name"]]=c + for c in list(doc): + if (c.attrib["name"] in class_names): + continue + class_names.append(c.attrib["name"]) + classes[c.attrib["name"]]=c html = ET.Element("html") css = ET.SubElement(html, "link") @@ -685,7 +685,7 @@ css.attrib["type"] = "text/css" body = ET.SubElement(html, "body") if (not single_page): - make_html_top(body) + make_html_top(body) @@ -694,27 +694,27 @@ class_names.sort() body.append( make_html_class_list(class_names,5) ) for cn in class_names: - c=classes[cn] - if (single_page): - body.append( make_html_class(c)) - else: - html2 = ET.Element("html") - css = ET.SubElement(html2, "link") - css.attrib["href"] = "main.css" - css.attrib["rel"] = "stylesheet" - css.attrib["type"] = "text/css" - body2 = ET.SubElement(html2, "body" ) - make_html_top(body2) - body2.append( make_html_class(c) ); - make_html_bottom(body2) - et_out = ET.ElementTree(html2) - et_out.write(c.attrib["name"]+".html") + c=classes[cn] + if (single_page): + body.append( make_html_class(c)) + else: + html2 = ET.Element("html") + css = ET.SubElement(html2, "link") + css.attrib["href"] = "main.css" + css.attrib["rel"] = "stylesheet" + css.attrib["type"] = "text/css" + body2 = ET.SubElement(html2, "body" ) + make_html_top(body2) + body2.append( make_html_class(c) ); + make_html_bottom(body2) + et_out = ET.ElementTree(html2) + et_out.write(c.attrib["name"]+".html") et_out = ET.ElementTree(html) if (single_page): - et_out.write("singlepage.html") + et_out.write("singlepage.html") else: - make_html_bottom(body) - et_out.write("alphabetical.html") + make_html_bottom(body) + et_out.write("alphabetical.html") diff --git a/doc/tools/makemd.py b/doc/tools/makemd.py index e0fbe9af03b..2c9d6970397 100644 --- a/doc/tools/makemd.py +++ b/doc/tools/makemd.py @@ -7,17 +7,17 @@ import xml.etree.ElementTree as ET input_list = [] for arg in sys.argv[1:]: - input_list.append(arg) + input_list.append(arg) if len(input_list) < 1: - print 'usage: makemd.py ' - sys.exit(0) + print 'usage: makemd.py ' + sys.exit(0) def validate_tag(elem, tag): - if elem.tag != tag: - print "Tag mismatch, expected '" + tag + "', got " + elem.tag - sys.exit(255) + if elem.tag != tag: + print "Tag mismatch, expected '" + tag + "', got " + elem.tag + sys.exit(255) class_names = [] @@ -26,321 +26,321 @@ classes = {} def make_class_list(class_list, columns): - f = open('class_list.md', 'wb') - prev = 0 - col_max = len(class_list) / columns + 1 - print ('col max is ', col_max) - col_count = 0 - row_count = 0 - last_initial = '' - fit_columns = [] + f = open('class_list.md', 'wb') + prev = 0 + col_max = len(class_list) / columns + 1 + print ('col max is ', col_max) + col_count = 0 + row_count = 0 + last_initial = '' + fit_columns = [] - for n in range(0, columns): - fit_columns += [[]] + for n in range(0, columns): + fit_columns += [[]] - indexers = [] - last_initial = '' + indexers = [] + last_initial = '' - idx = 0 - for n in class_list: - col = idx / col_max - if col >= columns: - col = columns - 1 - fit_columns[col] += [n] - idx += 1 - if n[:1] != last_initial: - indexers += [n] - last_initial = n[:1] + idx = 0 + for n in class_list: + col = idx / col_max + if col >= columns: + col = columns - 1 + fit_columns[col] += [n] + idx += 1 + if n[:1] != last_initial: + indexers += [n] + last_initial = n[:1] - row_max = 0 - f.write("\n") + row_max = 0 + f.write("\n") - for n in range(0, columns): - if len(fit_columns[n]) > row_max: - row_max = len(fit_columns[n]) + for n in range(0, columns): + if len(fit_columns[n]) > row_max: + row_max = len(fit_columns[n]) - f.write("| ") - for n in range(0, columns): - f.write(" | |") + f.write("| ") + for n in range(0, columns): + f.write(" | |") - f.write("\n") - f.write("| ") - for n in range(0, columns): - f.write(" --- | ------- |") - f.write("\n") + f.write("\n") + f.write("| ") + for n in range(0, columns): + f.write(" --- | ------- |") + f.write("\n") - for r in range(0, row_max): - s = '| ' - for c in range(0, columns): - if r >= len(fit_columns[c]): - continue + for r in range(0, row_max): + s = '| ' + for c in range(0, columns): + if r >= len(fit_columns[c]): + continue - classname = fit_columns[c][r] - initial = classname[0] - if classname in indexers: - s += '**' + initial + '** | ' - else: - s += ' | ' + classname = fit_columns[c][r] + initial = classname[0] + if classname in indexers: + s += '**' + initial + '** | ' + else: + s += ' | ' - s += '[' + classname + '](class_'+ classname.lower()+') | ' + s += '[' + classname + '](class_'+ classname.lower()+') | ' - s += '\n' - f.write(s) + s += '\n' + f.write(s) def dokuize_text(txt): - return txt + return txt def dokuize_text(text): - pos = 0 - while True: - pos = text.find('[', pos) - if pos == -1: - break + pos = 0 + while True: + pos = text.find('[', pos) + if pos == -1: + break - endq_pos = text.find(']', pos + 1) - if endq_pos == -1: - break + endq_pos = text.find(']', pos + 1) + if endq_pos == -1: + break - pre_text = text[:pos] - post_text = text[endq_pos + 1:] - tag_text = text[pos + 1:endq_pos] + pre_text = text[:pos] + post_text = text[endq_pos + 1:] + tag_text = text[pos + 1:endq_pos] - if tag_text in class_names: - tag_text = make_type(tag_text) - else: + if tag_text in class_names: + tag_text = make_type(tag_text) + else: - # command + # command - cmd = tag_text - space_pos = tag_text.find(' ') - if cmd.find('html') == 0: - cmd = tag_text[:space_pos] - param = tag_text[space_pos + 1:] - tag_text = '<' + param + '>' - elif cmd.find('method') == 0: - cmd = tag_text[:space_pos] - param = tag_text[space_pos + 1:] + cmd = tag_text + space_pos = tag_text.find(' ') + if cmd.find('html') == 0: + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] + tag_text = '<' + param + '>' + elif cmd.find('method') == 0: + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] - if param.find('.') != -1: - (class_param, method_param) = param.split('.') - tag_text = '['+class_param+'.'+method_param.replace("_","_")+'](' + class_param.lower() + '#' \ - + method_param + ')' - else: - tag_text = '[' + param.replace("_","_") + '](#' + param + ')' - elif cmd.find('image=') == 0: - tag_text = '![](' + cmd[6:] + ')' - elif cmd.find('url=') == 0: - tag_text = '[' + cmd[4:] + ']('+cmd[4:] - elif cmd == '/url': - tag_text = ')' - elif cmd == 'center': - tag_text = '' - elif cmd == '/center': - tag_text = '' - elif cmd == 'br': - tag_text = '\n' - elif cmd == 'i' or cmd == '/i': - tag_text = '_' - elif cmd == 'b' or cmd == '/b': - tag_text = '**' - elif cmd == 'u' or cmd == '/u': - tag_text = '__' - else: - tag_text = '[' + tag_text + ']' + if param.find('.') != -1: + (class_param, method_param) = param.split('.') + tag_text = '['+class_param+'.'+method_param.replace("_","_")+'](' + class_param.lower() + '#' \ + + method_param + ')' + else: + tag_text = '[' + param.replace("_","_") + '](#' + param + ')' + elif cmd.find('image=') == 0: + tag_text = '![](' + cmd[6:] + ')' + elif cmd.find('url=') == 0: + tag_text = '[' + cmd[4:] + ']('+cmd[4:] + elif cmd == '/url': + tag_text = ')' + elif cmd == 'center': + tag_text = '' + elif cmd == '/center': + tag_text = '' + elif cmd == 'br': + tag_text = '\n' + elif cmd == 'i' or cmd == '/i': + tag_text = '_' + elif cmd == 'b' or cmd == '/b': + tag_text = '**' + elif cmd == 'u' or cmd == '/u': + tag_text = '__' + else: + tag_text = '[' + tag_text + ']' - text = pre_text + tag_text + post_text - pos = len(pre_text) + len(tag_text) + text = pre_text + tag_text + post_text + pos = len(pre_text) + len(tag_text) # tnode = ET.SubElement(parent,"div") # tnode.text=text - return text + return text def make_type(t): - global class_names - if t in class_names: - return '[' + t + '](class_' + t.lower() + ')' - return t + global class_names + if t in class_names: + return '[' + t + '](class_' + t.lower() + ')' + return t def make_method( - f, - name, - m, - declare, - event=False, - ): + f, + name, + m, + declare, + event=False, +): - s = ' * ' - ret_type = 'void' - args = list(m) - mdata = {} - mdata['argidx'] = [] - for a in args: - if a.tag == 'return': - idx = -1 - elif a.tag == 'argument': - idx = int(a.attrib['index']) - else: - continue + s = ' * ' + ret_type = 'void' + args = list(m) + mdata = {} + mdata['argidx'] = [] + for a in args: + if a.tag == 'return': + idx = -1 + elif a.tag == 'argument': + idx = int(a.attrib['index']) + else: + continue - mdata['argidx'].append(idx) - mdata[idx] = a + mdata['argidx'].append(idx) + mdata[idx] = a - if not event: - if -1 in mdata['argidx']: - s += make_type(mdata[-1].attrib['type']) - else: - s += 'void' - s += ' ' + if not event: + if -1 in mdata['argidx']: + s += make_type(mdata[-1].attrib['type']) + else: + s += 'void' + s += ' ' - if declare: + if declare: - # span.attrib["class"]="funcdecl" - # a=ET.SubElement(span,"a") - # a.attrib["name"]=name+"_"+m.attrib["name"] - # a.text=name+"::"+m.attrib["name"] + # span.attrib["class"]="funcdecl" + # a=ET.SubElement(span,"a") + # a.attrib["name"]=name+"_"+m.attrib["name"] + # a.text=name+"::"+m.attrib["name"] - s += ' **'+m.attrib['name'].replace("_","_")+'** ' - else: - s += ' **['+ m.attrib['name'].replace("_","_")+'](#' + m.attrib['name'] + ')** ' + s += ' **'+m.attrib['name'].replace("_","_")+'** ' + else: + s += ' **['+ m.attrib['name'].replace("_","_")+'](#' + m.attrib['name'] + ')** ' - s += ' **(**' - argfound = False - for a in mdata['argidx']: - arg = mdata[a] - if a < 0: - continue - if a > 0: - s += ', ' - else: - s += ' ' + s += ' **(**' + argfound = False + for a in mdata['argidx']: + arg = mdata[a] + if a < 0: + continue + if a > 0: + s += ', ' + else: + s += ' ' - s += make_type(arg.attrib['type']) - if 'name' in arg.attrib: - s += ' ' + arg.attrib['name'] - else: - s += ' arg' + str(a) + s += make_type(arg.attrib['type']) + if 'name' in arg.attrib: + s += ' ' + arg.attrib['name'] + else: + s += ' arg' + str(a) - if 'default' in arg.attrib: - s += '=' + arg.attrib['default'] + if 'default' in arg.attrib: + s += '=' + arg.attrib['default'] - argfound = True + argfound = True - if argfound: - s += ' ' - s += ' **)**' + if argfound: + s += ' ' + s += ' **)**' - if 'qualifiers' in m.attrib: - s += ' ' + m.attrib['qualifiers'] + if 'qualifiers' in m.attrib: + s += ' ' + m.attrib['qualifiers'] - f.write(s + '\n') + f.write(s + '\n') def make_doku_class(node): - name = node.attrib['name'] + name = node.attrib['name'] - f = open("class_"+name.lower() + '.md', 'wb') + f = open("class_"+name.lower() + '.md', 'wb') - f.write('# ' + name + ' \n') + f.write('# ' + name + ' \n') - if 'inherits' in node.attrib: - inh = node.attrib['inherits'].strip() - f.write('####**Inherits:** '+make_type(inh)+'\n') - if 'category' in node.attrib: - f.write('####**Category:** ' + node.attrib['category'].strip() - + '\n') + if 'inherits' in node.attrib: + inh = node.attrib['inherits'].strip() + f.write('####**Inherits:** '+make_type(inh)+'\n') + if 'category' in node.attrib: + f.write('####**Category:** ' + node.attrib['category'].strip() + + '\n') - briefd = node.find('brief_description') - if briefd != None: - f.write('\n### Brief Description \n') - f.write(dokuize_text(briefd.text.strip()) + '\n') + briefd = node.find('brief_description') + if briefd != None: + f.write('\n### Brief Description \n') + f.write(dokuize_text(briefd.text.strip()) + '\n') - methods = node.find('methods') + methods = node.find('methods') - if methods != None and len(list(methods)) > 0: - f.write('\n### Member Functions \n') - for m in list(methods): - make_method(f, node.attrib['name'], m, False) + if methods != None and len(list(methods)) > 0: + f.write('\n### Member Functions \n') + for m in list(methods): + make_method(f, node.attrib['name'], m, False) - events = node.find('signals') - if events != None and len(list(events)) > 0: - f.write('\n### Signals \n') - for m in list(events): - make_method(f, node.attrib['name'], m, True, True) + events = node.find('signals') + if events != None and len(list(events)) > 0: + f.write('\n### Signals \n') + for m in list(events): + make_method(f, node.attrib['name'], m, True, True) - members = node.find('members') + members = node.find('members') - if members != None and len(list(members)) > 0: - f.write('\n### Member Variables \n') + if members != None and len(list(members)) > 0: + f.write('\n### Member Variables \n') - for c in list(members): - s = ' * ' - s += make_type(c.attrib['type']) + ' ' - s += '**' + c.attrib['name'] + '**' - if c.text.strip() != '': - s += ' - ' + c.text.strip() - f.write(s + '\n') + for c in list(members): + s = ' * ' + s += make_type(c.attrib['type']) + ' ' + s += '**' + c.attrib['name'] + '**' + if c.text.strip() != '': + s += ' - ' + c.text.strip() + f.write(s + '\n') - constants = node.find('constants') - if constants != None and len(list(constants)) > 0: - f.write('\n### Numeric Constants \n') - for c in list(constants): - s = ' * ' - s += '**' + c.attrib['name'] + '**' - if 'value' in c.attrib: - s += ' = **' + c.attrib['value'] + '**' - if c.text.strip() != '': - s += ' - ' + c.text.strip() - f.write(s + '\n') + constants = node.find('constants') + if constants != None and len(list(constants)) > 0: + f.write('\n### Numeric Constants \n') + for c in list(constants): + s = ' * ' + s += '**' + c.attrib['name'] + '**' + if 'value' in c.attrib: + s += ' = **' + c.attrib['value'] + '**' + if c.text.strip() != '': + s += ' - ' + c.text.strip() + f.write(s + '\n') - descr = node.find('description') - if descr != None and descr.text.strip() != '': - f.write('\n### Description \n') - f.write(dokuize_text(descr.text.strip()) + '\n') + descr = node.find('description') + if descr != None and descr.text.strip() != '': + f.write('\n### Description \n') + f.write(dokuize_text(descr.text.strip()) + '\n') - methods = node.find('methods') + methods = node.find('methods') - if methods != None and len(list(methods)) > 0: - f.write('\n### Member Function Description \n') - for m in list(methods): + if methods != None and len(list(methods)) > 0: + f.write('\n### Member Function Description \n') + for m in list(methods): - d = m.find('description') - if d == None or d.text.strip() == '': - continue - f.write('\n#### ' + m.attrib['name'] + '\n') - make_method(f, node.attrib['name'], m, True) - f.write('\n') - f.write(dokuize_text(d.text.strip())) - f.write('\n') + d = m.find('description') + if d == None or d.text.strip() == '': + continue + f.write('\n#### ' + m.attrib['name'] + '\n') + make_method(f, node.attrib['name'], m, True) + f.write('\n') + f.write(dokuize_text(d.text.strip())) + f.write('\n') for file in input_list: - tree = ET.parse(file) - doc = tree.getroot() + tree = ET.parse(file) + doc = tree.getroot() - if 'version' not in doc.attrib: - print "Version missing from 'doc'" - sys.exit(255) + if 'version' not in doc.attrib: + print "Version missing from 'doc'" + sys.exit(255) - version = doc.attrib['version'] + version = doc.attrib['version'] - for c in list(doc): - if c.attrib['name'] in class_names: - continue - class_names.append(c.attrib['name']) - classes[c.attrib['name']] = c + for c in list(doc): + if c.attrib['name'] in class_names: + continue + class_names.append(c.attrib['name']) + classes[c.attrib['name']] = c class_names.sort() make_class_list(class_names, 2) for cn in class_names: - c = classes[cn] - make_doku_class(c) + c = classes[cn] + make_doku_class(c) diff --git a/doc/tools/makerst.py b/doc/tools/makerst.py index 718cf4a275d..583f9afeaad 100644 --- a/doc/tools/makerst.py +++ b/doc/tools/makerst.py @@ -8,505 +8,505 @@ import xml.etree.ElementTree as ET input_list = [] for arg in sys.argv[1:]: - input_list.append(arg) + input_list.append(arg) if len(input_list) < 1: - print 'usage: makerst.py ' - sys.exit(0) + print 'usage: makerst.py ' + sys.exit(0) def validate_tag(elem, tag): - if elem.tag != tag: - print "Tag mismatch, expected '" + tag + "', got " + elem.tag - sys.exit(255) + if elem.tag != tag: + print "Tag mismatch, expected '" + tag + "', got " + elem.tag + sys.exit(255) class_names = [] classes = {} def ul_string(str,ul): - str+="\n" - for i in range(len(str)-1): - str+=ul - str+="\n" - return str + str+="\n" + for i in range(len(str)-1): + str+=ul + str+="\n" + return str def make_class_list(class_list, columns): - f = codecs.open('class_list.rst', 'wb', 'utf-8') - prev = 0 - col_max = len(class_list) / columns + 1 - print ('col max is ', col_max) - col_count = 0 - row_count = 0 - last_initial = '' - fit_columns = [] + f = codecs.open('class_list.rst', 'wb', 'utf-8') + prev = 0 + col_max = len(class_list) / columns + 1 + print ('col max is ', col_max) + col_count = 0 + row_count = 0 + last_initial = '' + fit_columns = [] - for n in range(0, columns): - fit_columns += [[]] + for n in range(0, columns): + fit_columns += [[]] - indexers = [] - last_initial = '' + indexers = [] + last_initial = '' - idx = 0 - for n in class_list: - col = idx / col_max - if col >= columns: - col = columns - 1 - fit_columns[col] += [n] - idx += 1 - if n[:1] != last_initial: - indexers += [n] - last_initial = n[:1] + idx = 0 + for n in class_list: + col = idx / col_max + if col >= columns: + col = columns - 1 + fit_columns[col] += [n] + idx += 1 + if n[:1] != last_initial: + indexers += [n] + last_initial = n[:1] - row_max = 0 - f.write("\n") + row_max = 0 + f.write("\n") - for n in range(0, columns): - if len(fit_columns[n]) > row_max: - row_max = len(fit_columns[n]) + for n in range(0, columns): + if len(fit_columns[n]) > row_max: + row_max = len(fit_columns[n]) - f.write("| ") - for n in range(0, columns): - f.write(" | |") + f.write("| ") + for n in range(0, columns): + f.write(" | |") - f.write("\n") - f.write("+") - for n in range(0, columns): - f.write("--+-------+") - f.write("\n") + f.write("\n") + f.write("+") + for n in range(0, columns): + f.write("--+-------+") + f.write("\n") - for r in range(0, row_max): - s = '+ ' - for c in range(0, columns): - if r >= len(fit_columns[c]): - continue + for r in range(0, row_max): + s = '+ ' + for c in range(0, columns): + if r >= len(fit_columns[c]): + continue - classname = fit_columns[c][r] - initial = classname[0] - if classname in indexers: - s += '**' + initial + '** | ' - else: - s += ' | ' + classname = fit_columns[c][r] + initial = classname[0] + if classname in indexers: + s += '**' + initial + '** | ' + else: + s += ' | ' - s += '[' + classname + '](class_'+ classname.lower()+') | ' + s += '[' + classname + '](class_'+ classname.lower()+') | ' - s += '\n' - f.write(s) + s += '\n' + f.write(s) - for n in range(0, columns): - f.write("--+-------+") - f.write("\n") + for n in range(0, columns): + f.write("--+-------+") + f.write("\n") def rstize_text(text,cclass): - # Linebreak + tabs in the XML should become two line breaks unless in a "codeblock" - pos = 0 - while True: - pos = text.find('\n', pos) - if pos == -1: - break + # Linebreak + tabs in the XML should become two line breaks unless in a "codeblock" + pos = 0 + while True: + pos = text.find('\n', pos) + if pos == -1: + break - pre_text = text[:pos] - while text[pos+1] == '\t': - pos += 1 - post_text = text[pos+1:] + pre_text = text[:pos] + while text[pos+1] == '\t': + pos += 1 + post_text = text[pos+1:] - # Handle codeblocks - if post_text.startswith("[codeblock]"): - end_pos = post_text.find("[/codeblock]") - if end_pos == -1: - sys.exit("ERROR! [codeblock] without a closing tag!") + # Handle codeblocks + if post_text.startswith("[codeblock]"): + end_pos = post_text.find("[/codeblock]") + if end_pos == -1: + sys.exit("ERROR! [codeblock] without a closing tag!") - code_text = post_text[len("[codeblock]"):end_pos] - post_text = post_text[end_pos:] + code_text = post_text[len("[codeblock]"):end_pos] + post_text = post_text[end_pos:] - # Remove extraneous tabs - code_pos = 0 - while True: - code_pos = code_text.find('\n', code_pos) - if code_pos == -1: - break + # Remove extraneous tabs + code_pos = 0 + while True: + code_pos = code_text.find('\n', code_pos) + if code_pos == -1: + break - to_skip = 0 - while code_pos+to_skip+1 < len(code_text) and code_text[code_pos+to_skip+1] == '\t': - to_skip += 1 + to_skip = 0 + while code_pos+to_skip+1 < len(code_text) and code_text[code_pos+to_skip+1] == '\t': + to_skip += 1 - if len(code_text[code_pos+to_skip+1:])==0: - code_text = code_text[:code_pos] + "\n" - code_pos += 1 - else: - code_text = code_text[:code_pos] + "\n " + code_text[code_pos+to_skip+1:] - code_pos += 5 - to_skip + if len(code_text[code_pos+to_skip+1:])==0: + code_text = code_text[:code_pos] + "\n" + code_pos += 1 + else: + code_text = code_text[:code_pos] + "\n " + code_text[code_pos+to_skip+1:] + code_pos += 5 - to_skip - text = pre_text + "\n[codeblock]" + code_text + post_text - pos += len("\n[codeblock]" + code_text) + text = pre_text + "\n[codeblock]" + code_text + post_text + pos += len("\n[codeblock]" + code_text) - # Handle normal text - else: - text = pre_text + "\n\n" + post_text - pos += 2 + # Handle normal text + else: + text = pre_text + "\n\n" + post_text + pos += 2 - # Escape * character to avoid interpreting it as emphasis - pos = 0 - while True: - pos = text.find('*', pos) - if pos == -1: - break - text = text[:pos] + "\*" + text[pos + 1:] - pos += 2 + # Escape * character to avoid interpreting it as emphasis + pos = 0 + while True: + pos = text.find('*', pos) + if pos == -1: + break + text = text[:pos] + "\*" + text[pos + 1:] + pos += 2 - # Escape _ character at the end of a word to avoid interpreting it as an inline hyperlink - pos = 0 - while True: - pos = text.find('_', pos) - if pos == -1: - break - if not text[pos + 1].isalnum(): # don't escape within a snake_case word - text = text[:pos] + "\_" + text[pos + 1:] - pos += 2 - else: - pos += 1 + # Escape _ character at the end of a word to avoid interpreting it as an inline hyperlink + pos = 0 + while True: + pos = text.find('_', pos) + if pos == -1: + break + if not text[pos + 1].isalnum(): # don't escape within a snake_case word + text = text[:pos] + "\_" + text[pos + 1:] + pos += 2 + else: + pos += 1 - # Handle [tags] - pos = 0 - while True: - pos = text.find('[', pos) - if pos == -1: - break + # Handle [tags] + pos = 0 + while True: + pos = text.find('[', pos) + if pos == -1: + break - endq_pos = text.find(']', pos + 1) - if endq_pos == -1: - break + endq_pos = text.find(']', pos + 1) + if endq_pos == -1: + break - pre_text = text[:pos] - post_text = text[endq_pos + 1:] - tag_text = text[pos + 1:endq_pos] + pre_text = text[:pos] + post_text = text[endq_pos + 1:] + tag_text = text[pos + 1:endq_pos] - if tag_text in class_names: - tag_text = make_type(tag_text) - else: # command - cmd = tag_text - space_pos = tag_text.find(' ') - if cmd.find('html') == 0: - cmd = tag_text[:space_pos] - param = tag_text[space_pos + 1:] - tag_text = param - elif cmd.find('method') == 0: - cmd = tag_text[:space_pos] - param = tag_text[space_pos + 1:] + if tag_text in class_names: + tag_text = make_type(tag_text) + else: # command + cmd = tag_text + space_pos = tag_text.find(' ') + if cmd.find('html') == 0: + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] + tag_text = param + elif cmd.find('method') == 0: + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] - if param.find('.') != -1: - (class_param, method_param) = param.split('.') - tag_text = ':ref:`'+class_param+'.'+method_param+'`' - else: - tag_text = ':ref:`' + param + '`' - elif cmd.find('image=') == 0: - tag_text = "" #'![](' + cmd[6:] + ')' - elif cmd.find('url=') == 0: - tag_text = ':ref:`' + cmd[4:] + '<'+cmd[4:]+">`" - elif cmd == '/url': - tag_text = ')' - elif cmd == 'center': - tag_text = '' - elif cmd == '/center': - tag_text = '' - elif cmd == 'codeblock': - tag_text = '\n::\n' - elif cmd == '/codeblock': - tag_text = '' - # Strip newline if the tag was alone on one - if pre_text[-1] == '\n': - pre_text = pre_text[:-1] - elif cmd == 'br': - # Make a new paragraph instead of a linebreak, rst is not so linebreak friendly - tag_text = '\n\n' - # Strip potential leading spaces - while post_text[0] == ' ': - post_text = post_text[1:] - elif cmd == 'i' or cmd == '/i': - tag_text = '*' - elif cmd == 'b' or cmd == '/b': - tag_text = '**' - elif cmd == 'u' or cmd == '/u': - tag_text = '' - elif cmd == 'code' or cmd == '/code': - tag_text = '``' - else: - tag_text = ':ref:`' + tag_text + '`' + if param.find('.') != -1: + (class_param, method_param) = param.split('.') + tag_text = ':ref:`'+class_param+'.'+method_param+'`' + else: + tag_text = ':ref:`' + param + '`' + elif cmd.find('image=') == 0: + tag_text = "" #'![](' + cmd[6:] + ')' + elif cmd.find('url=') == 0: + tag_text = ':ref:`' + cmd[4:] + '<'+cmd[4:]+">`" + elif cmd == '/url': + tag_text = ')' + elif cmd == 'center': + tag_text = '' + elif cmd == '/center': + tag_text = '' + elif cmd == 'codeblock': + tag_text = '\n::\n' + elif cmd == '/codeblock': + tag_text = '' + # Strip newline if the tag was alone on one + if pre_text[-1] == '\n': + pre_text = pre_text[:-1] + elif cmd == 'br': + # Make a new paragraph instead of a linebreak, rst is not so linebreak friendly + tag_text = '\n\n' + # Strip potential leading spaces + while post_text[0] == ' ': + post_text = post_text[1:] + elif cmd == 'i' or cmd == '/i': + tag_text = '*' + elif cmd == 'b' or cmd == '/b': + tag_text = '**' + elif cmd == 'u' or cmd == '/u': + tag_text = '' + elif cmd == 'code' or cmd == '/code': + tag_text = '``' + else: + tag_text = ':ref:`' + tag_text + '`' - text = pre_text + tag_text + post_text - pos = len(pre_text) + len(tag_text) + text = pre_text + tag_text + post_text + pos = len(pre_text) + len(tag_text) # tnode = ET.SubElement(parent,"div") # tnode.text=text - return text + return text def make_type(t): - global class_names - if t in class_names: - return ':ref:`'+t+'`' - return t + global class_names + if t in class_names: + return ':ref:`'+t+'`' + return t def make_method( - f, - name, - m, - declare, - cname, - event=False, - pp=None - ): + f, + name, + m, + declare, + cname, + event=False, + pp=None +): - if (declare or pp==None): - t = '- ' - else: - t = "" + if (declare or pp==None): + t = '- ' + else: + t = "" - ret_type = 'void' - args = list(m) - mdata = {} - mdata['argidx'] = [] - for a in args: - if a.tag == 'return': - idx = -1 - elif a.tag == 'argument': - idx = int(a.attrib['index']) - else: - continue + ret_type = 'void' + args = list(m) + mdata = {} + mdata['argidx'] = [] + for a in args: + if a.tag == 'return': + idx = -1 + elif a.tag == 'argument': + idx = int(a.attrib['index']) + else: + continue - mdata['argidx'].append(idx) - mdata[idx] = a + mdata['argidx'].append(idx) + mdata[idx] = a - if not event: - if -1 in mdata['argidx']: - t += make_type(mdata[-1].attrib['type']) - else: - t += 'void' - t += ' ' + if not event: + if -1 in mdata['argidx']: + t += make_type(mdata[-1].attrib['type']) + else: + t += 'void' + t += ' ' - if declare or pp==None: + if declare or pp==None: - # span.attrib["class"]="funcdecl" - # a=ET.SubElement(span,"a") - # a.attrib["name"]=name+"_"+m.attrib["name"] - # a.text=name+"::"+m.attrib["name"] + # span.attrib["class"]="funcdecl" + # a=ET.SubElement(span,"a") + # a.attrib["name"]=name+"_"+m.attrib["name"] + # a.text=name+"::"+m.attrib["name"] - s = ' **'+m.attrib['name']+'** ' - else: - s = ':ref:`'+ m.attrib['name']+'` ' + s = ' **'+m.attrib['name']+'** ' + else: + s = ':ref:`'+ m.attrib['name']+'` ' - s += ' **(**' - argfound = False - for a in mdata['argidx']: - arg = mdata[a] - if a < 0: - continue - if a > 0: - s += ', ' - else: - s += ' ' + s += ' **(**' + argfound = False + for a in mdata['argidx']: + arg = mdata[a] + if a < 0: + continue + if a > 0: + s += ', ' + else: + s += ' ' - s += make_type(arg.attrib['type']) - if 'name' in arg.attrib: - s += ' ' + arg.attrib['name'] - else: - s += ' arg' + str(a) + s += make_type(arg.attrib['type']) + if 'name' in arg.attrib: + s += ' ' + arg.attrib['name'] + else: + s += ' arg' + str(a) - if 'default' in arg.attrib: - s += '=' + arg.attrib['default'] + if 'default' in arg.attrib: + s += '=' + arg.attrib['default'] - argfound = True + argfound = True - if argfound: - s += ' ' - s += ' **)**' + if argfound: + s += ' ' + s += ' **)**' - if 'qualifiers' in m.attrib: - s += ' ' + m.attrib['qualifiers'] + if 'qualifiers' in m.attrib: + s += ' ' + m.attrib['qualifiers'] # f.write(s) - if (not declare): - if (pp!=None): - pp.append( (t,s) ) - else: - f.write("- "+t+" "+s+"\n") - else: - f.write(t+s+"\n") + if (not declare): + if (pp!=None): + pp.append( (t,s) ) + else: + f.write("- "+t+" "+s+"\n") + else: + f.write(t+s+"\n") def make_heading(title, underline): - return title + '\n' + underline*len(title) + "\n\n" + return title + '\n' + underline*len(title) + "\n\n" def make_rst_class(node): - name = node.attrib['name'] + name = node.attrib['name'] - f = codecs.open("class_"+name.lower() + '.rst', 'wb', 'utf-8') + f = codecs.open("class_"+name.lower() + '.rst', 'wb', 'utf-8') - # Warn contributors not to edit this file directly - f.write(".. Generated automatically by doc/tools/makerst.py in Godot's source tree.\n") - f.write(".. DO NOT EDIT THIS FILE, but the doc/base/classes.xml source instead.\n\n") + # Warn contributors not to edit this file directly + f.write(".. Generated automatically by doc/tools/makerst.py in Godot's source tree.\n") + f.write(".. DO NOT EDIT THIS FILE, but the doc/base/classes.xml source instead.\n\n") - f.write(".. _class_"+name+":\n\n") - f.write(make_heading(name, '=')) + f.write(".. _class_"+name+":\n\n") + f.write(make_heading(name, '=')) - if 'inherits' in node.attrib: - inh = node.attrib['inherits'].strip() + if 'inherits' in node.attrib: + inh = node.attrib['inherits'].strip() # whle inh in classes[cn] - f.write('**Inherits:** ') - first=True - while(inh in classes): - if (not first): - f.write(" **<** ") - else: - first=False + f.write('**Inherits:** ') + first=True + while(inh in classes): + if (not first): + f.write(" **<** ") + else: + first=False - f.write(make_type(inh)) - inode = classes[inh] - if ('inherits' in inode.attrib): - inh=inode.attrib['inherits'].strip() - else: - inh=None + f.write(make_type(inh)) + inode = classes[inh] + if ('inherits' in inode.attrib): + inh=inode.attrib['inherits'].strip() + else: + inh=None - f.write("\n\n") + f.write("\n\n") - inherited=[] - for cn in classes: - c=classes[cn] - if 'inherits' in c.attrib: - if (c.attrib['inherits'].strip()==name): - inherited.append(c.attrib['name']) + inherited=[] + for cn in classes: + c=classes[cn] + if 'inherits' in c.attrib: + if (c.attrib['inherits'].strip()==name): + inherited.append(c.attrib['name']) - if (len(inherited)): - f.write('**Inherited By:** ') - for i in range(len(inherited)): - if (i>0): - f.write(", ") - f.write(make_type(inherited[i])) - f.write("\n\n") - if 'category' in node.attrib: - f.write('**Category:** ' + node.attrib['category'].strip() + "\n\n") + if (len(inherited)): + f.write('**Inherited By:** ') + for i in range(len(inherited)): + if (i>0): + f.write(", ") + f.write(make_type(inherited[i])) + f.write("\n\n") + if 'category' in node.attrib: + f.write('**Category:** ' + node.attrib['category'].strip() + "\n\n") - f.write(make_heading('Brief Description', '-')) - briefd = node.find('brief_description') - if briefd != None: - f.write(rstize_text(briefd.text.strip(),name) + "\n\n") + f.write(make_heading('Brief Description', '-')) + briefd = node.find('brief_description') + if briefd != None: + f.write(rstize_text(briefd.text.strip(),name) + "\n\n") - methods = node.find('methods') + methods = node.find('methods') - if methods != None and len(list(methods)) > 0: - f.write(make_heading('Member Functions', '-')) - ml=[] - for m in list(methods): - make_method(f, node.attrib['name'], m, False,name,False,ml) - longest_t = 0 - longest_s = 0 - for s in ml: - sl = len(s[0]) - if (sl>longest_s): - longest_s=sl - tl = len(s[1]) - if (tl>longest_t): - longest_t=tl + if methods != None and len(list(methods)) > 0: + f.write(make_heading('Member Functions', '-')) + ml=[] + for m in list(methods): + make_method(f, node.attrib['name'], m, False,name,False,ml) + longest_t = 0 + longest_s = 0 + for s in ml: + sl = len(s[0]) + if (sl>longest_s): + longest_s=sl + tl = len(s[1]) + if (tl>longest_t): + longest_t=tl - sep="+" - for i in range(longest_s+2): - sep+="-" - sep+="+" - for i in range(longest_t+2): - sep+="-" - sep+="+\n" - f.write(sep) - for s in ml: - rt = s[0] - while( len(rt) < longest_s ): - rt+=" " - st = s[1] - while( len(st) < longest_t ): - st+=" " - f.write("| "+rt+" | "+st+" |\n") - f.write(sep) - f.write('\n') + sep="+" + for i in range(longest_s+2): + sep+="-" + sep+="+" + for i in range(longest_t+2): + sep+="-" + sep+="+\n" + f.write(sep) + for s in ml: + rt = s[0] + while( len(rt) < longest_s ): + rt+=" " + st = s[1] + while( len(st) < longest_t ): + st+=" " + f.write("| "+rt+" | "+st+" |\n") + f.write(sep) + f.write('\n') - events = node.find('signals') - if events != None and len(list(events)) > 0: - f.write(make_heading('Signals', '-')) - for m in list(events): - make_method(f, node.attrib['name'], m, True,name, True) - f.write('\n') + events = node.find('signals') + if events != None and len(list(events)) > 0: + f.write(make_heading('Signals', '-')) + for m in list(events): + make_method(f, node.attrib['name'], m, True,name, True) + f.write('\n') - members = node.find('members') - if members != None and len(list(members)) > 0: - f.write(make_heading('Member Variables', '-')) + members = node.find('members') + if members != None and len(list(members)) > 0: + f.write(make_heading('Member Variables', '-')) - for c in list(members): - s = '- ' - s += make_type(c.attrib['type']) + ' ' - s += '**' + c.attrib['name'] + '**' - if c.text.strip() != '': - s += ' - ' + c.text.strip() - f.write(s + '\n') - f.write('\n') + for c in list(members): + s = '- ' + s += make_type(c.attrib['type']) + ' ' + s += '**' + c.attrib['name'] + '**' + if c.text.strip() != '': + s += ' - ' + c.text.strip() + f.write(s + '\n') + f.write('\n') - constants = node.find('constants') - if constants != None and len(list(constants)) > 0: - f.write(make_heading('Numeric Constants', '-')) - for c in list(constants): - s = '- ' - s += '**' + c.attrib['name'] + '**' - if 'value' in c.attrib: - s += ' = **' + c.attrib['value'] + '**' - if c.text.strip() != '': - s += ' --- ' + rstize_text(c.text.strip(),name) - f.write(s + '\n') - f.write('\n') + constants = node.find('constants') + if constants != None and len(list(constants)) > 0: + f.write(make_heading('Numeric Constants', '-')) + for c in list(constants): + s = '- ' + s += '**' + c.attrib['name'] + '**' + if 'value' in c.attrib: + s += ' = **' + c.attrib['value'] + '**' + if c.text.strip() != '': + s += ' --- ' + rstize_text(c.text.strip(),name) + f.write(s + '\n') + f.write('\n') - descr = node.find('description') - if descr != None and descr.text.strip() != '': - f.write(make_heading('Description', '-')) - f.write(rstize_text(descr.text.strip(),name) + "\n\n") + descr = node.find('description') + if descr != None and descr.text.strip() != '': + f.write(make_heading('Description', '-')) + f.write(rstize_text(descr.text.strip(),name) + "\n\n") - methods = node.find('methods') - if methods != None and len(list(methods)) > 0: - f.write(make_heading('Member Function Description', '-')) - for m in list(methods): - f.write(".. _class_"+name+"_"+m.attrib['name']+":\n\n") + methods = node.find('methods') + if methods != None and len(list(methods)) > 0: + f.write(make_heading('Member Function Description', '-')) + for m in list(methods): + f.write(".. _class_"+name+"_"+m.attrib['name']+":\n\n") # f.write(ul_string(m.attrib['name'],"^")) - #f.write('\n' + m.attrib['name'] + '\n------\n') - make_method(f, node.attrib['name'], m, True,name) - f.write('\n') - d = m.find('description') - if d == None or d.text.strip() == '': - continue - f.write(rstize_text(d.text.strip(),name)) - f.write("\n\n") - f.write('\n') + #f.write('\n' + m.attrib['name'] + '\n------\n') + make_method(f, node.attrib['name'], m, True,name) + f.write('\n') + d = m.find('description') + if d == None or d.text.strip() == '': + continue + f.write(rstize_text(d.text.strip(),name)) + f.write("\n\n") + f.write('\n') for file in input_list: - tree = ET.parse(file) - doc = tree.getroot() + tree = ET.parse(file) + doc = tree.getroot() - if 'version' not in doc.attrib: - print "Version missing from 'doc'" - sys.exit(255) + if 'version' not in doc.attrib: + print "Version missing from 'doc'" + sys.exit(255) - version = doc.attrib['version'] + version = doc.attrib['version'] - for c in list(doc): - if c.attrib['name'] in class_names: - continue - class_names.append(c.attrib['name']) - classes[c.attrib['name']] = c + for c in list(doc): + if c.attrib['name'] in class_names: + continue + class_names.append(c.attrib['name']) + classes[c.attrib['name']] = c class_names.sort() @@ -514,6 +514,6 @@ class_names.sort() #make_class_list(class_names, 2) for cn in class_names: - c = classes[cn] - make_rst_class(c) + c = classes[cn] + make_rst_class(c) diff --git a/drivers/SCsub b/drivers/SCsub index 8571b0c4ec6..27871bb3370 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -5,7 +5,7 @@ Import('env') env.drivers_sources=[] if ("builtin_zlib" in env and env["builtin_zlib"] == "yes"): - SConscript("zlib/SCsub"); + SConscript("zlib/SCsub"); # OS drivers SConscript('unix/SCsub'); @@ -15,9 +15,9 @@ SConscript('windows/SCsub'); SConscript('alsa/SCsub'); SConscript('pulseaudio/SCsub'); if (env["platform"] == "windows"): - SConscript("rtaudio/SCsub"); + SConscript("rtaudio/SCsub"); if (env["xaudio2"] == "yes"): - SConscript("xaudio2/SCsub"); + SConscript("xaudio2/SCsub"); # Graphics drivers SConscript('gles2/SCsub'); @@ -29,14 +29,14 @@ SConscript("png/SCsub"); # Tools override # FIXME: Should likely be integrated in the tools/ codebase if (env["tools"]=="yes"): - SConscript("convex_decomp/SCsub"); + SConscript("convex_decomp/SCsub"); if env['vsproj']=="yes": - env.AddToVSProject(env.drivers_sources) + env.AddToVSProject(env.drivers_sources) if env.split_drivers: - env.split_lib("drivers") + env.split_lib("drivers") else: - env.add_source_files(env.drivers_sources,"*.cpp") - lib = env.Library("drivers",env.drivers_sources) - env.Prepend(LIBS=[lib]) + env.add_source_files(env.drivers_sources,"*.cpp") + lib = env.Library("drivers",env.drivers_sources) + env.Prepend(LIBS=[lib]) diff --git a/drivers/gl_context/SCsub b/drivers/gl_context/SCsub index cf37e9fe36d..acdc3d52e85 100644 --- a/drivers/gl_context/SCsub +++ b/drivers/gl_context/SCsub @@ -3,19 +3,19 @@ Import('env') if (env["platform"] in ["haiku","osx","windows","x11"]): - # Thirdparty source files - if (env["glew"] != "system"): # builtin - thirdparty_dir = "#thirdparty/glew/" - thirdparty_sources = [ - "glew.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + # Thirdparty source files + if (env["glew"] != "system"): # builtin + thirdparty_dir = "#thirdparty/glew/" + thirdparty_sources = [ + "glew.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env.add_source_files(env.drivers_sources, thirdparty_sources) - env.Append(CPPFLAGS = ['-DGLEW_STATIC']) - env.Append(CPPPATH = [thirdparty_dir]) + env.add_source_files(env.drivers_sources, thirdparty_sources) + env.Append(CPPFLAGS = ['-DGLEW_STATIC']) + env.Append(CPPPATH = [thirdparty_dir]) - env.Append(CPPFLAGS = ['-DGLEW_ENABLED']) + env.Append(CPPFLAGS = ['-DGLEW_ENABLED']) # Godot source files env.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/gles2/shaders/SCsub b/drivers/gles2/shaders/SCsub index 1de56043210..26b6f7f06a0 100644 --- a/drivers/gles2/shaders/SCsub +++ b/drivers/gles2/shaders/SCsub @@ -3,10 +3,10 @@ Import('env') if env['BUILDERS'].has_key('GLSL120GLES'): - env.GLSL120GLES('material.glsl'); - env.GLSL120GLES('canvas.glsl'); - env.GLSL120GLES('canvas_shadow.glsl'); - env.GLSL120GLES('blur.glsl'); - env.GLSL120GLES('copy.glsl'); + env.GLSL120GLES('material.glsl'); + env.GLSL120GLES('canvas.glsl'); + env.GLSL120GLES('canvas_shadow.glsl'); + env.GLSL120GLES('blur.glsl'); + env.GLSL120GLES('copy.glsl'); Export('env') diff --git a/drivers/png/SCsub b/drivers/png/SCsub index 04cb70e1c18..6eabd8901dc 100644 --- a/drivers/png/SCsub +++ b/drivers/png/SCsub @@ -6,43 +6,43 @@ env_png = env.Clone() # Thirdparty source files if (env["libpng"] == "builtin"): - thirdparty_dir = "#thirdparty/libpng/" - thirdparty_sources = [ - "png.c", - "pngerror.c", - "pngget.c", - "pngmem.c", - "pngpread.c", - "pngread.c", - "pngrio.c", - "pngrtran.c", - "pngrutil.c", - "pngset.c", - "pngtrans.c", - "pngwio.c", - "pngwrite.c", - "pngwtran.c", - "pngwutil.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_dir = "#thirdparty/libpng/" + thirdparty_sources = [ + "png.c", + "pngerror.c", + "pngget.c", + "pngmem.c", + "pngpread.c", + "pngread.c", + "pngrio.c", + "pngrtran.c", + "pngrutil.c", + "pngset.c", + "pngtrans.c", + "pngwio.c", + "pngwrite.c", + "pngwtran.c", + "pngwutil.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_png.add_source_files(env.drivers_sources, thirdparty_sources) - env_png.Append(CPPPATH = [thirdparty_dir]) + env_png.add_source_files(env.drivers_sources, thirdparty_sources) + env_png.Append(CPPPATH = [thirdparty_dir]) - # Currently .ASM filter_neon.S does not compile on NT. - import os - if ("neon_enabled" in env and env["neon_enabled"]) and os.name!="nt": - env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=2"]) - env_neon = env_png.Clone(); - if "S_compiler" in env: - env_neon['CC'] = env['S_compiler'] - neon_sources = [] - neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/arm_init.c")) - neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/filter_neon_intrinsics.c")) - neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/filter_neon.S")) - env.drivers_sources += neon_sources - else: - env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=0"]) + # Currently .ASM filter_neon.S does not compile on NT. + import os + if ("neon_enabled" in env and env["neon_enabled"]) and os.name!="nt": + env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=2"]) + env_neon = env_png.Clone(); + if "S_compiler" in env: + env_neon['CC'] = env['S_compiler'] + neon_sources = [] + neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/arm_init.c")) + neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/filter_neon_intrinsics.c")) + neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/filter_neon.S")) + env.drivers_sources += neon_sources + else: + env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=0"]) # Godot source files env_png.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/rtaudio/SCsub b/drivers/rtaudio/SCsub index f0273dd421f..704c4b6740b 100644 --- a/drivers/rtaudio/SCsub +++ b/drivers/rtaudio/SCsub @@ -7,7 +7,7 @@ Import('env') # Thirdparty source files thirdparty_dir = "#thirdparty/rtaudio/" thirdparty_sources = [ - "RtAudio.cpp", + "RtAudio.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/drivers/zlib/SCsub b/drivers/zlib/SCsub index 6a099aff525..928128e40ca 100644 --- a/drivers/zlib/SCsub +++ b/drivers/zlib/SCsub @@ -8,17 +8,17 @@ Import('env') # No check here as already done in drivers/SCsub thirdparty_dir = "#thirdparty/zlib/" thirdparty_sources = [ - "adler32.c", - "compress.c", - "crc32.c", - "deflate.c", - "infback.c", - "inffast.c", - "inflate.c", - "inftrees.c", - "trees.c", - "uncompr.c", - "zutil.c", + "adler32.c", + "compress.c", + "crc32.c", + "deflate.c", + "infback.c", + "inffast.c", + "inflate.c", + "inftrees.c", + "trees.c", + "uncompr.c", + "zutil.c", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/methods.py b/methods.py index efe2400c1c8..0ede914aec2 100755 --- a/methods.py +++ b/methods.py @@ -1,258 +1,258 @@ import os def add_source_files(self, sources, filetype, lib_env = None, shared = False): - import glob; - import string; - #if not lib_objects: - if not lib_env: - lib_env = self - if type(filetype) == type(""): + import glob; + import string; + #if not lib_objects: + if not lib_env: + lib_env = self + if type(filetype) == type(""): - dir = self.Dir('.').abspath - list = glob.glob(dir + "/"+filetype) - for f in list: - sources.append( self.Object(f) ) - else: - for f in filetype: - sources.append(self.Object(f)) + dir = self.Dir('.').abspath + list = glob.glob(dir + "/"+filetype) + for f in list: + sources.append( self.Object(f) ) + else: + for f in filetype: + sources.append(self.Object(f)) def build_shader_header( target, source, env ): - for x in source: - print x + for x in source: + print x - name = str(x) - name = name[ name.rfind("/")+1: ] - name = name[ name.rfind("\\")+1: ] - name = name.replace(".","_") + name = str(x) + name = name[ name.rfind("/")+1: ] + name = name[ name.rfind("\\")+1: ] + name = name.replace(".","_") - fs = open(str(x),"r") - fd = open(str(x)+".h","w") - fd.write("/* this file has been generated by SCons, do not edit! */\n") - fd.write("static const char *"+name+"=\n") - line=fs.readline() - while(line): - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - fd.write("\""+line+"\\n\"\n") - line=fs.readline() + fs = open(str(x),"r") + fd = open(str(x)+".h","w") + fd.write("/* this file has been generated by SCons, do not edit! */\n") + fd.write("static const char *"+name+"=\n") + line=fs.readline() + while(line): + line=line.replace("\r","") + line=line.replace("\n","") + line=line.replace("\\","\\\\") + line=line.replace("\"","\\\"") + fd.write("\""+line+"\\n\"\n") + line=fs.readline() - fd.write(";\n") + fd.write(";\n") - return 0 + return 0 def build_glsl_header( filename ): - fs = open(filename,"r") - line=fs.readline() + fs = open(filename,"r") + line=fs.readline() - vertex_lines=[] - fragment_lines=[] - uniforms=[] - attributes=[] - fbos=[] - conditionals=[] - texunits=[] - texunit_names=[] - ubos=[] - ubo_names=[] + vertex_lines=[] + fragment_lines=[] + uniforms=[] + attributes=[] + fbos=[] + conditionals=[] + texunits=[] + texunit_names=[] + ubos=[] + ubo_names=[] - reading="" - line_offset=0 - vertex_offset=0 - fragment_offset=0 + reading="" + line_offset=0 + vertex_offset=0 + fragment_offset=0 - while(line): + while(line): - if (line.find("[vertex]")!=-1): - reading="vertex" - line=fs.readline() - line_offset+=1 - vertex_offset=line_offset - continue + if (line.find("[vertex]")!=-1): + reading="vertex" + line=fs.readline() + line_offset+=1 + vertex_offset=line_offset + continue - if (line.find("[fragment]")!=-1): - reading="fragment" - line=fs.readline() - line_offset+=1 - fragment_offset=line_offset - continue + if (line.find("[fragment]")!=-1): + reading="fragment" + line=fs.readline() + line_offset+=1 + fragment_offset=line_offset + continue - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() - if (not ifdefline in conditionals): - conditionals+=[ifdefline] + if (line.find("#ifdef ")!=-1): + ifdefline = line.replace("#ifdef ","").strip() + if (not ifdefline in conditionals): + conditionals+=[ifdefline] - if (line.find("#elif defined(")!=-1): - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() - if (not ifdefline in conditionals): - conditionals+=[ifdefline] + if (line.find("#elif defined(")!=-1): + ifdefline = line.replace("#elif defined(","").strip() + ifdefline = ifdefline.replace(")","").strip() + if (not ifdefline in conditionals): + conditionals+=[ifdefline] - import re - if re.search(r"^\s*uniform", line): + import re + if re.search(r"^\s*uniform", line): - if (line.lower().find("texunit:")!=-1): - #texture unit - texunit = str(int( line[line.find(":")+1:].strip() )) - uline=line[:line.lower().find("//")] - uline = uline.replace("uniform",""); - uline = uline.replace(";",""); - lines = uline.split(",") - for x in lines: + if (line.lower().find("texunit:")!=-1): + #texture unit + texunit = str(int( line[line.find(":")+1:].strip() )) + uline=line[:line.lower().find("//")] + uline = uline.replace("uniform",""); + uline = uline.replace(";",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in texunit_names): - texunits+=[(x,texunit)] - texunit_names+=[x] + if (not x in texunit_names): + texunits+=[(x,texunit)] + texunit_names+=[x] - elif (line.lower().find("ubo:")!=-1): - #ubo - uboidx = str(int( line[line.find(":")+1:].strip() )) - uline=line[:line.lower().find("//")] - uline = uline[uline.find("uniform")+len("uniform"):]; - uline = uline.replace(";",""); - uline = uline.replace("{",""); - lines = uline.split(",") - for x in lines: + elif (line.lower().find("ubo:")!=-1): + #ubo + uboidx = str(int( line[line.find(":")+1:].strip() )) + uline=line[:line.lower().find("//")] + uline = uline[uline.find("uniform")+len("uniform"):]; + uline = uline.replace(";",""); + uline = uline.replace("{",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in ubo_names): - ubos+=[(x,uboidx)] - ubo_names+=[x] + if (not x in ubo_names): + ubos+=[(x,uboidx)] + ubo_names+=[x] - else: - uline = line.replace("uniform",""); - uline = uline.replace(";",""); - lines = uline.split(",") - for x in lines: + else: + uline = line.replace("uniform",""); + uline = uline.replace(";",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in uniforms): - uniforms+=[x] + if (not x in uniforms): + uniforms+=[x] - if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): - uline = line.replace("in ",""); - uline = uline.replace("attribute ",""); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): + uline = line.replace("in ",""); + uline = uline.replace("attribute ",""); + uline = uline.replace(";",""); + uline = uline[ uline.find(" "): ].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("attrib:")!=-1): - name=name.strip() - bind=bind.replace("attrib:","").strip() - attributes+=[(name,bind)] + if (uline.find("//")!=-1): + name,bind = uline.split("//") + if (bind.find("attrib:")!=-1): + name=name.strip() + bind=bind.replace("attrib:","").strip() + attributes+=[(name,bind)] - if (line.strip().find("out ")==0): - uline = line.replace("out","").strip(); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if (line.strip().find("out ")==0): + uline = line.replace("out","").strip(); + uline = uline.replace(";",""); + uline = uline[ uline.find(" "): ].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("drawbuffer:")!=-1): - name=name.strip() - bind=bind.replace("drawbuffer:","").strip() - fbos+=[(name,bind)] + if (uline.find("//")!=-1): + name,bind = uline.split("//") + if (bind.find("drawbuffer:")!=-1): + name=name.strip() + bind=bind.replace("drawbuffer:","").strip() + fbos+=[(name,bind)] - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - #line=line+"\\n\\" no need to anymore + line=line.replace("\r","") + line=line.replace("\n","") + line=line.replace("\\","\\\\") + line=line.replace("\"","\\\"") + #line=line+"\\n\\" no need to anymore - if (reading=="vertex"): - vertex_lines+=[line] - if (reading=="fragment"): - fragment_lines+=[line] + if (reading=="vertex"): + vertex_lines+=[line] + if (reading=="fragment"): + fragment_lines+=[line] - line=fs.readline() - line_offset+=1 + line=fs.readline() + line_offset+=1 - fs.close(); + fs.close(); - out_file = filename+".h" - fd = open(out_file,"w") + out_file = filename+".h" + fd = open(out_file,"w") - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); - out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + out_file_base = out_file + out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] + out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+"\n") - fd.write("#define "+out_file_ifdef+"\n") + out_file_ifdef = out_file_base.replace(".","_").upper() + fd.write("#ifndef "+out_file_ifdef+"\n") + fd.write("#define "+out_file_ifdef+"\n") - out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"ShaderGL"; - fd.write("\n\n"); - fd.write("#include \"drivers/opengl/shader_gl.h\"\n\n\n"); - fd.write("class "+out_file_class+" : public ShaderGL {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); - fd.write("public:\n\n"); + out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"ShaderGL"; + fd.write("\n\n"); + fd.write("#include \"drivers/opengl/shader_gl.h\"\n\n\n"); + fd.write("class "+out_file_class+" : public ShaderGL {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + fd.write("public:\n\n"); - if (len(conditionals)): - fd.write("\tenum Conditionals {\n"); - for x in conditionals: - fd.write("\t\t"+x+",\n"); - fd.write("\t};\n\n"); - if (len(uniforms)): - fd.write("\tenum Uniforms {\n"); - for x in uniforms: - fd.write("\t\t"+x.upper()+",\n"); - fd.write("\t};\n\n"); + if (len(conditionals)): + fd.write("\tenum Conditionals {\n"); + for x in conditionals: + fd.write("\t\t"+x+",\n"); + fd.write("\t};\n\n"); + if (len(uniforms)): + fd.write("\tenum Uniforms {\n"); + for x in uniforms: + fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t};\n\n"); - fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); - if (len(conditionals)): + fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); + if (len(conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); - fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); + fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU const Transform &tr = p_transform; @@ -283,7 +283,7 @@ def build_glsl_header( filename ): """); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU const Matrix32 &tr = p_transform; @@ -314,7 +314,7 @@ def build_glsl_header( filename ): """); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU GLfloat matrix[16]; @@ -328,231 +328,231 @@ def build_glsl_header( filename ): glUniformMatrix4fv(get_uniform(p_uniform),1,false,matrix); }; """); - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init() {\n\n"); - if (len(conditionals)): + fd.write("\tvirtual void init() {\n\n"); + if (len(conditionals)): - fd.write("\t\tstatic const char* _conditional_strings[]={\n") - if (len(conditionals)): - for x in conditionals: - fd.write("\t\t\t\"#define "+x+"\\n\",\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") + fd.write("\t\tstatic const char* _conditional_strings[]={\n") + if (len(conditionals)): + for x in conditionals: + fd.write("\t\t\t\"#define "+x+"\\n\",\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") - if (len(uniforms)): + if (len(uniforms)): - fd.write("\t\tstatic const char* _uniform_strings[]={\n") - if (len(uniforms)): - for x in uniforms: - fd.write("\t\t\t\""+x+"\",\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") + fd.write("\t\tstatic const char* _uniform_strings[]={\n") + if (len(uniforms)): + for x in uniforms: + fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") - if (len(attributes)): + if (len(attributes)): - fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") - for x in attributes: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") + fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") + for x in attributes: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") - if (len(fbos)): - fd.write("\t\tstatic FBOPair _fbo_pairs[]={\n") - for x in fbos: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic FBOPair *_fbo_pairs=NULL;\n") + if (len(fbos)): + fd.write("\t\tstatic FBOPair _fbo_pairs[]={\n") + for x in fbos: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic FBOPair *_fbo_pairs=NULL;\n") - if (len(ubos)): - fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") - for x in ubos: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic UBOPair *_ubo_pairs=NULL;\n") + if (len(ubos)): + fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") + for x in ubos: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic UBOPair *_ubo_pairs=NULL;\n") - if (len(texunits)): - fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") - for x in texunits: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") + if (len(texunits)): + fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") + for x in texunits: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") - fd.write("\t\tstatic const char* _vertex_code=\"\\\n") - for x in vertex_lines: - fd.write("\t\t\t"+x+"\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\tstatic const char* _vertex_code=\"\\\n") + for x in vertex_lines: + fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") - fd.write("\t\tstatic const char* _fragment_code=\"\\\n") - for x in fragment_lines: - fd.write("\t\t\t"+x+"\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\tstatic const char* _fragment_code=\"\\\n") + for x in fragment_lines: + fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") - fd.write("\t\tsetup(_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_attribute_pairs,"+str(len(attributes))+",_fbo_pairs,"+str(len(fbos))+",_ubo_pairs,"+str(len(ubos))+",_texunit_pairs,"+str(len(texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") - fd.write("\t};\n\n") + fd.write("\t\tsetup(_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_attribute_pairs,"+str(len(attributes))+",_fbo_pairs,"+str(len(fbos))+",_ubo_pairs,"+str(len(ubos))+",_texunit_pairs,"+str(len(texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t};\n\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n"); + fd.write("#endif\n\n"); + fd.close(); def build_glsl_headers( target, source, env ): - for x in source: + for x in source: - build_glsl_header(str(x)); + build_glsl_header(str(x)); - return 0 + return 0 def build_hlsl_dx9_header( filename ): - fs = open(filename,"r") - line=fs.readline() + fs = open(filename,"r") + line=fs.readline() - vertex_lines=[] - fragment_lines=[] - uniforms=[] - fragment_uniforms=[] - attributes=[] - fbos=[] - conditionals=[] + vertex_lines=[] + fragment_lines=[] + uniforms=[] + fragment_uniforms=[] + attributes=[] + fbos=[] + conditionals=[] - reading="" - line_offset=0 - vertex_offset=0 - fragment_offset=0 + reading="" + line_offset=0 + vertex_offset=0 + fragment_offset=0 - while(line): + while(line): - if (line.find("[vertex]")!=-1): - reading="vertex" - line=fs.readline() - line_offset+=1 - vertex_offset=line_offset - continue + if (line.find("[vertex]")!=-1): + reading="vertex" + line=fs.readline() + line_offset+=1 + vertex_offset=line_offset + continue - if (line.find("[fragment]")!=-1): - reading="fragment" - line=fs.readline() - line_offset+=1 - fragment_offset=line_offset - continue + if (line.find("[fragment]")!=-1): + reading="fragment" + line=fs.readline() + line_offset+=1 + fragment_offset=line_offset + continue - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() - if (not ifdefline in conditionals): - conditionals+=[ifdefline] + if (line.find("#ifdef ")!=-1): + ifdefline = line.replace("#ifdef ","").strip() + if (not ifdefline in conditionals): + conditionals+=[ifdefline] - if (line.find("#elif defined(")!=-1): - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() - if (not ifdefline in conditionals): - conditionals+=[ifdefline] - if (line.find("uniform")!=-1): - uline = line.replace("uniform",""); - uline = uline.replace(";",""); - lines = uline.split(",") - for x in lines: + if (line.find("#elif defined(")!=-1): + ifdefline = line.replace("#elif defined(","").strip() + ifdefline = ifdefline.replace(")","").strip() + if (not ifdefline in conditionals): + conditionals+=[ifdefline] + if (line.find("uniform")!=-1): + uline = line.replace("uniform",""); + uline = uline.replace(";",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in uniforms): - uniforms+=[x] - fragment_uniforms+=[reading=="fragment"] - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - line=line+"\\n\\" + if (not x in uniforms): + uniforms+=[x] + fragment_uniforms+=[reading=="fragment"] + line=line.replace("\r","") + line=line.replace("\n","") + line=line.replace("\\","\\\\") + line=line.replace("\"","\\\"") + line=line+"\\n\\" - if (reading=="vertex"): - vertex_lines+=[line] - if (reading=="fragment"): - fragment_lines+=[line] + if (reading=="vertex"): + vertex_lines+=[line] + if (reading=="fragment"): + fragment_lines+=[line] - line=fs.readline() - line_offset+=1 + line=fs.readline() + line_offset+=1 - fs.close(); + fs.close(); - out_file = filename+".h" - fd = open(out_file,"w") + out_file = filename+".h" + fd = open(out_file,"w") - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); - out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + out_file_base = out_file + out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] + out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+"\n") - fd.write("#define "+out_file_ifdef+"\n") + out_file_ifdef = out_file_base.replace(".","_").upper() + fd.write("#ifndef "+out_file_ifdef+"\n") + fd.write("#define "+out_file_ifdef+"\n") - out_file_class = out_file_base.replace(".hlsl.h","").title().replace("_","").replace(".","")+"ShaderDX9"; - fd.write("\n\n"); - fd.write("#include \"drivers/directx9/shader_dx9.h\"\n\n\n"); - fd.write("class "+out_file_class+" : public ShaderDX9 {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); - fd.write("public:\n\n"); + out_file_class = out_file_base.replace(".hlsl.h","").title().replace("_","").replace(".","")+"ShaderDX9"; + fd.write("\n\n"); + fd.write("#include \"drivers/directx9/shader_dx9.h\"\n\n\n"); + fd.write("class "+out_file_class+" : public ShaderDX9 {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + fd.write("public:\n\n"); - if (len(conditionals)): - fd.write("\tenum Conditionals {\n"); - for x in conditionals: - fd.write("\t\t"+x+",\n"); - fd.write("\t};\n\n"); - if (len(uniforms)): - fd.write("\tenum Uniforms {\n"); - for x in uniforms: - fd.write("\t\t"+x.upper()+",\n"); - fd.write("\t};\n\n"); + if (len(conditionals)): + fd.write("\tenum Conditionals {\n"); + for x in conditionals: + fd.write("\t\t"+x+",\n"); + fd.write("\t};\n\n"); + if (len(uniforms)): + fd.write("\tenum Uniforms {\n"); + for x in uniforms: + fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t};\n\n"); - if (len(conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); + if (len(conditionals)): + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); - fd.write("\t#define _FU if (!_uniform_valid(p_uniform)) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, bool p_value) { _FU set_uniformb(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU float col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; set_uniformfv(p_uniform,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU float vec2[4]={p_vec2.x,p_vec2.y,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU float vec3[4]={p_vec3.x,p_vec3.y,p_vec3.z,0}; set_uniformfv(p_uniform,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU float vec2[4]={p_a,p_b,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU float vec3[4]={p_a,p_b,p_c,0}; set_uniformfv(p_uniform,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU float vec4[4]={p_a,p_b,p_c,p_d}; set_uniformfv(p_uniform,vec4); }\n\n"); + fd.write("\t#define _FU if (!_uniform_valid(p_uniform)) return; ERR_FAIL_COND( get_active()!=this );\n\n "); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, bool p_value) { _FU set_uniformb(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU float col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; set_uniformfv(p_uniform,col); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU float vec2[4]={p_vec2.x,p_vec2.y,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU float vec3[4]={p_vec3.x,p_vec3.y,p_vec3.z,0}; set_uniformfv(p_uniform,vec3); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU float vec2[4]={p_a,p_b,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU float vec3[4]={p_a,p_b,p_c,0}; set_uniformfv(p_uniform,vec3); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU float vec4[4]={p_a,p_b,p_c,p_d}; set_uniformfv(p_uniform,vec4); }\n\n"); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU const Transform &tr = p_transform; @@ -581,7 +581,7 @@ def build_hlsl_dx9_header( filename ): """); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU float matrix[16]; @@ -595,299 +595,299 @@ def build_hlsl_dx9_header( filename ): set_uniformfv(p_uniform,&matrix[0],4); }; """); - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init(IDirect3DDevice9 *p_device,ShaderSupport p_version) {\n\n"); - if (len(conditionals)): + fd.write("\tvirtual void init(IDirect3DDevice9 *p_device,ShaderSupport p_version) {\n\n"); + if (len(conditionals)): - fd.write("\t\tstatic const char* _conditional_strings[]={\n") - if (len(conditionals)): - for x in conditionals: - fd.write("\t\t\t\""+x+"\",\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") + fd.write("\t\tstatic const char* _conditional_strings[]={\n") + if (len(conditionals)): + for x in conditionals: + fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") - if (len(uniforms)): + if (len(uniforms)): - fd.write("\t\tstatic const char* _uniform_strings[]={\n") - if (len(uniforms)): - for x in uniforms: - fd.write("\t\t\t\""+x+"\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\tstatic const char* _uniform_strings[]={\n") + if (len(uniforms)): + for x in uniforms: + fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t};\n\n"); - fd.write("\t\tstatic const bool _fragment_uniforms[]={\n") + fd.write("\t\tstatic const bool _fragment_uniforms[]={\n") - if (len(uniforms)): - for x in fragment_uniforms: - if (x): - fd.write("\t\t\ttrue,\n"); - else: - fd.write("\t\t\tfalse,\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") - fd.write("\t\tstatic const bool *_fragment_uniforms=NULL;\n") + if (len(uniforms)): + for x in fragment_uniforms: + if (x): + fd.write("\t\t\ttrue,\n"); + else: + fd.write("\t\t\tfalse,\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") + fd.write("\t\tstatic const bool *_fragment_uniforms=NULL;\n") - fd.write("\t\tstatic const char* _vertex_code=\"\\\n") - for x in vertex_lines: - fd.write("\t\t\t"+x+"\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\tstatic const char* _vertex_code=\"\\\n") + for x in vertex_lines: + fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") - fd.write("\t\tstatic const char* _fragment_code=\"\\\n") - for x in fragment_lines: - fd.write("\t\t\t"+x+"\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\tstatic const char* _fragment_code=\"\\\n") + for x in fragment_lines: + fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") - fd.write("\t\tsetup(p_device,p_version,_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_fragment_uniforms,_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") - fd.write("\t};\n\n") + fd.write("\t\tsetup(p_device,p_version,_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_fragment_uniforms,_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t};\n\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n"); + fd.write("#endif\n\n"); + fd.close(); def build_hlsl_dx9_headers( target, source, env ): - for x in source: + for x in source: - build_hlsl_dx9_header(str(x)); + build_hlsl_dx9_header(str(x)); - return 0 + return 0 class LegacyGLHeaderStruct: - def __init__(self): - self.vertex_lines=[] - self.fragment_lines=[] - self.uniforms=[] - self.attributes=[] - self.fbos=[] - self.conditionals=[] - self.enums={} - self.texunits=[] - self.texunit_names=[] - self.ubos=[] - self.ubo_names=[] - - self.vertex_included_files=[] - self.fragment_included_files=[] - - self.reading="" - self.line_offset=0 - self.vertex_offset=0 - self.fragment_offset=0 - + def __init__(self): + self.vertex_lines=[] + self.fragment_lines=[] + self.uniforms=[] + self.attributes=[] + self.fbos=[] + self.conditionals=[] + self.enums={} + self.texunits=[] + self.texunit_names=[] + self.ubos=[] + self.ubo_names=[] + + self.vertex_included_files=[] + self.fragment_included_files=[] + + self.reading="" + self.line_offset=0 + self.vertex_offset=0 + self.fragment_offset=0 + def include_file_in_legacygl_header( filename, header_data, depth ): - fs = open(filename,"r") - line=fs.readline() + fs = open(filename,"r") + line=fs.readline() - while(line): + while(line): - if (line.find("[vertex]")!=-1): - header_data.reading="vertex" - line=fs.readline() - header_data.line_offset+=1 - header_data.vertex_offset=header_data.line_offset - continue + if (line.find("[vertex]")!=-1): + header_data.reading="vertex" + line=fs.readline() + header_data.line_offset+=1 + header_data.vertex_offset=header_data.line_offset + continue - if (line.find("[fragment]")!=-1): - header_data.reading="fragment" - line=fs.readline() - header_data.line_offset+=1 - header_data.fragment_offset=header_data.line_offset - continue - - while(line.find("#include ")!=-1): - includeline = line.replace("#include ","").strip()[1:-1] - - import os.path - - included_file = os.path.relpath(os.path.dirname(filename) + "/" + includeline) - if (not included_file in header_data.vertex_included_files and header_data.reading=="vertex"): - header_data.vertex_included_files+=[included_file] - if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): - print "Error in file '" + filename + "': #include " + includeline + "could not be found!" - elif (not included_file in header_data.fragment_included_files and header_data.reading=="fragment"): - header_data.fragment_included_files+=[included_file] - if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): - print "Error in file '" + filename + "': #include " + includeline + "could not be found!" - - line=fs.readline() + if (line.find("[fragment]")!=-1): + header_data.reading="fragment" + line=fs.readline() + header_data.line_offset+=1 + header_data.fragment_offset=header_data.line_offset + continue - if (line.find("#ifdef ")!=-1 or line.find("#elif defined(")!=-1): - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() - else: - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() + while(line.find("#include ")!=-1): + includeline = line.replace("#include ","").strip()[1:-1] - if (line.find("_EN_")!=-1): - enumbase = ifdefline[:ifdefline.find("_EN_")]; - ifdefline = ifdefline.replace("_EN_","_") - line = line.replace("_EN_","_") + import os.path + + included_file = os.path.relpath(os.path.dirname(filename) + "/" + includeline) + if (not included_file in header_data.vertex_included_files and header_data.reading=="vertex"): + header_data.vertex_included_files+=[included_file] + if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): + print "Error in file '" + filename + "': #include " + includeline + "could not be found!" + elif (not included_file in header_data.fragment_included_files and header_data.reading=="fragment"): + header_data.fragment_included_files+=[included_file] + if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): + print "Error in file '" + filename + "': #include " + includeline + "could not be found!" + + line=fs.readline() + + if (line.find("#ifdef ")!=-1 or line.find("#elif defined(")!=-1): + if (line.find("#ifdef ")!=-1): + ifdefline = line.replace("#ifdef ","").strip() + else: + ifdefline = line.replace("#elif defined(","").strip() + ifdefline = ifdefline.replace(")","").strip() + + if (line.find("_EN_")!=-1): + enumbase = ifdefline[:ifdefline.find("_EN_")]; + ifdefline = ifdefline.replace("_EN_","_") + line = line.replace("_EN_","_") # print(enumbase+":"+ifdefline); - if (enumbase not in header_data.enums): - header_data.enums[enumbase]=[] - if (ifdefline not in header_data.enums[enumbase]): - header_data.enums[enumbase].append(ifdefline); + if (enumbase not in header_data.enums): + header_data.enums[enumbase]=[] + if (ifdefline not in header_data.enums[enumbase]): + header_data.enums[enumbase].append(ifdefline); - elif (not ifdefline in header_data.conditionals): - header_data.conditionals+=[ifdefline] + elif (not ifdefline in header_data.conditionals): + header_data.conditionals+=[ifdefline] - if (line.find("uniform")!=-1 and line.lower().find("texunit:")!=-1): - #texture unit - texunitstr = line[line.find(":")+1:].strip() - if (texunitstr=="auto"): - texunit="-1" - else: - texunit = str(int(texunitstr )) - uline=line[:line.lower().find("//")] - uline = uline.replace("uniform",""); - uline = uline.replace("highp",""); - uline = uline.replace(";",""); - lines = uline.split(",") - for x in lines: + if (line.find("uniform")!=-1 and line.lower().find("texunit:")!=-1): + #texture unit + texunitstr = line[line.find(":")+1:].strip() + if (texunitstr=="auto"): + texunit="-1" + else: + texunit = str(int(texunitstr )) + uline=line[:line.lower().find("//")] + uline = uline.replace("uniform",""); + uline = uline.replace("highp",""); + uline = uline.replace(";",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in header_data.texunit_names): - header_data.texunits+=[(x,texunit)] - header_data.texunit_names+=[x] + if (not x in header_data.texunit_names): + header_data.texunits+=[(x,texunit)] + header_data.texunit_names+=[x] - elif (line.find("uniform")!=-1): - uline = line.replace("uniform",""); - uline = uline.replace(";",""); - lines = uline.split(",") - for x in lines: + elif (line.find("uniform")!=-1): + uline = line.replace("uniform",""); + uline = uline.replace(";",""); + lines = uline.split(",") + for x in lines: - x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x.strip() + x = x[ x.rfind(" ")+1: ] + if (x.find("[")!=-1): + #unfiorm array + x = x[ :x.find("[") ] - if (not x in header_data.uniforms): - header_data.uniforms+=[x] + if (not x in header_data.uniforms): + header_data.uniforms+=[x] - if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): - uline = line.replace("in ",""); - uline = uline.replace("attribute ",""); - uline = uline.replace("highp ",""); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): + uline = line.replace("in ",""); + uline = uline.replace("attribute ",""); + uline = uline.replace("highp ",""); + uline = uline.replace(";",""); + uline = uline[ uline.find(" "): ].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("attrib:")!=-1): - name=name.strip() - bind=bind.replace("attrib:","").strip() - header_data.attributes+=[(name,bind)] + if (uline.find("//")!=-1): + name,bind = uline.split("//") + if (bind.find("attrib:")!=-1): + name=name.strip() + bind=bind.replace("attrib:","").strip() + header_data.attributes+=[(name,bind)] - line=line.replace("\r","") - line=line.replace("\n","") - #line=line.replace("\\","\\\\") - #line=line.replace("\"","\\\"") - #line=line+"\\n\\" + line=line.replace("\r","") + line=line.replace("\n","") + #line=line.replace("\\","\\\\") + #line=line.replace("\"","\\\"") + #line=line+"\\n\\" - if (header_data.reading=="vertex"): - header_data.vertex_lines+=[line] - if (header_data.reading=="fragment"): - header_data.fragment_lines+=[line] + if (header_data.reading=="vertex"): + header_data.vertex_lines+=[line] + if (header_data.reading=="fragment"): + header_data.fragment_lines+=[line] + + line=fs.readline() + header_data.line_offset+=1 + + fs.close(); + + return header_data - line=fs.readline() - header_data.line_offset+=1 - - fs.close(); - - return header_data - def build_legacygl_header( filename, include, class_suffix, output_attribs ): - - header_data = LegacyGLHeaderStruct() - include_file_in_legacygl_header( filename, header_data, 0 ) - out_file = filename+".h" - fd = open(out_file,"w") - - enum_constants=[] + header_data = LegacyGLHeaderStruct() + include_file_in_legacygl_header( filename, header_data, 0 ) - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + out_file = filename+".h" + fd = open(out_file,"w") - out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + enum_constants=[] + + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + + out_file_base = out_file + out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] + out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+class_suffix+"_120\n") - fd.write("#define "+out_file_ifdef+class_suffix+"_120\n") + out_file_ifdef = out_file_base.replace(".","_").upper() + fd.write("#ifndef "+out_file_ifdef+class_suffix+"_120\n") + fd.write("#define "+out_file_ifdef+class_suffix+"_120\n") - out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"Shader"+class_suffix; - fd.write("\n\n"); - fd.write("#include \"" + include + "\"\n\n\n"); - fd.write("class "+out_file_class+" : public Shader"+class_suffix+" {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"Shader"+class_suffix; + fd.write("\n\n"); + fd.write("#include \"" + include + "\"\n\n\n"); + fd.write("class "+out_file_class+" : public Shader"+class_suffix+" {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); - fd.write("public:\n\n"); + fd.write("public:\n\n"); - if (len(header_data.conditionals)): - fd.write("\tenum Conditionals {\n"); - for x in header_data.conditionals: - fd.write("\t\t"+x.upper()+",\n"); - fd.write("\t};\n\n"); + if (len(header_data.conditionals)): + fd.write("\tenum Conditionals {\n"); + for x in header_data.conditionals: + fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t};\n\n"); - if (len(header_data.uniforms)): - fd.write("\tenum Uniforms {\n"); - for x in header_data.uniforms: - fd.write("\t\t"+x.upper()+",\n"); - fd.write("\t};\n\n"); + if (len(header_data.uniforms)): + fd.write("\tenum Uniforms {\n"); + for x in header_data.uniforms: + fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t};\n\n"); - fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); - if (len(header_data.conditionals)): + fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); + if (len(header_data.conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); - fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); + fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU const Transform &tr = p_transform; @@ -918,7 +918,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): """); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU const Matrix32 &tr = p_transform; @@ -949,7 +949,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): """); - fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU + fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU GLfloat matrix[16]; @@ -963,277 +963,277 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): glUniformMatrix4fv(get_uniform(p_uniform),1,false,matrix); }; """); - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init() {\n\n"); + fd.write("\tvirtual void init() {\n\n"); - enum_value_count=0; + enum_value_count=0; - if (len(header_data.enums)): + if (len(header_data.enums)): - fd.write("\t\t//Written using math, given nonstandarity of 64 bits integer constants..\n"); - fd.write("\t\tstatic const Enum _enums[]={\n") + fd.write("\t\t//Written using math, given nonstandarity of 64 bits integer constants..\n"); + fd.write("\t\tstatic const Enum _enums[]={\n") - bitofs=len(header_data.conditionals) - enum_vals=[] + bitofs=len(header_data.conditionals) + enum_vals=[] - for xv in header_data.enums: - x=header_data.enums[xv] - bits=1 - amt = len(x); + for xv in header_data.enums: + x=header_data.enums[xv] + bits=1 + amt = len(x); # print(x) - while(2**bits < amt): - bits+=1 + while(2**bits < amt): + bits+=1 # print("amount: "+str(amt)+" bits "+str(bits)); - strs="{" - for i in range(amt): - strs+="\"#define "+x[i]+"\\n\"," + strs="{" + for i in range(amt): + strs+="\"#define "+x[i]+"\\n\"," - v={} - v["set_mask"]="uint64_t("+str(i)+")<<"+str(bitofs) - v["clear_mask"]="((uint64_t(1)<<40)-1) ^ (((uint64_t(1)<<"+str(bits)+") - 1)<<"+str(bitofs)+")" - enum_vals.append(v) - enum_constants.append(x[i]) + v={} + v["set_mask"]="uint64_t("+str(i)+")<<"+str(bitofs) + v["clear_mask"]="((uint64_t(1)<<40)-1) ^ (((uint64_t(1)<<"+str(bits)+") - 1)<<"+str(bitofs)+")" + enum_vals.append(v) + enum_constants.append(x[i]) - strs+="NULL}" + strs+="NULL}" - fd.write("\t\t\t{(uint64_t(1<<"+str(bits)+")-1)<<"+str(bitofs)+","+str(bitofs)+","+strs+"},\n"); - bitofs+=bits + fd.write("\t\t\t{(uint64_t(1<<"+str(bits)+")-1)<<"+str(bitofs)+","+str(bitofs)+","+strs+"},\n"); + bitofs+=bits - fd.write("\t\t};\n\n"); + fd.write("\t\t};\n\n"); - fd.write("\t\tstatic const EnumValue _enum_values[]={\n") + fd.write("\t\tstatic const EnumValue _enum_values[]={\n") - enum_value_count=len(enum_vals); - for x in enum_vals: - fd.write("\t\t\t{"+x["set_mask"]+","+x["clear_mask"]+"},\n"); + enum_value_count=len(enum_vals); + for x in enum_vals: + fd.write("\t\t\t{"+x["set_mask"]+","+x["clear_mask"]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const Enum *_enums=NULL;\n") - fd.write("\t\tstatic const EnumValue *_enum_values=NULL;\n") + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const Enum *_enums=NULL;\n") + fd.write("\t\tstatic const EnumValue *_enum_values=NULL;\n") - if (len(header_data.conditionals)): + if (len(header_data.conditionals)): - fd.write("\t\tstatic const char* _conditional_strings[]={\n") - if (len(header_data.conditionals)): - for x in header_data.conditionals: - fd.write("\t\t\t\"#define "+x+"\\n\",\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") + fd.write("\t\tstatic const char* _conditional_strings[]={\n") + if (len(header_data.conditionals)): + for x in header_data.conditionals: + fd.write("\t\t\t\"#define "+x+"\\n\",\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") - if (len(header_data.uniforms)): + if (len(header_data.uniforms)): - fd.write("\t\tstatic const char* _uniform_strings[]={\n") - if (len(header_data.uniforms)): - for x in header_data.uniforms: - fd.write("\t\t\t\""+x+"\",\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") + fd.write("\t\tstatic const char* _uniform_strings[]={\n") + if (len(header_data.uniforms)): + for x in header_data.uniforms: + fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") - if output_attribs: - if (len(header_data.attributes)): + if output_attribs: + if (len(header_data.attributes)): - fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") - for x in header_data.attributes: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") + fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") + for x in header_data.attributes: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") - if (len(header_data.texunits)): - fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") - for x in header_data.texunits: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); - fd.write("\t\t};\n\n"); - else: - fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") + if (len(header_data.texunits)): + fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") + for x in header_data.texunits: + fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t};\n\n"); + else: + fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") - fd.write("\t\tstatic const char _vertex_code[]={\n") - for x in header_data.vertex_lines: - for i in range(len(x)): - fd.write(str(ord(x[i]))+","); + fd.write("\t\tstatic const char _vertex_code[]={\n") + for x in header_data.vertex_lines: + for i in range(len(x)): + fd.write(str(ord(x[i]))+","); - fd.write(str(ord('\n'))+","); - fd.write("\t\t0};\n\n"); + fd.write(str(ord('\n'))+","); + fd.write("\t\t0};\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(header_data.vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start="+str(header_data.vertex_offset)+";\n") - fd.write("\t\tstatic const char _fragment_code[]={\n") - for x in header_data.fragment_lines: - for i in range(len(x)): - fd.write(str(ord(x[i]))+","); + fd.write("\t\tstatic const char _fragment_code[]={\n") + for x in header_data.fragment_lines: + for i in range(len(x)): + fd.write(str(ord(x[i]))+","); - fd.write(str(ord('\n'))+","); - fd.write("\t\t0};\n\n"); + fd.write(str(ord('\n'))+","); + fd.write("\t\t0};\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(header_data.fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start="+str(header_data.fragment_offset)+";\n") - if output_attribs: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_attribute_pairs,"+str(len(header_data.attributes))+", _texunit_pairs,"+str(len(header_data.texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") - else: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_texunit_pairs,"+str(len(header_data.texunits))+",_enums,"+str(len(header_data.enums))+",_enum_values,"+str(enum_value_count)+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + if output_attribs: + fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_attribute_pairs,"+str(len(header_data.attributes))+", _texunit_pairs,"+str(len(header_data.texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + else: + fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_texunit_pairs,"+str(len(header_data.texunits))+",_enums,"+str(len(header_data.enums))+",_enum_values,"+str(enum_value_count)+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") - fd.write("\t};\n\n") + fd.write("\t};\n\n") - if (len(enum_constants)): + if (len(enum_constants)): - fd.write("\tenum EnumConditionals {\n") - for x in enum_constants: - fd.write("\t\t"+x.upper()+",\n"); - fd.write("\t};\n\n"); - fd.write("\tvoid set_enum_conditional(EnumConditionals p_cond) { _set_enum_conditional(p_cond); }\n") + fd.write("\tenum EnumConditionals {\n") + for x in enum_constants: + fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t};\n\n"); + fd.write("\tvoid set_enum_conditional(EnumConditionals p_cond) { _set_enum_conditional(p_cond); }\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n"); + fd.write("#endif\n\n"); + fd.close(); def build_legacygl_headers( target, source, env ): - for x in source: + for x in source: - build_legacygl_header(str(x), include = "drivers/legacygl/shader_lgl.h", class_suffix = "LGL", output_attribs = False); + build_legacygl_header(str(x), include = "drivers/legacygl/shader_lgl.h", class_suffix = "LGL", output_attribs = False); - return 0 + return 0 def build_gles2_headers( target, source, env ): - for x in source: - build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix = "GLES2", output_attribs = True) + for x in source: + build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix = "GLES2", output_attribs = True) def update_version(): - rev = "custom_build" + rev = "custom_build" - if (os.getenv("BUILD_REVISION")!=None): - rev=os.getenv("BUILD_REVISION") - print("Using custom revision: "+rev) - import version + if (os.getenv("BUILD_REVISION")!=None): + rev=os.getenv("BUILD_REVISION") + print("Using custom revision: "+rev) + import version - f=open("core/version.h","wb") - f.write("#define VERSION_SHORT_NAME "+str(version.short_name)+"\n") - f.write("#define VERSION_NAME "+str(version.name)+"\n") - f.write("#define VERSION_MAJOR "+str(version.major)+"\n") - f.write("#define VERSION_MINOR "+str(version.minor)+"\n") - if (hasattr(version, 'patch')): - f.write("#define VERSION_PATCH "+str(version.patch)+"\n") - f.write("#define VERSION_REVISION "+str(rev)+"\n") - f.write("#define VERSION_STATUS "+str(version.status)+"\n") - import datetime - f.write("#define VERSION_YEAR "+str(datetime.datetime.now().year)+"\n") + f=open("core/version.h","wb") + f.write("#define VERSION_SHORT_NAME "+str(version.short_name)+"\n") + f.write("#define VERSION_NAME "+str(version.name)+"\n") + f.write("#define VERSION_MAJOR "+str(version.major)+"\n") + f.write("#define VERSION_MINOR "+str(version.minor)+"\n") + if (hasattr(version, 'patch')): + f.write("#define VERSION_PATCH "+str(version.patch)+"\n") + f.write("#define VERSION_REVISION "+str(rev)+"\n") + f.write("#define VERSION_STATUS "+str(version.status)+"\n") + import datetime + f.write("#define VERSION_YEAR "+str(datetime.datetime.now().year)+"\n") def parse_cg_file(fname, uniforms, sizes, conditionals): - import re - fs = open(fname, "r") - line=fs.readline() + import re + fs = open(fname, "r") + line=fs.readline() - while line: + while line: - if re.match(r"^\s*uniform", line): + if re.match(r"^\s*uniform", line): - res = re.match(r"uniform ([\d\w]*) ([\d\w]*)") - type = res.groups(1) - name = res.groups(2) + res = re.match(r"uniform ([\d\w]*) ([\d\w]*)") + type = res.groups(1) + name = res.groups(2) - uniforms.append(name); + uniforms.append(name); - if (type.find("texobj") != -1): - sizes.append(1); - else: - t = re.match(r"float(\d)x(\d)", type); - if t: - sizes.append(int(t.groups(1)) * int(t.groups(2))) - else: - t = re.match(r"float(\d)", type); - sizes.append(int(t.groups(1))) + if (type.find("texobj") != -1): + sizes.append(1); + else: + t = re.match(r"float(\d)x(\d)", type); + if t: + sizes.append(int(t.groups(1)) * int(t.groups(2))) + else: + t = re.match(r"float(\d)", type); + sizes.append(int(t.groups(1))) - if line.find("[branch]") != -1: - conditionals.append(name); + if line.find("[branch]") != -1: + conditionals.append(name); - line = fs.readline(); + line = fs.readline(); def build_cg_shader(sname): - vp_uniforms = [] - vp_uniform_sizes = [] - vp_conditionals = [] + vp_uniforms = [] + vp_uniform_sizes = [] + vp_conditionals = [] - parse_cg_file("vp_"+sname+".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals); + parse_cg_file("vp_"+sname+".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals); - fp_uniforms = [] - fp_uniform_sizes = [] - fp_conditionals = [] + fp_uniforms = [] + fp_uniform_sizes = [] + fp_conditionals = [] - parse_cg_file("fp_"+sname+".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals); + parse_cg_file("fp_"+sname+".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals); - fd = open("shader_"+sname+".cg.h", "w"); + fd = open("shader_"+sname+".cg.h", "w"); - fd.write('\n#include "shader_cell.h"\n'); - fd.write("\nclass Shader_" + sname + " : public ShaderCell {\n"); - fd.write("\n\tstatic struct VertexUniforms[] = {\n"); + fd.write('\n#include "shader_cell.h"\n'); + fd.write("\nclass Shader_" + sname + " : public ShaderCell {\n"); + fd.write("\n\tstatic struct VertexUniforms[] = {\n"); - offset = 0; - for i in range(0, len(vp_uniforms)): + offset = 0; + for i in range(0, len(vp_uniforms)): - fd.write('\t\t{ "%s", %d, %d },\n' % (vp_uniforms[i], offset, vp_uniform_sizes[i])) - offset = offset + vp_uniform_sizes[i]; - fd.write("\t};\n\n"); + fd.write('\t\t{ "%s", %d, %d },\n' % (vp_uniforms[i], offset, vp_uniform_sizes[i])) + offset = offset + vp_uniform_sizes[i]; + fd.write("\t};\n\n"); - fd.write("public:\n\n"); + fd.write("public:\n\n"); - fd.write("\tenum {\n"); + fd.write("\tenum {\n"); - for i in range(0, len(vp_uniforms)): + for i in range(0, len(vp_uniforms)): - fd.write('\t\tVP_%s,\n' % vp_uniforms[i].upper()) + fd.write('\t\tVP_%s,\n' % vp_uniforms[i].upper()) - fd.write("\t};\n"); + fd.write("\t};\n"); import glob def detect_modules(): - module_list=[] - includes_cpp="" - register_cpp="" - unregister_cpp="" + module_list=[] + includes_cpp="" + register_cpp="" + unregister_cpp="" - files = glob.glob("modules/*") - files.sort() #so register_module_types does not change that often, and also plugins are registered in alphabetic order - for x in files: - if (not os.path.isdir(x)): - continue - x=x.replace("modules/","") # rest of world - x=x.replace("modules\\","") # win32 - module_list.append(x) - try: - with open("modules/"+x+"/register_types.h"): - includes_cpp+='#include "modules/'+x+'/register_types.h"\n' - register_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' - register_cpp+='\tregister_'+x+'_types();\n' - register_cpp+='#endif\n' - unregister_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' - unregister_cpp+='\tunregister_'+x+'_types();\n' - unregister_cpp+='#endif\n' - except IOError: - pass + files = glob.glob("modules/*") + files.sort() #so register_module_types does not change that often, and also plugins are registered in alphabetic order + for x in files: + if (not os.path.isdir(x)): + continue + x=x.replace("modules/","") # rest of world + x=x.replace("modules\\","") # win32 + module_list.append(x) + try: + with open("modules/"+x+"/register_types.h"): + includes_cpp+='#include "modules/'+x+'/register_types.h"\n' + register_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' + register_cpp+='\tregister_'+x+'_types();\n' + register_cpp+='#endif\n' + unregister_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' + unregister_cpp+='\tunregister_'+x+'_types();\n' + unregister_cpp+='#endif\n' + except IOError: + pass - modules_cpp=""" + modules_cpp=""" // modules.cpp - THIS FILE IS GENERATED, DO NOT EDIT!!!!!!! #include "register_module_types.h" @@ -1254,31 +1254,31 @@ void unregister_module_types() { """ - f=open("modules/register_module_types.cpp","wb") - f.write(modules_cpp) + f=open("modules/register_module_types.cpp","wb") + f.write(modules_cpp) - return module_list + return module_list def win32_spawn(sh, escape, cmd, args, env): - import subprocess - newargs = ' '.join(args[1:]) - cmdline = cmd + " " + newargs - startupinfo = subprocess.STARTUPINFO() - #startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - for e in env: - if type(env[e]) != type(""): - env[e] = str(env[e]) - proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) - data, err = proc.communicate() - rv = proc.wait() - if rv: - print "=====" - print err - print "=====" - return rv + import subprocess + newargs = ' '.join(args[1:]) + cmdline = cmd + " " + newargs + startupinfo = subprocess.STARTUPINFO() + #startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + for e in env: + if type(env[e]) != type(""): + env[e] = str(env[e]) + proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + data, err = proc.communicate() + rv = proc.wait() + if rv: + print "=====" + print err + print "=====" + return rv """ def win32_spawn(sh, escape, cmd, args, spawnenv): @@ -1310,47 +1310,47 @@ def win32_spawn(sh, escape, cmd, args, spawnenv): """ def android_add_maven_repository(self,url): - self.android_maven_repos.append(url) + self.android_maven_repos.append(url) def android_add_dependency(self,depline): - self.android_dependencies.append(depline) + self.android_dependencies.append(depline) def android_add_java_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath - self.android_java_dirs.append(base_path) + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath + self.android_java_dirs.append(base_path) def android_add_res_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath - self.android_res_dirs.append(base_path) + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath + self.android_res_dirs.append(base_path) def android_add_aidl_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath - self.android_aidl_dirs.append(base_path) + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath + self.android_aidl_dirs.append(base_path) def android_add_jni_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath - self.android_jni_dirs.append(base_path) + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath + self.android_jni_dirs.append(base_path) def android_add_default_config(self,config): - self.android_default_config.append(config) - + self.android_default_config.append(config) + def android_add_to_manifest(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_manifest_chunk+=f.read() + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file + f = open(base_path,"rb") + self.android_manifest_chunk+=f.read() def android_add_to_permissions(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_permission_chunk+=f.read() + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file + f = open(base_path,"rb") + self.android_permission_chunk+=f.read() def android_add_to_attributes(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_appattributes_chunk+=f.read() + base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file + f = open(base_path,"rb") + self.android_appattributes_chunk+=f.read() def disable_module(self): - self.disabled_modules.append(self.current_module) + self.disabled_modules.append(self.current_module) def use_windows_spawn_fix(self, platform=None): if (os.name!="nt"): - return #not needed, only for windows + return #not needed, only for windows # On Windows, due to the limited command line length, when creating a static library # from a very high number of objects SCons will invoke "ar" once per object file; @@ -1365,211 +1365,211 @@ def use_windows_spawn_fix(self, platform=None): import subprocess def mySubProcess(cmdline,env): - prefix = "" - if(platform == 'javascript'): - prefix = "python.exe " + prefix = "" + if(platform == 'javascript'): + prefix = "python.exe " - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - proc = subprocess.Popen(prefix + cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) - data, err = proc.communicate() - rv = proc.wait() - if rv: - print "=====" - print err - print "=====" - return rv + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + proc = subprocess.Popen(prefix + cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + data, err = proc.communicate() + rv = proc.wait() + if rv: + print "=====" + print err + print "=====" + return rv def mySpawn(sh, escape, cmd, args, env): - newargs = ' '.join(args[1:]) - cmdline = cmd + " " + newargs + newargs = ' '.join(args[1:]) + cmdline = cmd + " " + newargs - rv=0 - env = {str(key): str(value) for key, value in env.iteritems()} - if len(cmdline) > 32000 and cmd.endswith("ar") : - cmdline = cmd + " " + args[1] + " " + args[2] + " " - for i in range(3,len(args)) : - rv = mySubProcess( cmdline + args[i], env ) - if rv : - break - else: - rv = mySubProcess( cmdline, env ) + rv=0 + env = {str(key): str(value) for key, value in env.iteritems()} + if len(cmdline) > 32000 and cmd.endswith("ar") : + cmdline = cmd + " " + args[1] + " " + args[2] + " " + for i in range(3,len(args)) : + rv = mySubProcess( cmdline + args[i], env ) + if rv : + break + else: + rv = mySubProcess( cmdline, env ) - return rv + return rv self['SPAWN'] = mySpawn def split_lib(self, libname): - import string - env = self + import string + env = self - num = 0 - cur_base = "" - max_src = 64 - list = [] - lib_list = [] + num = 0 + cur_base = "" + max_src = 64 + list = [] + lib_list = [] - for f in getattr(env, libname + "_sources"): - fname = "" - if type(f) == type(""): - fname = env.File(f).path - else: - fname = env.File(f)[0].path - fname = fname.replace("\\", "/") - base = string.join(fname.split("/")[:2], "/") - if base != cur_base and len(list) > max_src: - if num > 0: - lib = env.Library(libname + str(num), list) - lib_list.append(lib) - list = [] - num = num + 1 - cur_base = base - list.append(f) + for f in getattr(env, libname + "_sources"): + fname = "" + if type(f) == type(""): + fname = env.File(f).path + else: + fname = env.File(f)[0].path + fname = fname.replace("\\", "/") + base = string.join(fname.split("/")[:2], "/") + if base != cur_base and len(list) > max_src: + if num > 0: + lib = env.Library(libname + str(num), list) + lib_list.append(lib) + list = [] + num = num + 1 + cur_base = base + list.append(f) - lib = env.Library(libname + str(num), list) - lib_list.append(lib) + lib = env.Library(libname + str(num), list) + lib_list.append(lib) - if len(lib_list) > 0: - import os, sys - if os.name == 'posix' and sys.platform == 'msys': - env.Replace(ARFLAGS = ['rcsT']) - lib = env.Library(libname + "_collated", lib_list) - lib_list = [lib] + if len(lib_list) > 0: + import os, sys + if os.name == 'posix' and sys.platform == 'msys': + env.Replace(ARFLAGS = ['rcsT']) + lib = env.Library(libname + "_collated", lib_list) + lib_list = [lib] - lib_base = [] - env.add_source_files(lib_base, "*.cpp") - lib_list.insert(0, env.Library(libname, lib_base)) + lib_base = [] + env.add_source_files(lib_base, "*.cpp") + lib_list.insert(0, env.Library(libname, lib_base)) - env.Prepend(LIBS = lib_list) + env.Prepend(LIBS = lib_list) def save_active_platforms(apnames,ap): - for x in ap: - pth = x+"/logo.png" + for x in ap: + pth = x+"/logo.png" # print("open path: "+pth) - pngf=open(pth,"rb"); - b=pngf.read(1); - str=" /* AUTOGENERATED FILE, DO NOT EDIT */ \n" - str+=" static const unsigned char _"+x[9:]+"_logo[]={" - while(len(b)==1): - str+=hex(ord(b)) - b=pngf.read(1); - if (len(b)==1): - str+="," + pngf=open(pth,"rb"); + b=pngf.read(1); + str=" /* AUTOGENERATED FILE, DO NOT EDIT */ \n" + str+=" static const unsigned char _"+x[9:]+"_logo[]={" + while(len(b)==1): + str+=hex(ord(b)) + b=pngf.read(1); + if (len(b)==1): + str+="," - str+="};\n" + str+="};\n" - wf = x+"/logo.h" - logow = open(wf,"wb") - logow.write(str) + wf = x+"/logo.h" + logow = open(wf,"wb") + logow.write(str) def no_verbose(sys,env): - #If the output is not a terminal, do nothing - if not sys.stdout.isatty(): - return + #If the output is not a terminal, do nothing + if not sys.stdout.isatty(): + return - colors = {} - colors['cyan'] = '\033[96m' - colors['purple'] = '\033[95m' - colors['blue'] = '\033[94m' - colors['green'] = '\033[92m' - colors['yellow'] = '\033[93m' - colors['red'] = '\033[91m' - colors['end'] = '\033[0m' + colors = {} + colors['cyan'] = '\033[96m' + colors['purple'] = '\033[95m' + colors['blue'] = '\033[94m' + colors['green'] = '\033[92m' + colors['yellow'] = '\033[93m' + colors['red'] = '\033[91m' + colors['end'] = '\033[0m' - compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) - java_compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) - compile_shared_source_message = '%sCompiling shared %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) - link_program_message = '%sLinking Program %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - link_library_message = '%sLinking Static Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - ranlib_library_message = '%sRanlib Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - link_shared_library_message = '%sLinking Shared Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - java_library_message = '%sCreating Java Archive %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) + java_compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) + compile_shared_source_message = '%sCompiling shared %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) + link_program_message = '%sLinking Program %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + link_library_message = '%sLinking Static Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + ranlib_library_message = '%sRanlib Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + link_shared_library_message = '%sLinking Shared Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + java_library_message = '%sCreating Java Archive %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - env.Append( CXXCOMSTR=[compile_source_message] ) - env.Append( CCCOMSTR=[compile_source_message] ) - env.Append( SHCCCOMSTR=[compile_shared_source_message] ) - env.Append( SHCXXCOMSTR=[compile_shared_source_message] ) - env.Append( ARCOMSTR=[link_library_message] ) - env.Append( RANLIBCOMSTR=[ranlib_library_message] ) - env.Append( SHLINKCOMSTR=[link_shared_library_message] ) - env.Append( LINKCOMSTR=[link_program_message] ) - env.Append( JARCOMSTR=[java_library_message] ) - env.Append( JAVACCOMSTR=[java_compile_source_message] ) + env.Append( CXXCOMSTR=[compile_source_message] ) + env.Append( CCCOMSTR=[compile_source_message] ) + env.Append( SHCCCOMSTR=[compile_shared_source_message] ) + env.Append( SHCXXCOMSTR=[compile_shared_source_message] ) + env.Append( ARCOMSTR=[link_library_message] ) + env.Append( RANLIBCOMSTR=[ranlib_library_message] ) + env.Append( SHLINKCOMSTR=[link_shared_library_message] ) + env.Append( LINKCOMSTR=[link_program_message] ) + env.Append( JARCOMSTR=[java_library_message] ) + env.Append( JAVACCOMSTR=[java_compile_source_message] ) def detect_visual_c_compiler_version(tools_env): - # tools_env is the variable scons uses to call tools that execute tasks, SCons's env['ENV'] that executes tasks... - # (see the SCons documentation for more information on what it does)... - # in order for this function to be well encapsulated i choose to force it to recieve SCons's TOOLS env (env['ENV'] - # and not scons setup environment (env)... so make sure you call the right environment on it or it will fail to detect - # the propper vc version that will be called + # tools_env is the variable scons uses to call tools that execute tasks, SCons's env['ENV'] that executes tasks... + # (see the SCons documentation for more information on what it does)... + # in order for this function to be well encapsulated i choose to force it to recieve SCons's TOOLS env (env['ENV'] + # and not scons setup environment (env)... so make sure you call the right environment on it or it will fail to detect + # the propper vc version that will be called - # These is no flag to give to visual c compilers to set the architecture, ie scons bits argument (32,64,ARM etc) - # There are many different cl.exe files that are run, and each one compiles & links to a different architecture - # As far as I know, the only way to figure out what compiler will be run when Scons calls cl.exe via Program() - # is to check the PATH varaible and figure out which one will be called first. Code bellow does that and returns: - # the following string values: + # These is no flag to give to visual c compilers to set the architecture, ie scons bits argument (32,64,ARM etc) + # There are many different cl.exe files that are run, and each one compiles & links to a different architecture + # As far as I know, the only way to figure out what compiler will be run when Scons calls cl.exe via Program() + # is to check the PATH varaible and figure out which one will be called first. Code bellow does that and returns: + # the following string values: - # "" Compiler not detected - # "amd64" Native 64 bit compiler - # "amd64_x86" 64 bit Cross Compiler for 32 bit - # "x86" Native 32 bit compiler - # "x86_amd64" 32 bit Cross Compiler for 64 bit + # "" Compiler not detected + # "amd64" Native 64 bit compiler + # "amd64_x86" 64 bit Cross Compiler for 32 bit + # "x86" Native 32 bit compiler + # "x86_amd64" 32 bit Cross Compiler for 64 bit - # There are other architectures, but Godot does not support them currently, so this function does not detect arm/amd64_arm - # and similar architectures/compilers + # There are other architectures, but Godot does not support them currently, so this function does not detect arm/amd64_arm + # and similar architectures/compilers - # Set chosen compiler to "not detected" - vc_chosen_compiler_index = -1 - vc_chosen_compiler_str = "" + # Set chosen compiler to "not detected" + vc_chosen_compiler_index = -1 + vc_chosen_compiler_str = "" - # find() works with -1 so big ifs bellow are needed... the simplest solution, in fact - # First test if amd64 and amd64_x86 compilers are present in the path - vc_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64;") - if(vc_amd64_compiler_detection_index > -1): - vc_chosen_compiler_index = vc_amd64_compiler_detection_index - vc_chosen_compiler_str = "amd64" + # find() works with -1 so big ifs bellow are needed... the simplest solution, in fact + # First test if amd64 and amd64_x86 compilers are present in the path + vc_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64;") + if(vc_amd64_compiler_detection_index > -1): + vc_chosen_compiler_index = vc_amd64_compiler_detection_index + vc_chosen_compiler_str = "amd64" - vc_amd64_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64_x86;") - if(vc_amd64_x86_compiler_detection_index > -1 - and (vc_chosen_compiler_index == -1 - or vc_chosen_compiler_index > vc_amd64_x86_compiler_detection_index)): - vc_chosen_compiler_index = vc_amd64_x86_compiler_detection_index - vc_chosen_compiler_str = "amd64_x86" + vc_amd64_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64_x86;") + if(vc_amd64_x86_compiler_detection_index > -1 + and (vc_chosen_compiler_index == -1 + or vc_chosen_compiler_index > vc_amd64_x86_compiler_detection_index)): + vc_chosen_compiler_index = vc_amd64_x86_compiler_detection_index + vc_chosen_compiler_str = "amd64_x86" - # Now check the 32 bit compilers - vc_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN;") - if(vc_x86_compiler_detection_index > -1 - and (vc_chosen_compiler_index == -1 - or vc_chosen_compiler_index > vc_x86_compiler_detection_index)): - vc_chosen_compiler_index = vc_x86_compiler_detection_index - vc_chosen_compiler_str = "x86" + # Now check the 32 bit compilers + vc_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN;") + if(vc_x86_compiler_detection_index > -1 + and (vc_chosen_compiler_index == -1 + or vc_chosen_compiler_index > vc_x86_compiler_detection_index)): + vc_chosen_compiler_index = vc_x86_compiler_detection_index + vc_chosen_compiler_str = "x86" - vc_x86_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env['VCINSTALLDIR']+"BIN\\x86_amd64;") - if(vc_x86_amd64_compiler_detection_index > -1 - and (vc_chosen_compiler_index == -1 - or vc_chosen_compiler_index > vc_x86_amd64_compiler_detection_index)): - vc_chosen_compiler_index = vc_x86_amd64_compiler_detection_index - vc_chosen_compiler_str = "x86_amd64" + vc_x86_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env['VCINSTALLDIR']+"BIN\\x86_amd64;") + if(vc_x86_amd64_compiler_detection_index > -1 + and (vc_chosen_compiler_index == -1 + or vc_chosen_compiler_index > vc_x86_amd64_compiler_detection_index)): + vc_chosen_compiler_index = vc_x86_amd64_compiler_detection_index + vc_chosen_compiler_str = "x86_amd64" - # debug help - #print vc_amd64_compiler_detection_index - #print vc_amd64_x86_compiler_detection_index - #print vc_x86_compiler_detection_index - #print vc_x86_amd64_compiler_detection_index - #print "chosen "+str(vc_chosen_compiler_index)+ " | "+str(vc_chosen_compiler_str) + # debug help + #print vc_amd64_compiler_detection_index + #print vc_amd64_x86_compiler_detection_index + #print vc_x86_compiler_detection_index + #print vc_x86_amd64_compiler_detection_index + #print "chosen "+str(vc_chosen_compiler_index)+ " | "+str(vc_chosen_compiler_str) - return vc_chosen_compiler_str + return vc_chosen_compiler_str def precious_program(env, program, sources, **args): - program = env.ProgramOriginal(program, sources, **args) - env.Precious(program) - return program + program = env.ProgramOriginal(program, sources, **args) + env.Precious(program) + return program diff --git a/modules/SCsub b/modules/SCsub index 40842480868..76ba9116121 100644 --- a/modules/SCsub +++ b/modules/SCsub @@ -7,16 +7,16 @@ env_modules = env.Clone() Export('env_modules') env.modules_sources=[ - "register_module_types.cpp", + "register_module_types.cpp", ] #env.add_source_files(env.modules_sources,"*.cpp") Export('env') for x in env.module_list: - if (x in env.disabled_modules): - continue - env_modules.Append(CPPFLAGS=["-DMODULE_"+x.upper()+"_ENABLED"]) - SConscript(x+"/SCsub") + if (x in env.disabled_modules): + continue + env_modules.Append(CPPFLAGS=["-DMODULE_"+x.upper()+"_ENABLED"]) + SConscript(x+"/SCsub") lib = env_modules.Library("modules",env.modules_sources) diff --git a/modules/chibi/config.py b/modules/chibi/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/chibi/config.py +++ b/modules/chibi/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/cscript/config.py b/modules/cscript/config.py index ea7e83378a9..ae0a3d75eed 100644 --- a/modules/cscript/config.py +++ b/modules/cscript/config.py @@ -1,11 +1,11 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/dds/config.py b/modules/dds/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/dds/config.py +++ b/modules/dds/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/enet/SCsub b/modules/enet/SCsub index 5175803f44c..14aa59fdb8c 100644 --- a/modules/enet/SCsub +++ b/modules/enet/SCsub @@ -8,21 +8,21 @@ Import('env_modules') env_enet = env_modules.Clone() if (env["enet"] != "system"): # builtin - thirdparty_dir = "#thirdparty/enet/" - thirdparty_sources = [ - "callbacks.c", - "compress.c", - "host.c", - "list.c", - "packet.c", - "peer.c", - "protocol.c", - "unix.c", - "win32.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_dir = "#thirdparty/enet/" + thirdparty_sources = [ + "callbacks.c", + "compress.c", + "host.c", + "list.c", + "packet.c", + "peer.c", + "protocol.c", + "unix.c", + "win32.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_enet.add_source_files(env.modules_sources, thirdparty_sources) - env_enet.Append(CPPPATH = [thirdparty_dir]) + env_enet.add_source_files(env.modules_sources, thirdparty_sources) + env_enet.Append(CPPPATH = [thirdparty_dir]) env_enet.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/enet/config.py b/modules/enet/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/enet/config.py +++ b/modules/enet/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/etc1/SCsub b/modules/etc1/SCsub index ea035fcde33..3d19e7c9a76 100644 --- a/modules/etc1/SCsub +++ b/modules/etc1/SCsub @@ -9,7 +9,7 @@ env_etc1 = env_modules.Clone() # Not unbundled so far since not widespread as shared library thirdparty_dir = "#thirdparty/rg-etc1/" thirdparty_sources = [ - "rg_etc1.cpp", + "rg_etc1.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/modules/etc1/config.py b/modules/etc1/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/etc1/config.py +++ b/modules/etc1/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/freetype/SCsub b/modules/freetype/SCsub index 1f759dee9ac..0e21deac6a5 100644 --- a/modules/freetype/SCsub +++ b/modules/freetype/SCsub @@ -6,65 +6,65 @@ Import('env') # Thirdparty source files if (env["freetype"] != "system"): # builtin - thirdparty_dir = "#thirdparty/freetype/" - thirdparty_sources = [ - "src/autofit/autofit.c", - "src/base/ftapi.c", - "src/base/ftbase.c", - "src/base/ftbbox.c", - "src/base/ftbdf.c", - "src/base/ftbitmap.c", - "src/base/ftcid.c", - "src/base/ftdebug.c", - "src/base/ftfntfmt.c", - "src/base/ftfstype.c", - "src/base/ftgasp.c", - "src/base/ftglyph.c", - "src/base/ftgxval.c", - "src/base/ftinit.c", - "src/base/ftlcdfil.c", - "src/base/ftmm.c", - "src/base/ftotval.c", - "src/base/ftpatent.c", - "src/base/ftpfr.c", - "src/base/ftpic.c", - "src/base/ftstroke.c", - "src/base/ftsynth.c", - "src/base/ftsystem.c", - "src/base/fttype1.c", - "src/base/ftwinfnt.c", - "src/bdf/bdf.c", - "src/cache/ftcache.c", - "src/cff/cff.c", - "src/cid/type1cid.c", - "src/gxvalid/gxvalid.c", - "src/otvalid/otvalid.c", - "src/pcf/pcf.c", - "src/pfr/pfr.c", - "src/psaux/psaux.c", - "src/pshinter/pshinter.c", - "src/psnames/psnames.c", - "src/raster/raster.c", - "src/sfnt/sfnt.c", - "src/smooth/smooth.c", - "src/truetype/truetype.c", - "src/type1/type1.c", - "src/type42/type42.c", - "src/winfonts/winfnt.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_dir = "#thirdparty/freetype/" + thirdparty_sources = [ + "src/autofit/autofit.c", + "src/base/ftapi.c", + "src/base/ftbase.c", + "src/base/ftbbox.c", + "src/base/ftbdf.c", + "src/base/ftbitmap.c", + "src/base/ftcid.c", + "src/base/ftdebug.c", + "src/base/ftfntfmt.c", + "src/base/ftfstype.c", + "src/base/ftgasp.c", + "src/base/ftglyph.c", + "src/base/ftgxval.c", + "src/base/ftinit.c", + "src/base/ftlcdfil.c", + "src/base/ftmm.c", + "src/base/ftotval.c", + "src/base/ftpatent.c", + "src/base/ftpfr.c", + "src/base/ftpic.c", + "src/base/ftstroke.c", + "src/base/ftsynth.c", + "src/base/ftsystem.c", + "src/base/fttype1.c", + "src/base/ftwinfnt.c", + "src/bdf/bdf.c", + "src/cache/ftcache.c", + "src/cff/cff.c", + "src/cid/type1cid.c", + "src/gxvalid/gxvalid.c", + "src/otvalid/otvalid.c", + "src/pcf/pcf.c", + "src/pfr/pfr.c", + "src/psaux/psaux.c", + "src/pshinter/pshinter.c", + "src/psnames/psnames.c", + "src/raster/raster.c", + "src/sfnt/sfnt.c", + "src/smooth/smooth.c", + "src/truetype/truetype.c", + "src/type1/type1.c", + "src/type42/type42.c", + "src/winfonts/winfnt.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - # Include header for WinRT to fix build issues - if "platform" in env and env["platform"] == "winrt": - env.Append(CCFLAGS = ['/FI', '"modules/freetype/winrtdef.h"']) + # Include header for WinRT to fix build issues + if "platform" in env and env["platform"] == "winrt": + env.Append(CCFLAGS = ['/FI', '"modules/freetype/winrtdef.h"']) - env.Append(CPPPATH = [thirdparty_dir, thirdparty_dir + "/include"]) + env.Append(CPPPATH = [thirdparty_dir, thirdparty_dir + "/include"]) - # also requires libpng headers - if (env["libpng"] != "system"): # builtin - env.Append(CPPPATH = ["#thirdparty/libpng"]) + # also requires libpng headers + if (env["libpng"] != "system"): # builtin + env.Append(CPPPATH = ["#thirdparty/libpng"]) - """ FIXME: Remove this commented code if Windows can handle the monolithic lib + """ FIXME: Remove this commented code if Windows can handle the monolithic lib # fix for Windows' shell miserably failing on long lines, split in two libraries half1 = [] half2 = [] @@ -80,8 +80,8 @@ if (env["freetype"] != "system"): # builtin env.Append(LIBS = [lib]) """ - lib = env.Library("freetype_builtin", thirdparty_sources) - env.Append(LIBS = [lib]) + lib = env.Library("freetype_builtin", thirdparty_sources) + env.Append(LIBS = [lib]) # Godot source files env.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/freetype/config.py b/modules/freetype/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/freetype/config.py +++ b/modules/freetype/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/gdscript/config.py b/modules/gdscript/config.py index ea7e83378a9..ae0a3d75eed 100644 --- a/modules/gdscript/config.py +++ b/modules/gdscript/config.py @@ -1,11 +1,11 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/gridmap/config.py b/modules/gridmap/config.py index ea7e83378a9..ae0a3d75eed 100644 --- a/modules/gridmap/config.py +++ b/modules/gridmap/config.py @@ -1,11 +1,11 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/ik/config.py b/modules/ik/config.py index f9bd7da08d3..ae0a3d75eed 100644 --- a/modules/ik/config.py +++ b/modules/ik/config.py @@ -1,11 +1,11 @@ def can_build(platform): - return True - - + return True + + def configure(env): - pass - - - + pass + + + diff --git a/modules/jpg/SCsub b/modules/jpg/SCsub index 28fb81895de..1ecc9c0fd66 100644 --- a/modules/jpg/SCsub +++ b/modules/jpg/SCsub @@ -9,7 +9,7 @@ env_jpg = env_modules.Clone() # Not unbundled for now as they are not commonly available as shared library thirdparty_dir = "#thirdparty/jpeg-compressor/" thirdparty_sources = [ - "jpgd.cpp", + "jpgd.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/modules/jpg/config.py b/modules/jpg/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/jpg/config.py +++ b/modules/jpg/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/mpc/SCsub b/modules/mpc/SCsub index 09f0c05daa9..deadf8687d8 100644 --- a/modules/mpc/SCsub +++ b/modules/mpc/SCsub @@ -7,22 +7,22 @@ env_mpc = env_modules.Clone() # Thirdparty source files if (env["libmpcdec"] != "system"): # builtin - thirdparty_dir = "#thirdparty/libmpcdec/" - thirdparty_sources = [ - "huffman.c", - "mpc_bits_reader.c", - "mpc_decoder.c", - "mpc_demux.c", - "mpc_reader.c", - "requant.c", - "streaminfo.c", - "synth_filter.c", - ] + thirdparty_dir = "#thirdparty/libmpcdec/" + thirdparty_sources = [ + "huffman.c", + "mpc_bits_reader.c", + "mpc_decoder.c", + "mpc_demux.c", + "mpc_reader.c", + "requant.c", + "streaminfo.c", + "synth_filter.c", + ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_mpc.add_source_files(env.modules_sources, thirdparty_sources) - env_mpc.Append(CPPPATH = [thirdparty_dir]) + env_mpc.add_source_files(env.modules_sources, thirdparty_sources) + env_mpc.Append(CPPPATH = [thirdparty_dir]) # Godot source files env_mpc.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/mpc/config.py b/modules/mpc/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/mpc/config.py +++ b/modules/mpc/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/ogg/SCsub b/modules/ogg/SCsub index 2e1fe2e0c0d..3946e548427 100644 --- a/modules/ogg/SCsub +++ b/modules/ogg/SCsub @@ -7,15 +7,15 @@ env_ogg = env_modules.Clone() # Thirdparty source files if (env["libogg"] != "system"): # builtin - thirdparty_dir = "#thirdparty/libogg/" - thirdparty_sources = [ - "bitwise.c", - "framing.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_dir = "#thirdparty/libogg/" + thirdparty_sources = [ + "bitwise.c", + "framing.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_ogg.add_source_files(env.modules_sources, thirdparty_sources) - env_ogg.Append(CPPPATH = [thirdparty_dir]) + env_ogg.add_source_files(env.modules_sources, thirdparty_sources) + env_ogg.Append(CPPPATH = [thirdparty_dir]) # Godot source files env_ogg.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/ogg/config.py b/modules/ogg/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/ogg/config.py +++ b/modules/ogg/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/openssl/SCsub b/modules/openssl/SCsub index 2327cf483cb..325816fb19c 100644 --- a/modules/openssl/SCsub +++ b/modules/openssl/SCsub @@ -7,672 +7,672 @@ env_openssl = env_modules.Clone() # Thirdparty source files if (env["openssl"] != "system"): # builtin - thirdparty_dir = "#thirdparty/openssl/" + thirdparty_dir = "#thirdparty/openssl/" - thirdparty_sources = [ - "ssl/t1_lib.c", - "ssl/t1_ext.c", - "ssl/s3_srvr.c", - "ssl/t1_enc.c", - "ssl/t1_meth.c", - "ssl/s23_clnt.c", - "ssl/ssl_asn1.c", - "ssl/tls_srp.c", - "ssl/kssl.c", - "ssl/d1_both.c", - "ssl/t1_clnt.c", - "ssl/bio_ssl.c", - "ssl/d1_srtp.c", - "ssl/t1_reneg.c", - "ssl/ssl_cert.c", - "ssl/s3_lib.c", - "ssl/d1_srvr.c", - "ssl/s23_meth.c", - "ssl/ssl_stat.c", - "ssl/ssl_err.c", - "ssl/ssl_algs.c", - "ssl/s3_cbc.c", - "ssl/d1_clnt.c", - "ssl/s3_pkt.c", - "ssl/d1_meth.c", - "ssl/s3_both.c", - "ssl/s2_enc.c", - "ssl/s3_meth.c", - "ssl/s3_enc.c", - "ssl/s23_pkt.c", - "ssl/s2_pkt.c", - "ssl/d1_pkt.c", - "ssl/ssl_rsa.c", - "ssl/s23_srvr.c", - "ssl/s2_meth.c", - "ssl/s3_clnt.c", - "ssl/s23_lib.c", - "ssl/t1_srvr.c", - "ssl/ssl_lib.c", - "ssl/ssl_txt.c", - "ssl/s2_srvr.c", - "ssl/ssl_sess.c", - "ssl/s2_clnt.c", - "ssl/d1_lib.c", - "ssl/s2_lib.c", - "ssl/ssl_err2.c", - "ssl/ssl_ciph.c", - "crypto/dsa/dsa_lib.c", - "crypto/dsa/dsa_pmeth.c", - "crypto/dsa/dsa_ossl.c", - "crypto/dsa/dsa_gen.c", - "crypto/dsa/dsa_asn1.c", - "crypto/dsa/dsa_prn.c", - "crypto/dsa/dsa_sign.c", - "crypto/dsa/dsa_key.c", - "crypto/dsa/dsa_vrf.c", - "crypto/dsa/dsa_err.c", - "crypto/dsa/dsa_ameth.c", - "crypto/dsa/dsa_depr.c", - "crypto/x509/x509_lu.c", - "crypto/x509/x509cset.c", - "crypto/x509/x509_set.c", - "crypto/x509/x509_d2.c", - "crypto/x509/x509_txt.c", - "crypto/x509/x509rset.c", - "crypto/x509/by_dir.c", - "crypto/x509/x509_vpm.c", - "crypto/x509/x509_vfy.c", - "crypto/x509/x509_trs.c", - "crypto/x509/by_file.c", - "crypto/x509/x509_obj.c", - "crypto/x509/x509spki.c", - "crypto/x509/x509_v3.c", - "crypto/x509/x509_req.c", - "crypto/x509/x509_att.c", - "crypto/x509/x_all.c", - "crypto/x509/x509_ext.c", - "crypto/x509/x509type.c", - "crypto/x509/x509_def.c", - "crypto/x509/x509_err.c", - "crypto/x509/x509name.c", - "crypto/x509/x509_r2x.c", - "crypto/x509/x509_cmp.c", - "crypto/asn1/x_pkey.c", - "crypto/asn1/a_gentm.c", - "crypto/asn1/x_sig.c", - "crypto/asn1/t_req.c", - "crypto/asn1/t_pkey.c", - "crypto/asn1/p8_pkey.c", - "crypto/asn1/a_i2d_fp.c", - "crypto/asn1/x_val.c", - "crypto/asn1/f_string.c", - "crypto/asn1/p5_pbe.c", - "crypto/asn1/bio_ndef.c", - "crypto/asn1/a_bool.c", - "crypto/asn1/asn1_gen.c", - "crypto/asn1/x_algor.c", - "crypto/asn1/bio_asn1.c", - "crypto/asn1/asn_mime.c", - "crypto/asn1/t_x509.c", - "crypto/asn1/a_strex.c", - "crypto/asn1/x_nx509.c", - "crypto/asn1/asn1_err.c", - "crypto/asn1/x_crl.c", - "crypto/asn1/a_print.c", - "crypto/asn1/a_type.c", - "crypto/asn1/tasn_new.c", - "crypto/asn1/n_pkey.c", - "crypto/asn1/x_bignum.c", - "crypto/asn1/asn_pack.c", - "crypto/asn1/evp_asn1.c", - "crypto/asn1/t_bitst.c", - "crypto/asn1/x_req.c", - "crypto/asn1/a_time.c", - "crypto/asn1/x_name.c", - "crypto/asn1/x_pubkey.c", - "crypto/asn1/tasn_typ.c", - "crypto/asn1/asn_moid.c", - "crypto/asn1/a_utctm.c", - "crypto/asn1/asn1_lib.c", - "crypto/asn1/x_x509a.c", - "crypto/asn1/a_set.c", - "crypto/asn1/t_crl.c", - "crypto/asn1/p5_pbev2.c", - "crypto/asn1/tasn_enc.c", - "crypto/asn1/a_mbstr.c", - "crypto/asn1/tasn_dec.c", - "crypto/asn1/x_x509.c", - "crypto/asn1/a_octet.c", - "crypto/asn1/x_long.c", - "crypto/asn1/a_bytes.c", - "crypto/asn1/t_x509a.c", - "crypto/asn1/a_enum.c", - "crypto/asn1/a_int.c", - "crypto/asn1/tasn_prn.c", - "crypto/asn1/i2d_pr.c", - "crypto/asn1/a_utf8.c", - "crypto/asn1/t_spki.c", - "crypto/asn1/a_digest.c", - "crypto/asn1/a_dup.c", - "crypto/asn1/i2d_pu.c", - "crypto/asn1/a_verify.c", - "crypto/asn1/f_enum.c", - "crypto/asn1/a_sign.c", - "crypto/asn1/d2i_pr.c", - "crypto/asn1/asn1_par.c", - "crypto/asn1/x_spki.c", - "crypto/asn1/a_d2i_fp.c", - "crypto/asn1/f_int.c", - "crypto/asn1/x_exten.c", - "crypto/asn1/tasn_utl.c", - "crypto/asn1/nsseq.c", - "crypto/asn1/a_bitstr.c", - "crypto/asn1/x_info.c", - "crypto/asn1/a_strnid.c", - "crypto/asn1/a_object.c", - "crypto/asn1/tasn_fre.c", - "crypto/asn1/d2i_pu.c", - "crypto/asn1/ameth_lib.c", - "crypto/asn1/x_attrib.c", - "crypto/evp/m_sha.c", - "crypto/evp/e_camellia.c", - "crypto/evp/e_aes.c", - "crypto/evp/bio_b64.c", - "crypto/evp/m_sigver.c", - "crypto/evp/m_wp.c", - "crypto/evp/m_sha1.c", - "crypto/evp/p_seal.c", - "crypto/evp/c_alld.c", - "crypto/evp/p5_crpt.c", - "crypto/evp/e_rc4.c", - "crypto/evp/m_ecdsa.c", - "crypto/evp/bio_enc.c", - "crypto/evp/e_des3.c", - "crypto/evp/m_null.c", - "crypto/evp/bio_ok.c", - "crypto/evp/pmeth_gn.c", - "crypto/evp/e_rc5.c", - "crypto/evp/e_rc2.c", - "crypto/evp/p_dec.c", - "crypto/evp/p_verify.c", - "crypto/evp/e_rc4_hmac_md5.c", - "crypto/evp/pmeth_lib.c", - "crypto/evp/m_ripemd.c", - "crypto/evp/m_md5.c", - "crypto/evp/e_bf.c", - "crypto/evp/p_enc.c", - "crypto/evp/m_dss.c", - "crypto/evp/bio_md.c", - "crypto/evp/evp_pbe.c", - "crypto/evp/e_seed.c", - "crypto/evp/e_cast.c", - "crypto/evp/p_open.c", - "crypto/evp/p5_crpt2.c", - "crypto/evp/m_dss1.c", - "crypto/evp/names.c", - "crypto/evp/evp_acnf.c", - "crypto/evp/e_des.c", - "crypto/evp/evp_cnf.c", - "crypto/evp/evp_lib.c", - "crypto/evp/digest.c", - "crypto/evp/evp_err.c", - "crypto/evp/evp_enc.c", - "crypto/evp/e_old.c", - "crypto/evp/c_all.c", - "crypto/evp/m_md2.c", - "crypto/evp/e_xcbc_d.c", - "crypto/evp/pmeth_fn.c", - "crypto/evp/p_lib.c", - "crypto/evp/evp_key.c", - "crypto/evp/encode.c", - "crypto/evp/e_aes_cbc_hmac_sha1.c", - "crypto/evp/e_aes_cbc_hmac_sha256.c", - "crypto/evp/m_mdc2.c", - "crypto/evp/e_null.c", - "crypto/evp/p_sign.c", - "crypto/evp/e_idea.c", - "crypto/evp/c_allc.c", - "crypto/evp/evp_pkey.c", - "crypto/evp/m_md4.c", - "crypto/ex_data.c", - "crypto/pkcs12/p12_p8e.c", - "crypto/pkcs12/p12_crt.c", - "crypto/pkcs12/p12_utl.c", - "crypto/pkcs12/p12_attr.c", - "crypto/pkcs12/p12_npas.c", - "crypto/pkcs12/p12_decr.c", - "crypto/pkcs12/p12_init.c", - "crypto/pkcs12/p12_kiss.c", - "crypto/pkcs12/p12_add.c", - "crypto/pkcs12/p12_p8d.c", - "crypto/pkcs12/p12_mutl.c", - "crypto/pkcs12/p12_crpt.c", - "crypto/pkcs12/pk12err.c", - "crypto/pkcs12/p12_asn.c", - "crypto/pkcs12/p12_key.c", - "crypto/ecdh/ech_key.c", - "crypto/ecdh/ech_ossl.c", - "crypto/ecdh/ech_lib.c", - "crypto/ecdh/ech_err.c", - "crypto/ecdh/ech_kdf.c", - "crypto/o_str.c", - "crypto/conf/conf_api.c", - "crypto/conf/conf_err.c", - "crypto/conf/conf_def.c", - "crypto/conf/conf_lib.c", - "crypto/conf/conf_mall.c", - "crypto/conf/conf_sap.c", - "crypto/conf/conf_mod.c", - "crypto/ebcdic.c", - "crypto/ecdsa/ecs_lib.c", - "crypto/ecdsa/ecs_asn1.c", - "crypto/ecdsa/ecs_ossl.c", - "crypto/ecdsa/ecs_vrf.c", - "crypto/ecdsa/ecs_sign.c", - "crypto/ecdsa/ecs_err.c", - "crypto/dso/dso_win32.c", - "crypto/dso/dso_lib.c", - "crypto/dso/dso_dlfcn.c", - "crypto/dso/dso_dl.c", - "crypto/dso/dso_beos.c", - "crypto/dso/dso_null.c", - "crypto/dso/dso_vms.c", - "crypto/dso/dso_err.c", - "crypto/dso/dso_openssl.c", - "crypto/cryptlib.c", - "crypto/md5/md5_one.c", - "crypto/md5/md5_dgst.c", - "crypto/pkcs7/pkcs7err.c", - "crypto/pkcs7/pk7_smime.c", - "crypto/pkcs7/bio_pk7.c", - "crypto/pkcs7/pk7_mime.c", - "crypto/pkcs7/pk7_lib.c", - "crypto/pkcs7/pk7_asn1.c", - "crypto/pkcs7/pk7_doit.c", - "crypto/pkcs7/pk7_attr.c", - "crypto/md4/md4_one.c", - "crypto/md4/md4_dgst.c", - "crypto/o_dir.c", - "crypto/buffer/buf_err.c", - "crypto/buffer/buf_str.c", - "crypto/buffer/buffer.c", - "crypto/cms/cms_lib.c", - "crypto/cms/cms_io.c", - "crypto/cms/cms_err.c", - "crypto/cms/cms_dd.c", - "crypto/cms/cms_smime.c", - "crypto/cms/cms_att.c", - "crypto/cms/cms_pwri.c", - "crypto/cms/cms_cd.c", - "crypto/cms/cms_sd.c", - "crypto/cms/cms_asn1.c", - "crypto/cms/cms_env.c", - "crypto/cms/cms_enc.c", - "crypto/cms/cms_ess.c", - "crypto/cms/cms_kari.c", - "crypto/mem_dbg.c", - "crypto/uid.c", - "crypto/stack/stack.c", - "crypto/ec/ec_ameth.c", - "crypto/ec/ec_err.c", - "crypto/ec/ec_lib.c", - "crypto/ec/ec_curve.c", - "crypto/ec/ec_oct.c", - "crypto/ec/ec_asn1.c", - "crypto/ec/ecp_oct.c", - "crypto/ec/ec_print.c", - "crypto/ec/ec2_smpl.c", - "crypto/ec/ecp_nistp224.c", - "crypto/ec/ec2_oct.c", - "crypto/ec/eck_prn.c", - "crypto/ec/ec_key.c", - "crypto/ec/ecp_nist.c", - "crypto/ec/ec_check.c", - "crypto/ec/ecp_smpl.c", - "crypto/ec/ec2_mult.c", - "crypto/ec/ecp_mont.c", - "crypto/ec/ecp_nistp521.c", - "crypto/ec/ec_mult.c", - "crypto/ec/ecp_nistputil.c", - "crypto/ec/ec_pmeth.c", - "crypto/ec/ec_cvt.c", - "crypto/ec/ecp_nistp256.c", - "crypto/krb5/krb5_asn.c", - "crypto/hmac/hmac.c", - "crypto/hmac/hm_ameth.c", - "crypto/hmac/hm_pmeth.c", - "crypto/comp/c_rle.c", - "crypto/comp/c_zlib.c", - "crypto/comp/comp_lib.c", - "crypto/comp/comp_err.c", - "crypto/des/fcrypt.c", - "crypto/des/str2key.c", - "crypto/des/cbc_cksm.c", - "crypto/des/des_enc.c", - "crypto/des/ofb_enc.c", - "crypto/des/read2pwd.c", - "crypto/des/ecb3_enc.c", - "crypto/des/rand_key.c", - "crypto/des/cfb64ede.c", - "crypto/des/rpc_enc.c", - "crypto/des/ofb64ede.c", - "crypto/des/qud_cksm.c", - "crypto/des/enc_writ.c", - "crypto/des/set_key.c", - "crypto/des/xcbc_enc.c", - "crypto/des/fcrypt_b.c", - "crypto/des/ede_cbcm_enc.c", - "crypto/des/des_old2.c", - "crypto/des/cfb_enc.c", - "crypto/des/ecb_enc.c", - "crypto/des/enc_read.c", - "crypto/des/des_old.c", - "crypto/des/ofb64enc.c", - "crypto/des/pcbc_enc.c", - "crypto/des/cbc_enc.c", - "crypto/des/cfb64enc.c", - "crypto/lhash/lh_stats.c", - "crypto/lhash/lhash.c", - "crypto/x509v3/v3_genn.c", - "crypto/x509v3/pcy_cache.c", - "crypto/x509v3/v3_sxnet.c", - "crypto/x509v3/v3_scts.c", - "crypto/x509v3/v3err.c", - "crypto/x509v3/v3_conf.c", - "crypto/x509v3/v3_utl.c", - "crypto/x509v3/v3_akeya.c", - "crypto/x509v3/v3_lib.c", - "crypto/x509v3/pcy_lib.c", - "crypto/x509v3/v3_cpols.c", - "crypto/x509v3/v3_ia5.c", - "crypto/x509v3/v3_bitst.c", - "crypto/x509v3/v3_skey.c", - "crypto/x509v3/v3_info.c", - "crypto/x509v3/v3_asid.c", - "crypto/x509v3/pcy_tree.c", - "crypto/x509v3/v3_pcons.c", - "crypto/x509v3/v3_bcons.c", - "crypto/x509v3/v3_pku.c", - "crypto/x509v3/v3_ocsp.c", - "crypto/x509v3/pcy_map.c", - "crypto/x509v3/v3_ncons.c", - "crypto/x509v3/v3_purp.c", - "crypto/x509v3/v3_enum.c", - "crypto/x509v3/v3_pmaps.c", - "crypto/x509v3/pcy_node.c", - "crypto/x509v3/v3_pcia.c", - "crypto/x509v3/v3_crld.c", - "crypto/x509v3/v3_pci.c", - "crypto/x509v3/v3_akey.c", - "crypto/x509v3/v3_addr.c", - "crypto/x509v3/v3_int.c", - "crypto/x509v3/v3_alt.c", - "crypto/x509v3/v3_extku.c", - "crypto/x509v3/v3_prn.c", - "crypto/x509v3/pcy_data.c", - "crypto/aes/aes_ofb.c", - "crypto/aes/aes_ctr.c", - "crypto/aes/aes_ecb.c", - "crypto/aes/aes_cfb.c", - "crypto/aes/aes_wrap.c", - "crypto/aes/aes_ige.c", - "crypto/aes/aes_misc.c", - "crypto/pqueue/pqueue.c", - "crypto/sha/sha_one.c", - "crypto/sha/sha_dgst.c", - "crypto/sha/sha512.c", - "crypto/sha/sha1_one.c", - "crypto/sha/sha1dgst.c", - "crypto/sha/sha256.c", - "crypto/whrlpool/wp_dgst.c", - "crypto/objects/obj_xref.c", - "crypto/objects/o_names.c", - "crypto/objects/obj_err.c", - "crypto/objects/obj_dat.c", - "crypto/objects/obj_lib.c", - "crypto/mem.c", - "crypto/fips_ers.c", - "crypto/o_fips.c", - "crypto/engine/eng_rdrand.c", - "crypto/engine/eng_err.c", - "crypto/engine/tb_ecdsa.c", - "crypto/engine/tb_rsa.c", - "crypto/engine/tb_cipher.c", - "crypto/engine/tb_dsa.c", - "crypto/engine/eng_lib.c", - "crypto/engine/tb_asnmth.c", - "crypto/engine/tb_ecdh.c", - "crypto/engine/tb_dh.c", - "crypto/engine/tb_store.c", - "crypto/engine/eng_init.c", - "crypto/engine/eng_cnf.c", - "crypto/engine/eng_all.c", - "crypto/engine/tb_digest.c", - "crypto/engine/tb_pkmeth.c", - "crypto/engine/eng_table.c", - "crypto/engine/eng_ctrl.c", - "crypto/engine/eng_list.c", - "crypto/engine/eng_cryptodev.c", - "crypto/engine/eng_pkey.c", - "crypto/engine/tb_rand.c", - "crypto/engine/eng_openssl.c", - "crypto/engine/eng_fat.c", - "crypto/engine/eng_dyn.c", - "crypto/ts/ts_rsp_verify.c", - "crypto/ts/ts_req_print.c", - "crypto/ts/ts_verify_ctx.c", - "crypto/ts/ts_req_utils.c", - "crypto/ts/ts_err.c", - "crypto/ts/ts_rsp_print.c", - "crypto/ts/ts_rsp_utils.c", - "crypto/ts/ts_lib.c", - "crypto/ts/ts_conf.c", - "crypto/ts/ts_asn1.c", - "crypto/ts/ts_rsp_sign.c", - "crypto/ocsp/ocsp_ext.c", - "crypto/ocsp/ocsp_cl.c", - "crypto/ocsp/ocsp_ht.c", - "crypto/ocsp/ocsp_lib.c", - "crypto/ocsp/ocsp_srv.c", - "crypto/ocsp/ocsp_vfy.c", - "crypto/ocsp/ocsp_err.c", - "crypto/ocsp/ocsp_prn.c", - "crypto/ocsp/ocsp_asn.c", - "crypto/bf/bf_cfb64.c", - "crypto/bf/bf_ecb.c", - "crypto/bf/bf_enc.c", - "crypto/bf/bf_skey.c", - "crypto/bf/bf_ofb64.c", - "crypto/idea/i_skey.c", - "crypto/idea/i_ofb64.c", - "crypto/idea/i_cbc.c", - "crypto/idea/i_ecb.c", - "crypto/idea/i_cfb64.c", - "crypto/cmac/cm_ameth.c", - "crypto/cmac/cmac.c", - "crypto/cmac/cm_pmeth.c", - "crypto/dh/dh_lib.c", - "crypto/dh/dh_key.c", - "crypto/dh/dh_asn1.c", - "crypto/dh/dh_depr.c", - "crypto/dh/dh_pmeth.c", - "crypto/dh/dh_prn.c", - "crypto/dh/dh_gen.c", - "crypto/dh/dh_ameth.c", - "crypto/dh/dh_check.c", - "crypto/dh/dh_err.c", - "crypto/dh/dh_kdf.c", - "crypto/dh/dh_rfc5114.c", - "crypto/modes/ccm128.c", - "crypto/modes/ofb128.c", - "crypto/modes/cts128.c", - "crypto/modes/ctr128.c", - "crypto/modes/gcm128.c", - "crypto/modes/cbc128.c", - "crypto/modes/cfb128.c", - "crypto/modes/xts128.c", - "crypto/modes/wrap128.c", - "crypto/camellia/cmll_cfb.c", - "crypto/camellia/cmll_ecb.c", - "crypto/camellia/cmll_utl.c", - "crypto/camellia/cmll_misc.c", - "crypto/camellia/cmll_ofb.c", - "crypto/camellia/cmll_ctr.c", - "crypto/seed/seed_ecb.c", - "crypto/seed/seed_cbc.c", - "crypto/seed/seed.c", - "crypto/seed/seed_ofb.c", - "crypto/seed/seed_cfb.c", - "crypto/txt_db/txt_db.c", - "crypto/cpt_err.c", - "crypto/pem/pem_pk8.c", - "crypto/pem/pem_lib.c", - "crypto/pem/pem_sign.c", - "crypto/pem/pem_all.c", - "crypto/pem/pem_info.c", - "crypto/pem/pem_pkey.c", - "crypto/pem/pem_seal.c", - "crypto/pem/pem_err.c", - "crypto/pem/pem_xaux.c", - "crypto/pem/pvkfmt.c", - "crypto/pem/pem_x509.c", - "crypto/pem/pem_oth.c", - "crypto/rand/rand_lib.c", - "crypto/rand/randfile.c", - "crypto/rand/rand_os2.c", - "crypto/rand/rand_unix.c", - "crypto/rand/rand_nw.c", - "crypto/rand/md_rand.c", - "crypto/rand/rand_err.c", - "crypto/rand/rand_win.c", - "crypto/rand/rand_egd.c", - "crypto/cversion.c", - "crypto/cast/c_ecb.c", - "crypto/cast/c_skey.c", - "crypto/cast/c_ofb64.c", - "crypto/cast/c_enc.c", - "crypto/cast/c_cfb64.c", - "crypto/o_time.c", - "crypto/mdc2/mdc2dgst.c", - "crypto/mdc2/mdc2_one.c", - "crypto/rc4/rc4_utl.c", - "crypto/ui/ui_compat.c", - "crypto/ui/ui_util.c", - "crypto/ui/ui_lib.c", - "crypto/ui/ui_err.c", - "crypto/ui/ui_openssl.c", - "crypto/bio/bf_buff.c", - "crypto/bio/bss_null.c", - "crypto/bio/bss_acpt.c", - "crypto/bio/bss_conn.c", - "crypto/bio/bss_fd.c", - "crypto/bio/bf_null.c", - "crypto/bio/bio_err.c", - "crypto/bio/bss_sock.c", - "crypto/bio/bss_mem.c", - "crypto/bio/b_dump.c", - "crypto/bio/b_print.c", - "crypto/bio/b_sock.c", - "crypto/bio/bss_dgram.c", - "crypto/bio/bf_nbio.c", - "crypto/bio/bio_lib.c", - "crypto/bio/bss_file.c", - "crypto/bio/bss_bio.c", - "crypto/bio/bss_log.c", - "crypto/bio/bio_cb.c", - "crypto/o_init.c", - "crypto/rc2/rc2_skey.c", - "crypto/rc2/rc2_cbc.c", - "crypto/rc2/rc2cfb64.c", - "crypto/rc2/rc2_ecb.c", - "crypto/rc2/rc2ofb64.c", - "crypto/bn/bn_x931p.c", - "crypto/bn/bn_blind.c", - "crypto/bn/bn_gf2m.c", - "crypto/bn/bn_const.c", - "crypto/bn/bn_sqr.c", - "crypto/bn/bn_nist.c", - "crypto/bn/bn_rand.c", - "crypto/bn/bn_err.c", - "crypto/bn/bn_div.c", - "crypto/bn/bn_kron.c", - "crypto/bn/bn_ctx.c", - "crypto/bn/bn_shift.c", - "crypto/bn/bn_mod.c", - "crypto/bn/bn_exp2.c", - "crypto/bn/bn_word.c", - "crypto/bn/bn_add.c", - "crypto/bn/bn_exp.c", - "crypto/bn/bn_mont.c", - "crypto/bn/bn_print.c", - "crypto/bn/bn_mul.c", - "crypto/bn/bn_prime.c", - "crypto/bn/bn_depr.c", - "crypto/bn/bn_gcd.c", - "crypto/bn/bn_mpi.c", - "crypto/bn/bn_sqrt.c", - "crypto/bn/bn_recp.c", - "crypto/bn/bn_lib.c", - "crypto/ripemd/rmd_dgst.c", - "crypto/ripemd/rmd_one.c", - "crypto/rsa/rsa_x931.c", - "crypto/rsa/rsa_depr.c", - "crypto/rsa/rsa_saos.c", - "crypto/rsa/rsa_crpt.c", - "crypto/rsa/rsa_pss.c", - "crypto/rsa/rsa_oaep.c", - "crypto/rsa/rsa_null.c", - "crypto/rsa/rsa_gen.c", - "crypto/rsa/rsa_prn.c", - "crypto/rsa/rsa_pmeth.c", - "crypto/rsa/rsa_asn1.c", - "crypto/rsa/rsa_ssl.c", - "crypto/rsa/rsa_ameth.c", - "crypto/rsa/rsa_pk1.c", - "crypto/rsa/rsa_err.c", - "crypto/rsa/rsa_lib.c", - "crypto/rsa/rsa_none.c", - "crypto/rsa/rsa_chk.c", - "crypto/rsa/rsa_eay.c", - "crypto/rsa/rsa_sign.c", - "crypto/srp/srp_lib.c", - "crypto/srp/srp_vfy.c", - "crypto/err/err.c", - "crypto/err/err_prn.c", - "crypto/err/err_all.c", - "crypto/mem_clr.c", - "crypto/rc4/rc4_skey.c", - "crypto/rc4/rc4_enc.c", - "crypto/camellia/camellia.c", - "crypto/camellia/cmll_cbc.c", - #"crypto/aes/aes_x86core.c", - "crypto/aes/aes_core.c", - "crypto/aes/aes_cbc.c", - "crypto/whrlpool/wp_block.c", - "crypto/bn/bn_asm.c", - ] + thirdparty_sources = [ + "ssl/t1_lib.c", + "ssl/t1_ext.c", + "ssl/s3_srvr.c", + "ssl/t1_enc.c", + "ssl/t1_meth.c", + "ssl/s23_clnt.c", + "ssl/ssl_asn1.c", + "ssl/tls_srp.c", + "ssl/kssl.c", + "ssl/d1_both.c", + "ssl/t1_clnt.c", + "ssl/bio_ssl.c", + "ssl/d1_srtp.c", + "ssl/t1_reneg.c", + "ssl/ssl_cert.c", + "ssl/s3_lib.c", + "ssl/d1_srvr.c", + "ssl/s23_meth.c", + "ssl/ssl_stat.c", + "ssl/ssl_err.c", + "ssl/ssl_algs.c", + "ssl/s3_cbc.c", + "ssl/d1_clnt.c", + "ssl/s3_pkt.c", + "ssl/d1_meth.c", + "ssl/s3_both.c", + "ssl/s2_enc.c", + "ssl/s3_meth.c", + "ssl/s3_enc.c", + "ssl/s23_pkt.c", + "ssl/s2_pkt.c", + "ssl/d1_pkt.c", + "ssl/ssl_rsa.c", + "ssl/s23_srvr.c", + "ssl/s2_meth.c", + "ssl/s3_clnt.c", + "ssl/s23_lib.c", + "ssl/t1_srvr.c", + "ssl/ssl_lib.c", + "ssl/ssl_txt.c", + "ssl/s2_srvr.c", + "ssl/ssl_sess.c", + "ssl/s2_clnt.c", + "ssl/d1_lib.c", + "ssl/s2_lib.c", + "ssl/ssl_err2.c", + "ssl/ssl_ciph.c", + "crypto/dsa/dsa_lib.c", + "crypto/dsa/dsa_pmeth.c", + "crypto/dsa/dsa_ossl.c", + "crypto/dsa/dsa_gen.c", + "crypto/dsa/dsa_asn1.c", + "crypto/dsa/dsa_prn.c", + "crypto/dsa/dsa_sign.c", + "crypto/dsa/dsa_key.c", + "crypto/dsa/dsa_vrf.c", + "crypto/dsa/dsa_err.c", + "crypto/dsa/dsa_ameth.c", + "crypto/dsa/dsa_depr.c", + "crypto/x509/x509_lu.c", + "crypto/x509/x509cset.c", + "crypto/x509/x509_set.c", + "crypto/x509/x509_d2.c", + "crypto/x509/x509_txt.c", + "crypto/x509/x509rset.c", + "crypto/x509/by_dir.c", + "crypto/x509/x509_vpm.c", + "crypto/x509/x509_vfy.c", + "crypto/x509/x509_trs.c", + "crypto/x509/by_file.c", + "crypto/x509/x509_obj.c", + "crypto/x509/x509spki.c", + "crypto/x509/x509_v3.c", + "crypto/x509/x509_req.c", + "crypto/x509/x509_att.c", + "crypto/x509/x_all.c", + "crypto/x509/x509_ext.c", + "crypto/x509/x509type.c", + "crypto/x509/x509_def.c", + "crypto/x509/x509_err.c", + "crypto/x509/x509name.c", + "crypto/x509/x509_r2x.c", + "crypto/x509/x509_cmp.c", + "crypto/asn1/x_pkey.c", + "crypto/asn1/a_gentm.c", + "crypto/asn1/x_sig.c", + "crypto/asn1/t_req.c", + "crypto/asn1/t_pkey.c", + "crypto/asn1/p8_pkey.c", + "crypto/asn1/a_i2d_fp.c", + "crypto/asn1/x_val.c", + "crypto/asn1/f_string.c", + "crypto/asn1/p5_pbe.c", + "crypto/asn1/bio_ndef.c", + "crypto/asn1/a_bool.c", + "crypto/asn1/asn1_gen.c", + "crypto/asn1/x_algor.c", + "crypto/asn1/bio_asn1.c", + "crypto/asn1/asn_mime.c", + "crypto/asn1/t_x509.c", + "crypto/asn1/a_strex.c", + "crypto/asn1/x_nx509.c", + "crypto/asn1/asn1_err.c", + "crypto/asn1/x_crl.c", + "crypto/asn1/a_print.c", + "crypto/asn1/a_type.c", + "crypto/asn1/tasn_new.c", + "crypto/asn1/n_pkey.c", + "crypto/asn1/x_bignum.c", + "crypto/asn1/asn_pack.c", + "crypto/asn1/evp_asn1.c", + "crypto/asn1/t_bitst.c", + "crypto/asn1/x_req.c", + "crypto/asn1/a_time.c", + "crypto/asn1/x_name.c", + "crypto/asn1/x_pubkey.c", + "crypto/asn1/tasn_typ.c", + "crypto/asn1/asn_moid.c", + "crypto/asn1/a_utctm.c", + "crypto/asn1/asn1_lib.c", + "crypto/asn1/x_x509a.c", + "crypto/asn1/a_set.c", + "crypto/asn1/t_crl.c", + "crypto/asn1/p5_pbev2.c", + "crypto/asn1/tasn_enc.c", + "crypto/asn1/a_mbstr.c", + "crypto/asn1/tasn_dec.c", + "crypto/asn1/x_x509.c", + "crypto/asn1/a_octet.c", + "crypto/asn1/x_long.c", + "crypto/asn1/a_bytes.c", + "crypto/asn1/t_x509a.c", + "crypto/asn1/a_enum.c", + "crypto/asn1/a_int.c", + "crypto/asn1/tasn_prn.c", + "crypto/asn1/i2d_pr.c", + "crypto/asn1/a_utf8.c", + "crypto/asn1/t_spki.c", + "crypto/asn1/a_digest.c", + "crypto/asn1/a_dup.c", + "crypto/asn1/i2d_pu.c", + "crypto/asn1/a_verify.c", + "crypto/asn1/f_enum.c", + "crypto/asn1/a_sign.c", + "crypto/asn1/d2i_pr.c", + "crypto/asn1/asn1_par.c", + "crypto/asn1/x_spki.c", + "crypto/asn1/a_d2i_fp.c", + "crypto/asn1/f_int.c", + "crypto/asn1/x_exten.c", + "crypto/asn1/tasn_utl.c", + "crypto/asn1/nsseq.c", + "crypto/asn1/a_bitstr.c", + "crypto/asn1/x_info.c", + "crypto/asn1/a_strnid.c", + "crypto/asn1/a_object.c", + "crypto/asn1/tasn_fre.c", + "crypto/asn1/d2i_pu.c", + "crypto/asn1/ameth_lib.c", + "crypto/asn1/x_attrib.c", + "crypto/evp/m_sha.c", + "crypto/evp/e_camellia.c", + "crypto/evp/e_aes.c", + "crypto/evp/bio_b64.c", + "crypto/evp/m_sigver.c", + "crypto/evp/m_wp.c", + "crypto/evp/m_sha1.c", + "crypto/evp/p_seal.c", + "crypto/evp/c_alld.c", + "crypto/evp/p5_crpt.c", + "crypto/evp/e_rc4.c", + "crypto/evp/m_ecdsa.c", + "crypto/evp/bio_enc.c", + "crypto/evp/e_des3.c", + "crypto/evp/m_null.c", + "crypto/evp/bio_ok.c", + "crypto/evp/pmeth_gn.c", + "crypto/evp/e_rc5.c", + "crypto/evp/e_rc2.c", + "crypto/evp/p_dec.c", + "crypto/evp/p_verify.c", + "crypto/evp/e_rc4_hmac_md5.c", + "crypto/evp/pmeth_lib.c", + "crypto/evp/m_ripemd.c", + "crypto/evp/m_md5.c", + "crypto/evp/e_bf.c", + "crypto/evp/p_enc.c", + "crypto/evp/m_dss.c", + "crypto/evp/bio_md.c", + "crypto/evp/evp_pbe.c", + "crypto/evp/e_seed.c", + "crypto/evp/e_cast.c", + "crypto/evp/p_open.c", + "crypto/evp/p5_crpt2.c", + "crypto/evp/m_dss1.c", + "crypto/evp/names.c", + "crypto/evp/evp_acnf.c", + "crypto/evp/e_des.c", + "crypto/evp/evp_cnf.c", + "crypto/evp/evp_lib.c", + "crypto/evp/digest.c", + "crypto/evp/evp_err.c", + "crypto/evp/evp_enc.c", + "crypto/evp/e_old.c", + "crypto/evp/c_all.c", + "crypto/evp/m_md2.c", + "crypto/evp/e_xcbc_d.c", + "crypto/evp/pmeth_fn.c", + "crypto/evp/p_lib.c", + "crypto/evp/evp_key.c", + "crypto/evp/encode.c", + "crypto/evp/e_aes_cbc_hmac_sha1.c", + "crypto/evp/e_aes_cbc_hmac_sha256.c", + "crypto/evp/m_mdc2.c", + "crypto/evp/e_null.c", + "crypto/evp/p_sign.c", + "crypto/evp/e_idea.c", + "crypto/evp/c_allc.c", + "crypto/evp/evp_pkey.c", + "crypto/evp/m_md4.c", + "crypto/ex_data.c", + "crypto/pkcs12/p12_p8e.c", + "crypto/pkcs12/p12_crt.c", + "crypto/pkcs12/p12_utl.c", + "crypto/pkcs12/p12_attr.c", + "crypto/pkcs12/p12_npas.c", + "crypto/pkcs12/p12_decr.c", + "crypto/pkcs12/p12_init.c", + "crypto/pkcs12/p12_kiss.c", + "crypto/pkcs12/p12_add.c", + "crypto/pkcs12/p12_p8d.c", + "crypto/pkcs12/p12_mutl.c", + "crypto/pkcs12/p12_crpt.c", + "crypto/pkcs12/pk12err.c", + "crypto/pkcs12/p12_asn.c", + "crypto/pkcs12/p12_key.c", + "crypto/ecdh/ech_key.c", + "crypto/ecdh/ech_ossl.c", + "crypto/ecdh/ech_lib.c", + "crypto/ecdh/ech_err.c", + "crypto/ecdh/ech_kdf.c", + "crypto/o_str.c", + "crypto/conf/conf_api.c", + "crypto/conf/conf_err.c", + "crypto/conf/conf_def.c", + "crypto/conf/conf_lib.c", + "crypto/conf/conf_mall.c", + "crypto/conf/conf_sap.c", + "crypto/conf/conf_mod.c", + "crypto/ebcdic.c", + "crypto/ecdsa/ecs_lib.c", + "crypto/ecdsa/ecs_asn1.c", + "crypto/ecdsa/ecs_ossl.c", + "crypto/ecdsa/ecs_vrf.c", + "crypto/ecdsa/ecs_sign.c", + "crypto/ecdsa/ecs_err.c", + "crypto/dso/dso_win32.c", + "crypto/dso/dso_lib.c", + "crypto/dso/dso_dlfcn.c", + "crypto/dso/dso_dl.c", + "crypto/dso/dso_beos.c", + "crypto/dso/dso_null.c", + "crypto/dso/dso_vms.c", + "crypto/dso/dso_err.c", + "crypto/dso/dso_openssl.c", + "crypto/cryptlib.c", + "crypto/md5/md5_one.c", + "crypto/md5/md5_dgst.c", + "crypto/pkcs7/pkcs7err.c", + "crypto/pkcs7/pk7_smime.c", + "crypto/pkcs7/bio_pk7.c", + "crypto/pkcs7/pk7_mime.c", + "crypto/pkcs7/pk7_lib.c", + "crypto/pkcs7/pk7_asn1.c", + "crypto/pkcs7/pk7_doit.c", + "crypto/pkcs7/pk7_attr.c", + "crypto/md4/md4_one.c", + "crypto/md4/md4_dgst.c", + "crypto/o_dir.c", + "crypto/buffer/buf_err.c", + "crypto/buffer/buf_str.c", + "crypto/buffer/buffer.c", + "crypto/cms/cms_lib.c", + "crypto/cms/cms_io.c", + "crypto/cms/cms_err.c", + "crypto/cms/cms_dd.c", + "crypto/cms/cms_smime.c", + "crypto/cms/cms_att.c", + "crypto/cms/cms_pwri.c", + "crypto/cms/cms_cd.c", + "crypto/cms/cms_sd.c", + "crypto/cms/cms_asn1.c", + "crypto/cms/cms_env.c", + "crypto/cms/cms_enc.c", + "crypto/cms/cms_ess.c", + "crypto/cms/cms_kari.c", + "crypto/mem_dbg.c", + "crypto/uid.c", + "crypto/stack/stack.c", + "crypto/ec/ec_ameth.c", + "crypto/ec/ec_err.c", + "crypto/ec/ec_lib.c", + "crypto/ec/ec_curve.c", + "crypto/ec/ec_oct.c", + "crypto/ec/ec_asn1.c", + "crypto/ec/ecp_oct.c", + "crypto/ec/ec_print.c", + "crypto/ec/ec2_smpl.c", + "crypto/ec/ecp_nistp224.c", + "crypto/ec/ec2_oct.c", + "crypto/ec/eck_prn.c", + "crypto/ec/ec_key.c", + "crypto/ec/ecp_nist.c", + "crypto/ec/ec_check.c", + "crypto/ec/ecp_smpl.c", + "crypto/ec/ec2_mult.c", + "crypto/ec/ecp_mont.c", + "crypto/ec/ecp_nistp521.c", + "crypto/ec/ec_mult.c", + "crypto/ec/ecp_nistputil.c", + "crypto/ec/ec_pmeth.c", + "crypto/ec/ec_cvt.c", + "crypto/ec/ecp_nistp256.c", + "crypto/krb5/krb5_asn.c", + "crypto/hmac/hmac.c", + "crypto/hmac/hm_ameth.c", + "crypto/hmac/hm_pmeth.c", + "crypto/comp/c_rle.c", + "crypto/comp/c_zlib.c", + "crypto/comp/comp_lib.c", + "crypto/comp/comp_err.c", + "crypto/des/fcrypt.c", + "crypto/des/str2key.c", + "crypto/des/cbc_cksm.c", + "crypto/des/des_enc.c", + "crypto/des/ofb_enc.c", + "crypto/des/read2pwd.c", + "crypto/des/ecb3_enc.c", + "crypto/des/rand_key.c", + "crypto/des/cfb64ede.c", + "crypto/des/rpc_enc.c", + "crypto/des/ofb64ede.c", + "crypto/des/qud_cksm.c", + "crypto/des/enc_writ.c", + "crypto/des/set_key.c", + "crypto/des/xcbc_enc.c", + "crypto/des/fcrypt_b.c", + "crypto/des/ede_cbcm_enc.c", + "crypto/des/des_old2.c", + "crypto/des/cfb_enc.c", + "crypto/des/ecb_enc.c", + "crypto/des/enc_read.c", + "crypto/des/des_old.c", + "crypto/des/ofb64enc.c", + "crypto/des/pcbc_enc.c", + "crypto/des/cbc_enc.c", + "crypto/des/cfb64enc.c", + "crypto/lhash/lh_stats.c", + "crypto/lhash/lhash.c", + "crypto/x509v3/v3_genn.c", + "crypto/x509v3/pcy_cache.c", + "crypto/x509v3/v3_sxnet.c", + "crypto/x509v3/v3_scts.c", + "crypto/x509v3/v3err.c", + "crypto/x509v3/v3_conf.c", + "crypto/x509v3/v3_utl.c", + "crypto/x509v3/v3_akeya.c", + "crypto/x509v3/v3_lib.c", + "crypto/x509v3/pcy_lib.c", + "crypto/x509v3/v3_cpols.c", + "crypto/x509v3/v3_ia5.c", + "crypto/x509v3/v3_bitst.c", + "crypto/x509v3/v3_skey.c", + "crypto/x509v3/v3_info.c", + "crypto/x509v3/v3_asid.c", + "crypto/x509v3/pcy_tree.c", + "crypto/x509v3/v3_pcons.c", + "crypto/x509v3/v3_bcons.c", + "crypto/x509v3/v3_pku.c", + "crypto/x509v3/v3_ocsp.c", + "crypto/x509v3/pcy_map.c", + "crypto/x509v3/v3_ncons.c", + "crypto/x509v3/v3_purp.c", + "crypto/x509v3/v3_enum.c", + "crypto/x509v3/v3_pmaps.c", + "crypto/x509v3/pcy_node.c", + "crypto/x509v3/v3_pcia.c", + "crypto/x509v3/v3_crld.c", + "crypto/x509v3/v3_pci.c", + "crypto/x509v3/v3_akey.c", + "crypto/x509v3/v3_addr.c", + "crypto/x509v3/v3_int.c", + "crypto/x509v3/v3_alt.c", + "crypto/x509v3/v3_extku.c", + "crypto/x509v3/v3_prn.c", + "crypto/x509v3/pcy_data.c", + "crypto/aes/aes_ofb.c", + "crypto/aes/aes_ctr.c", + "crypto/aes/aes_ecb.c", + "crypto/aes/aes_cfb.c", + "crypto/aes/aes_wrap.c", + "crypto/aes/aes_ige.c", + "crypto/aes/aes_misc.c", + "crypto/pqueue/pqueue.c", + "crypto/sha/sha_one.c", + "crypto/sha/sha_dgst.c", + "crypto/sha/sha512.c", + "crypto/sha/sha1_one.c", + "crypto/sha/sha1dgst.c", + "crypto/sha/sha256.c", + "crypto/whrlpool/wp_dgst.c", + "crypto/objects/obj_xref.c", + "crypto/objects/o_names.c", + "crypto/objects/obj_err.c", + "crypto/objects/obj_dat.c", + "crypto/objects/obj_lib.c", + "crypto/mem.c", + "crypto/fips_ers.c", + "crypto/o_fips.c", + "crypto/engine/eng_rdrand.c", + "crypto/engine/eng_err.c", + "crypto/engine/tb_ecdsa.c", + "crypto/engine/tb_rsa.c", + "crypto/engine/tb_cipher.c", + "crypto/engine/tb_dsa.c", + "crypto/engine/eng_lib.c", + "crypto/engine/tb_asnmth.c", + "crypto/engine/tb_ecdh.c", + "crypto/engine/tb_dh.c", + "crypto/engine/tb_store.c", + "crypto/engine/eng_init.c", + "crypto/engine/eng_cnf.c", + "crypto/engine/eng_all.c", + "crypto/engine/tb_digest.c", + "crypto/engine/tb_pkmeth.c", + "crypto/engine/eng_table.c", + "crypto/engine/eng_ctrl.c", + "crypto/engine/eng_list.c", + "crypto/engine/eng_cryptodev.c", + "crypto/engine/eng_pkey.c", + "crypto/engine/tb_rand.c", + "crypto/engine/eng_openssl.c", + "crypto/engine/eng_fat.c", + "crypto/engine/eng_dyn.c", + "crypto/ts/ts_rsp_verify.c", + "crypto/ts/ts_req_print.c", + "crypto/ts/ts_verify_ctx.c", + "crypto/ts/ts_req_utils.c", + "crypto/ts/ts_err.c", + "crypto/ts/ts_rsp_print.c", + "crypto/ts/ts_rsp_utils.c", + "crypto/ts/ts_lib.c", + "crypto/ts/ts_conf.c", + "crypto/ts/ts_asn1.c", + "crypto/ts/ts_rsp_sign.c", + "crypto/ocsp/ocsp_ext.c", + "crypto/ocsp/ocsp_cl.c", + "crypto/ocsp/ocsp_ht.c", + "crypto/ocsp/ocsp_lib.c", + "crypto/ocsp/ocsp_srv.c", + "crypto/ocsp/ocsp_vfy.c", + "crypto/ocsp/ocsp_err.c", + "crypto/ocsp/ocsp_prn.c", + "crypto/ocsp/ocsp_asn.c", + "crypto/bf/bf_cfb64.c", + "crypto/bf/bf_ecb.c", + "crypto/bf/bf_enc.c", + "crypto/bf/bf_skey.c", + "crypto/bf/bf_ofb64.c", + "crypto/idea/i_skey.c", + "crypto/idea/i_ofb64.c", + "crypto/idea/i_cbc.c", + "crypto/idea/i_ecb.c", + "crypto/idea/i_cfb64.c", + "crypto/cmac/cm_ameth.c", + "crypto/cmac/cmac.c", + "crypto/cmac/cm_pmeth.c", + "crypto/dh/dh_lib.c", + "crypto/dh/dh_key.c", + "crypto/dh/dh_asn1.c", + "crypto/dh/dh_depr.c", + "crypto/dh/dh_pmeth.c", + "crypto/dh/dh_prn.c", + "crypto/dh/dh_gen.c", + "crypto/dh/dh_ameth.c", + "crypto/dh/dh_check.c", + "crypto/dh/dh_err.c", + "crypto/dh/dh_kdf.c", + "crypto/dh/dh_rfc5114.c", + "crypto/modes/ccm128.c", + "crypto/modes/ofb128.c", + "crypto/modes/cts128.c", + "crypto/modes/ctr128.c", + "crypto/modes/gcm128.c", + "crypto/modes/cbc128.c", + "crypto/modes/cfb128.c", + "crypto/modes/xts128.c", + "crypto/modes/wrap128.c", + "crypto/camellia/cmll_cfb.c", + "crypto/camellia/cmll_ecb.c", + "crypto/camellia/cmll_utl.c", + "crypto/camellia/cmll_misc.c", + "crypto/camellia/cmll_ofb.c", + "crypto/camellia/cmll_ctr.c", + "crypto/seed/seed_ecb.c", + "crypto/seed/seed_cbc.c", + "crypto/seed/seed.c", + "crypto/seed/seed_ofb.c", + "crypto/seed/seed_cfb.c", + "crypto/txt_db/txt_db.c", + "crypto/cpt_err.c", + "crypto/pem/pem_pk8.c", + "crypto/pem/pem_lib.c", + "crypto/pem/pem_sign.c", + "crypto/pem/pem_all.c", + "crypto/pem/pem_info.c", + "crypto/pem/pem_pkey.c", + "crypto/pem/pem_seal.c", + "crypto/pem/pem_err.c", + "crypto/pem/pem_xaux.c", + "crypto/pem/pvkfmt.c", + "crypto/pem/pem_x509.c", + "crypto/pem/pem_oth.c", + "crypto/rand/rand_lib.c", + "crypto/rand/randfile.c", + "crypto/rand/rand_os2.c", + "crypto/rand/rand_unix.c", + "crypto/rand/rand_nw.c", + "crypto/rand/md_rand.c", + "crypto/rand/rand_err.c", + "crypto/rand/rand_win.c", + "crypto/rand/rand_egd.c", + "crypto/cversion.c", + "crypto/cast/c_ecb.c", + "crypto/cast/c_skey.c", + "crypto/cast/c_ofb64.c", + "crypto/cast/c_enc.c", + "crypto/cast/c_cfb64.c", + "crypto/o_time.c", + "crypto/mdc2/mdc2dgst.c", + "crypto/mdc2/mdc2_one.c", + "crypto/rc4/rc4_utl.c", + "crypto/ui/ui_compat.c", + "crypto/ui/ui_util.c", + "crypto/ui/ui_lib.c", + "crypto/ui/ui_err.c", + "crypto/ui/ui_openssl.c", + "crypto/bio/bf_buff.c", + "crypto/bio/bss_null.c", + "crypto/bio/bss_acpt.c", + "crypto/bio/bss_conn.c", + "crypto/bio/bss_fd.c", + "crypto/bio/bf_null.c", + "crypto/bio/bio_err.c", + "crypto/bio/bss_sock.c", + "crypto/bio/bss_mem.c", + "crypto/bio/b_dump.c", + "crypto/bio/b_print.c", + "crypto/bio/b_sock.c", + "crypto/bio/bss_dgram.c", + "crypto/bio/bf_nbio.c", + "crypto/bio/bio_lib.c", + "crypto/bio/bss_file.c", + "crypto/bio/bss_bio.c", + "crypto/bio/bss_log.c", + "crypto/bio/bio_cb.c", + "crypto/o_init.c", + "crypto/rc2/rc2_skey.c", + "crypto/rc2/rc2_cbc.c", + "crypto/rc2/rc2cfb64.c", + "crypto/rc2/rc2_ecb.c", + "crypto/rc2/rc2ofb64.c", + "crypto/bn/bn_x931p.c", + "crypto/bn/bn_blind.c", + "crypto/bn/bn_gf2m.c", + "crypto/bn/bn_const.c", + "crypto/bn/bn_sqr.c", + "crypto/bn/bn_nist.c", + "crypto/bn/bn_rand.c", + "crypto/bn/bn_err.c", + "crypto/bn/bn_div.c", + "crypto/bn/bn_kron.c", + "crypto/bn/bn_ctx.c", + "crypto/bn/bn_shift.c", + "crypto/bn/bn_mod.c", + "crypto/bn/bn_exp2.c", + "crypto/bn/bn_word.c", + "crypto/bn/bn_add.c", + "crypto/bn/bn_exp.c", + "crypto/bn/bn_mont.c", + "crypto/bn/bn_print.c", + "crypto/bn/bn_mul.c", + "crypto/bn/bn_prime.c", + "crypto/bn/bn_depr.c", + "crypto/bn/bn_gcd.c", + "crypto/bn/bn_mpi.c", + "crypto/bn/bn_sqrt.c", + "crypto/bn/bn_recp.c", + "crypto/bn/bn_lib.c", + "crypto/ripemd/rmd_dgst.c", + "crypto/ripemd/rmd_one.c", + "crypto/rsa/rsa_x931.c", + "crypto/rsa/rsa_depr.c", + "crypto/rsa/rsa_saos.c", + "crypto/rsa/rsa_crpt.c", + "crypto/rsa/rsa_pss.c", + "crypto/rsa/rsa_oaep.c", + "crypto/rsa/rsa_null.c", + "crypto/rsa/rsa_gen.c", + "crypto/rsa/rsa_prn.c", + "crypto/rsa/rsa_pmeth.c", + "crypto/rsa/rsa_asn1.c", + "crypto/rsa/rsa_ssl.c", + "crypto/rsa/rsa_ameth.c", + "crypto/rsa/rsa_pk1.c", + "crypto/rsa/rsa_err.c", + "crypto/rsa/rsa_lib.c", + "crypto/rsa/rsa_none.c", + "crypto/rsa/rsa_chk.c", + "crypto/rsa/rsa_eay.c", + "crypto/rsa/rsa_sign.c", + "crypto/srp/srp_lib.c", + "crypto/srp/srp_vfy.c", + "crypto/err/err.c", + "crypto/err/err_prn.c", + "crypto/err/err_all.c", + "crypto/mem_clr.c", + "crypto/rc4/rc4_skey.c", + "crypto/rc4/rc4_enc.c", + "crypto/camellia/camellia.c", + "crypto/camellia/cmll_cbc.c", + #"crypto/aes/aes_x86core.c", + "crypto/aes/aes_core.c", + "crypto/aes/aes_cbc.c", + "crypto/whrlpool/wp_block.c", + "crypto/bn/bn_asm.c", + ] - if "platform" in env and env["platform"] == "winrt": - thirdparty_sources += ['winrt.cpp'] + if "platform" in env and env["platform"] == "winrt": + thirdparty_sources += ['winrt.cpp'] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_openssl.add_source_files(env.modules_sources, thirdparty_sources) + env_openssl.add_source_files(env.modules_sources, thirdparty_sources) - # FIXME: Clone the environment to make a env_openssl and not pollute the modules env - thirdparty_include_paths = [ - "", - "crypto", - "crypto/asn1", - "crypto/evp", - "crypto/modes", - "openssl", - ] - env_openssl.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) + # FIXME: Clone the environment to make a env_openssl and not pollute the modules env + thirdparty_include_paths = [ + "", + "crypto", + "crypto/asn1", + "crypto/evp", + "crypto/modes", + "openssl", + ] + env_openssl.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) - env_openssl.Append(CPPFLAGS = ["-DOPENSSL_NO_ASM", "-DOPENSSL_THREADS", "-DL_ENDIAN"]) + env_openssl.Append(CPPFLAGS = ["-DOPENSSL_NO_ASM", "-DOPENSSL_THREADS", "-DL_ENDIAN"]) - # Workaround for compilation error with GCC/Clang when -Werror is too greedy (GH-4517) - import os - import methods - if not (os.name=="nt" and os.getenv("VCINSTALLDIR")): # not Windows and not MSVC - env_openssl.Append(CFLAGS = ["-Wno-error=implicit-function-declaration"]) + # Workaround for compilation error with GCC/Clang when -Werror is too greedy (GH-4517) + import os + import methods + if not (os.name=="nt" and os.getenv("VCINSTALLDIR")): # not Windows and not MSVC + env_openssl.Append(CFLAGS = ["-Wno-error=implicit-function-declaration"]) # Module sources @@ -681,7 +681,7 @@ env_openssl.add_source_files(env.modules_sources, "*.c") # platform/winrt need to know openssl is available, pass to main env if "platform" in env and env["platform"] == "winrt": - env.Append(CPPPATH = [thirdparty_dir]) - env.Append(CPPFLAGS = ['-DOPENSSL_ENABLED']); + env.Append(CPPPATH = [thirdparty_dir]) + env.Append(CPPFLAGS = ['-DOPENSSL_ENABLED']); Export('env') diff --git a/modules/openssl/config.py b/modules/openssl/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/openssl/config.py +++ b/modules/openssl/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/opus/SCsub b/modules/opus/SCsub index 603f5a48c42..2cfd439ebef 100644 --- a/modules/opus/SCsub +++ b/modules/opus/SCsub @@ -7,209 +7,209 @@ env_opus = env_modules.Clone() # Thirdparty source files if (env["opus"] != "system"): # builtin - thirdparty_dir = "#thirdparty/opus/" + thirdparty_dir = "#thirdparty/opus/" - thirdparty_sources = [ - "silk/tables_other.c", - "silk/sum_sqr_shift.c", - "silk/PLC.c", - "silk/dec_API.c", - "silk/decode_pulses.c", - "silk/inner_prod_aligned.c", - "silk/init_encoder.c", - "silk/interpolate.c", - "silk/stereo_encode_pred.c", - "silk/decode_frame.c", - "silk/NLSF_del_dec_quant.c", - "silk/VAD.c", - "silk/resampler_private_AR2.c", - "silk/NLSF_unpack.c", - "silk/resampler_down2.c", - "silk/sort.c", - "silk/resampler_private_IIR_FIR.c", - "silk/resampler_down2_3.c", - "silk/resampler_private_up2_HQ.c", - "silk/tables_gain.c", - "silk/stereo_find_predictor.c", - "silk/stereo_quant_pred.c", - "silk/NLSF_stabilize.c", - "silk/ana_filt_bank_1.c", - "silk/check_control_input.c", - "silk/bwexpander.c", - "silk/A2NLSF.c", - "silk/LPC_inv_pred_gain.c", - "silk/log2lin.c", - "silk/process_NLSFs.c", - "silk/sigm_Q15.c", - "silk/VQ_WMat_EC.c", - "silk/quant_LTP_gains.c", - "silk/resampler_private_down_FIR.c", - "silk/NLSF_decode.c", - "silk/control_codec.c", - "silk/NLSF_VQ_weights_laroia.c", - "silk/decode_pitch.c", - "silk/stereo_decode_pred.c", - "silk/tables_pulses_per_block.c", - "silk/init_decoder.c", - "silk/table_LSF_cos.c", - "silk/decode_core.c", - "silk/code_signs.c", - "silk/enc_API.c", - "silk/tables_LTP.c", - "silk/pitch_est_tables.c", - "silk/biquad_alt.c", - "silk/encode_indices.c", - "silk/tables_NLSF_CB_WB.c", - "silk/debug.c", - "silk/decode_parameters.c", - "silk/tables_pitch_lag.c", - "silk/NLSF2A.c", - "silk/resampler.c", - "silk/decode_indices.c", - "silk/NLSF_VQ.c", - "silk/bwexpander_32.c", - "silk/tables_NLSF_CB_NB_MB.c", - "silk/encode_pulses.c", - "silk/NSQ_del_dec.c", - "silk/control_SNR.c", - "silk/shell_coder.c", - "silk/NLSF_encode.c", - "silk/stereo_MS_to_LR.c", - "silk/stereo_LR_to_MS.c", - "silk/HP_variable_cutoff.c", - "silk/LPC_analysis_filter.c", - "silk/CNG.c", - "silk/decoder_set_fs.c", - "silk/resampler_rom.c", - "silk/control_audio_bandwidth.c", - "silk/lin2log.c", - "silk/LP_variable_cutoff.c", - "silk/NSQ.c", - "silk/gain_quant.c", - "celt/laplace.c", - "celt/vq.c", - "celt/quant_bands.c", - "celt/kiss_fft.c", - "celt/entcode.c", - "celt/entenc.c", - "celt/celt_lpc.c", - "celt/pitch.c", - "celt/rate.c", - "celt/mathops.c", - #"celt/arm/armcpu.c", - #"celt/arm/celt_neon_intr.c", - #"celt/arm/celt_ne10_mdct.c", - #"celt/arm/celt_ne10_fft.c", - #"celt/arm/arm_celt_map.c", - "celt/celt_encoder.c", - "celt/celt.c", - "celt/bands.c", - "celt/cwrs.c", - "celt/entdec.c", - "celt/celt_decoder.c", - "celt/mdct.c", - "celt/modes.c", - "repacketizer.c", - "mlp_data.c", - "opus_multistream.c", - "opusfile.c", - "opus_encoder.c", - "analysis.c", - "mlp.c", - "info.c", - "stream.c", - "opus_decoder.c", - "internal.c", - "wincerts.c", - "opus.c", - "opus_multistream_encoder.c", - "http.c", - "opus_multistream_decoder.c" - ] + thirdparty_sources = [ + "silk/tables_other.c", + "silk/sum_sqr_shift.c", + "silk/PLC.c", + "silk/dec_API.c", + "silk/decode_pulses.c", + "silk/inner_prod_aligned.c", + "silk/init_encoder.c", + "silk/interpolate.c", + "silk/stereo_encode_pred.c", + "silk/decode_frame.c", + "silk/NLSF_del_dec_quant.c", + "silk/VAD.c", + "silk/resampler_private_AR2.c", + "silk/NLSF_unpack.c", + "silk/resampler_down2.c", + "silk/sort.c", + "silk/resampler_private_IIR_FIR.c", + "silk/resampler_down2_3.c", + "silk/resampler_private_up2_HQ.c", + "silk/tables_gain.c", + "silk/stereo_find_predictor.c", + "silk/stereo_quant_pred.c", + "silk/NLSF_stabilize.c", + "silk/ana_filt_bank_1.c", + "silk/check_control_input.c", + "silk/bwexpander.c", + "silk/A2NLSF.c", + "silk/LPC_inv_pred_gain.c", + "silk/log2lin.c", + "silk/process_NLSFs.c", + "silk/sigm_Q15.c", + "silk/VQ_WMat_EC.c", + "silk/quant_LTP_gains.c", + "silk/resampler_private_down_FIR.c", + "silk/NLSF_decode.c", + "silk/control_codec.c", + "silk/NLSF_VQ_weights_laroia.c", + "silk/decode_pitch.c", + "silk/stereo_decode_pred.c", + "silk/tables_pulses_per_block.c", + "silk/init_decoder.c", + "silk/table_LSF_cos.c", + "silk/decode_core.c", + "silk/code_signs.c", + "silk/enc_API.c", + "silk/tables_LTP.c", + "silk/pitch_est_tables.c", + "silk/biquad_alt.c", + "silk/encode_indices.c", + "silk/tables_NLSF_CB_WB.c", + "silk/debug.c", + "silk/decode_parameters.c", + "silk/tables_pitch_lag.c", + "silk/NLSF2A.c", + "silk/resampler.c", + "silk/decode_indices.c", + "silk/NLSF_VQ.c", + "silk/bwexpander_32.c", + "silk/tables_NLSF_CB_NB_MB.c", + "silk/encode_pulses.c", + "silk/NSQ_del_dec.c", + "silk/control_SNR.c", + "silk/shell_coder.c", + "silk/NLSF_encode.c", + "silk/stereo_MS_to_LR.c", + "silk/stereo_LR_to_MS.c", + "silk/HP_variable_cutoff.c", + "silk/LPC_analysis_filter.c", + "silk/CNG.c", + "silk/decoder_set_fs.c", + "silk/resampler_rom.c", + "silk/control_audio_bandwidth.c", + "silk/lin2log.c", + "silk/LP_variable_cutoff.c", + "silk/NSQ.c", + "silk/gain_quant.c", + "celt/laplace.c", + "celt/vq.c", + "celt/quant_bands.c", + "celt/kiss_fft.c", + "celt/entcode.c", + "celt/entenc.c", + "celt/celt_lpc.c", + "celt/pitch.c", + "celt/rate.c", + "celt/mathops.c", + #"celt/arm/armcpu.c", + #"celt/arm/celt_neon_intr.c", + #"celt/arm/celt_ne10_mdct.c", + #"celt/arm/celt_ne10_fft.c", + #"celt/arm/arm_celt_map.c", + "celt/celt_encoder.c", + "celt/celt.c", + "celt/bands.c", + "celt/cwrs.c", + "celt/entdec.c", + "celt/celt_decoder.c", + "celt/mdct.c", + "celt/modes.c", + "repacketizer.c", + "mlp_data.c", + "opus_multistream.c", + "opusfile.c", + "opus_encoder.c", + "analysis.c", + "mlp.c", + "info.c", + "stream.c", + "opus_decoder.c", + "internal.c", + "wincerts.c", + "opus.c", + "opus_multistream_encoder.c", + "http.c", + "opus_multistream_decoder.c" + ] - opus_sources_silk = [] + opus_sources_silk = [] - if("opus_fixed_point" in env and env.opus_fixed_point=="yes"): - env_opus.Append(CFLAGS = ["-DFIXED_POINT"]) - opus_sources_silk = [ - "silk/fixed/schur64_FIX.c", - "silk/fixed/residual_energy16_FIX.c", - "silk/fixed/encode_frame_FIX.c", - "silk/fixed/regularize_correlations_FIX.c", - "silk/fixed/apply_sine_window_FIX.c", - "silk/fixed/solve_LS_FIX.c", - "silk/fixed/schur_FIX.c", - "silk/fixed/pitch_analysis_core_FIX.c", - "silk/fixed/noise_shape_analysis_FIX.c", - "silk/fixed/find_LTP_FIX.c", - "silk/fixed/vector_ops_FIX.c", - "silk/fixed/autocorr_FIX.c", - "silk/fixed/warped_autocorrelation_FIX.c", - "silk/fixed/find_pitch_lags_FIX.c", - "silk/fixed/k2a_Q16_FIX.c", - "silk/fixed/LTP_scale_ctrl_FIX.c", - "silk/fixed/corrMatrix_FIX.c", - "silk/fixed/prefilter_FIX.c", - "silk/fixed/find_LPC_FIX.c", - "silk/fixed/residual_energy_FIX.c", - "silk/fixed/process_gains_FIX.c", - "silk/fixed/LTP_analysis_filter_FIX.c", - "silk/fixed/k2a_FIX.c", - "silk/fixed/burg_modified_FIX.c", - "silk/fixed/find_pred_coefs_FIX.c" - ] - else: - opus_sources_silk = [ - "silk/float/LTP_scale_ctrl_FLP.c", - "silk/float/regularize_correlations_FLP.c", - "silk/float/corrMatrix_FLP.c", - "silk/float/LPC_analysis_filter_FLP.c", - "silk/float/levinsondurbin_FLP.c", - "silk/float/schur_FLP.c", - "silk/float/scale_vector_FLP.c", - "silk/float/apply_sine_window_FLP.c", - "silk/float/pitch_analysis_core_FLP.c", - "silk/float/wrappers_FLP.c", - "silk/float/bwexpander_FLP.c", - "silk/float/warped_autocorrelation_FLP.c", - "silk/float/solve_LS_FLP.c", - "silk/float/find_LPC_FLP.c", - "silk/float/autocorrelation_FLP.c", - "silk/float/find_pred_coefs_FLP.c", - "silk/float/find_pitch_lags_FLP.c", - "silk/float/burg_modified_FLP.c", - "silk/float/find_LTP_FLP.c", - "silk/float/energy_FLP.c", - "silk/float/sort_FLP.c", - "silk/float/LPC_inv_pred_gain_FLP.c", - "silk/float/k2a_FLP.c", - "silk/float/noise_shape_analysis_FLP.c", - "silk/float/inner_product_FLP.c", - "silk/float/process_gains_FLP.c", - "silk/float/encode_frame_FLP.c", - "silk/float/scale_copy_vector_FLP.c", - "silk/float/residual_energy_FLP.c", - "silk/float/LTP_analysis_filter_FLP.c", - "silk/float/prefilter_FLP.c" - ] + if("opus_fixed_point" in env and env.opus_fixed_point=="yes"): + env_opus.Append(CFLAGS = ["-DFIXED_POINT"]) + opus_sources_silk = [ + "silk/fixed/schur64_FIX.c", + "silk/fixed/residual_energy16_FIX.c", + "silk/fixed/encode_frame_FIX.c", + "silk/fixed/regularize_correlations_FIX.c", + "silk/fixed/apply_sine_window_FIX.c", + "silk/fixed/solve_LS_FIX.c", + "silk/fixed/schur_FIX.c", + "silk/fixed/pitch_analysis_core_FIX.c", + "silk/fixed/noise_shape_analysis_FIX.c", + "silk/fixed/find_LTP_FIX.c", + "silk/fixed/vector_ops_FIX.c", + "silk/fixed/autocorr_FIX.c", + "silk/fixed/warped_autocorrelation_FIX.c", + "silk/fixed/find_pitch_lags_FIX.c", + "silk/fixed/k2a_Q16_FIX.c", + "silk/fixed/LTP_scale_ctrl_FIX.c", + "silk/fixed/corrMatrix_FIX.c", + "silk/fixed/prefilter_FIX.c", + "silk/fixed/find_LPC_FIX.c", + "silk/fixed/residual_energy_FIX.c", + "silk/fixed/process_gains_FIX.c", + "silk/fixed/LTP_analysis_filter_FIX.c", + "silk/fixed/k2a_FIX.c", + "silk/fixed/burg_modified_FIX.c", + "silk/fixed/find_pred_coefs_FIX.c" + ] + else: + opus_sources_silk = [ + "silk/float/LTP_scale_ctrl_FLP.c", + "silk/float/regularize_correlations_FLP.c", + "silk/float/corrMatrix_FLP.c", + "silk/float/LPC_analysis_filter_FLP.c", + "silk/float/levinsondurbin_FLP.c", + "silk/float/schur_FLP.c", + "silk/float/scale_vector_FLP.c", + "silk/float/apply_sine_window_FLP.c", + "silk/float/pitch_analysis_core_FLP.c", + "silk/float/wrappers_FLP.c", + "silk/float/bwexpander_FLP.c", + "silk/float/warped_autocorrelation_FLP.c", + "silk/float/solve_LS_FLP.c", + "silk/float/find_LPC_FLP.c", + "silk/float/autocorrelation_FLP.c", + "silk/float/find_pred_coefs_FLP.c", + "silk/float/find_pitch_lags_FLP.c", + "silk/float/burg_modified_FLP.c", + "silk/float/find_LTP_FLP.c", + "silk/float/energy_FLP.c", + "silk/float/sort_FLP.c", + "silk/float/LPC_inv_pred_gain_FLP.c", + "silk/float/k2a_FLP.c", + "silk/float/noise_shape_analysis_FLP.c", + "silk/float/inner_product_FLP.c", + "silk/float/process_gains_FLP.c", + "silk/float/encode_frame_FLP.c", + "silk/float/scale_copy_vector_FLP.c", + "silk/float/residual_energy_FLP.c", + "silk/float/LTP_analysis_filter_FLP.c", + "silk/float/prefilter_FLP.c" + ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources + opus_sources_silk] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources + opus_sources_silk] - env_opus.add_source_files(env.modules_sources, thirdparty_sources) - env_opus.Append(CFLAGS=["-DHAVE_CONFIG_H"]) + env_opus.add_source_files(env.modules_sources, thirdparty_sources) + env_opus.Append(CFLAGS=["-DHAVE_CONFIG_H"]) - thirdparty_include_paths = [ - "", - "celt", - "silk", - "silk/fixed", - "silk/float", - ] - env_opus.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) + thirdparty_include_paths = [ + "", + "celt", + "silk", + "silk/fixed", + "silk/float", + ] + env_opus.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) - # also requires libogg - if (env["libogg"] != "system"): # builtin - env_opus.Append(CPPPATH = ["#thirdparty/libogg"]) + # also requires libogg + if (env["libogg"] != "system"): # builtin + env_opus.Append(CPPPATH = ["#thirdparty/libogg"]) # Module files env_opus.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/opus/config.py b/modules/opus/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/opus/config.py +++ b/modules/opus/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/pbm/config.py b/modules/pbm/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/pbm/config.py +++ b/modules/pbm/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/pvr/SCsub b/modules/pvr/SCsub index 4ead52f82ff..04a69cfa23d 100644 --- a/modules/pvr/SCsub +++ b/modules/pvr/SCsub @@ -9,11 +9,11 @@ env_pvr = env_modules.Clone() # Not unbundled so far since not widespread as shared library thirdparty_dir = "#thirdparty/pvrtccompressor/" thirdparty_sources = [ - "BitScale.cpp", - "MortonTable.cpp", - "PvrTcDecoder.cpp", - "PvrTcEncoder.cpp", - "PvrTcPacket.cpp", + "BitScale.cpp", + "MortonTable.cpp", + "PvrTcDecoder.cpp", + "PvrTcEncoder.cpp", + "PvrTcPacket.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] diff --git a/modules/pvr/config.py b/modules/pvr/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/pvr/config.py +++ b/modules/pvr/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/squish/SCsub b/modules/squish/SCsub index 3fdc587652e..116fc0bb7f4 100644 --- a/modules/squish/SCsub +++ b/modules/squish/SCsub @@ -7,23 +7,23 @@ env_squish = env_modules.Clone() # Thirdparty source files if (env["squish"] != "system"): # builtin - thirdparty_dir = "#thirdparty/squish/" - thirdparty_sources = [ - "alpha.cpp", - "clusterfit.cpp", - "colourblock.cpp", - "colourfit.cpp", - "colourset.cpp", - "maths.cpp", - "rangefit.cpp", - "singlecolourfit.cpp", - "squish.cpp", - ] + thirdparty_dir = "#thirdparty/squish/" + thirdparty_sources = [ + "alpha.cpp", + "clusterfit.cpp", + "colourblock.cpp", + "colourfit.cpp", + "colourset.cpp", + "maths.cpp", + "rangefit.cpp", + "singlecolourfit.cpp", + "squish.cpp", + ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_squish.add_source_files(env.modules_sources, thirdparty_sources) - env_squish.Append(CPPPATH = [thirdparty_dir]) + env_squish.add_source_files(env.modules_sources, thirdparty_sources) + env_squish.Append(CPPPATH = [thirdparty_dir]) # Godot source files env_squish.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/squish/config.py b/modules/squish/config.py index d28d9c702ef..81e6163a9b2 100644 --- a/modules/squish/config.py +++ b/modules/squish/config.py @@ -1,10 +1,10 @@ def can_build(platform): - return True + return True def configure(env): - # Tools only, disabled for non-tools - # TODO: Find a cleaner way to achieve that - if (env["tools"] == "no"): - env["module_squish_enabled"] = "no" - env.disabled_modules.append("squish") + # Tools only, disabled for non-tools + # TODO: Find a cleaner way to achieve that + if (env["tools"] == "no"): + env["module_squish_enabled"] = "no" + env.disabled_modules.append("squish") diff --git a/modules/theora/SCsub b/modules/theora/SCsub index 22c618fe8b2..cbd19ee6d68 100644 --- a/modules/theora/SCsub +++ b/modules/theora/SCsub @@ -7,77 +7,77 @@ env_theora = env_modules.Clone() # Thirdparty source files if (env["libtheora"] != "system"): # builtin - thirdparty_dir = "#thirdparty/libtheora/" - thirdparty_sources = [ - #"analyze.c", - #"apiwrapper.c", - "bitpack.c", - "cpu.c", - #"decapiwrapper.c", - "decinfo.c", - "decode.c", - "dequant.c", - #"encapiwrapper.c", - #"encfrag.c", - #"encinfo.c", - #"encode.c", - #"encoder_disabled.c", - #"enquant.c", - #"fdct.c", - "fragment.c", - "huffdec.c", - #"huffenc.c", - "idct.c", - "info.c", - "internal.c", - #"mathops.c", - #"mcenc.c", - "quant.c", - #"rate.c", - "state.c", - #"tokenize.c", - ] + thirdparty_dir = "#thirdparty/libtheora/" + thirdparty_sources = [ + #"analyze.c", + #"apiwrapper.c", + "bitpack.c", + "cpu.c", + #"decapiwrapper.c", + "decinfo.c", + "decode.c", + "dequant.c", + #"encapiwrapper.c", + #"encfrag.c", + #"encinfo.c", + #"encode.c", + #"encoder_disabled.c", + #"enquant.c", + #"fdct.c", + "fragment.c", + "huffdec.c", + #"huffenc.c", + "idct.c", + "info.c", + "internal.c", + #"mathops.c", + #"mcenc.c", + "quant.c", + #"rate.c", + "state.c", + #"tokenize.c", + ] - thirdparty_sources_x86 = [ - #"x86/mmxencfrag.c", - #"x86/mmxfdct.c", - "x86/mmxfrag.c", - "x86/mmxidct.c", - "x86/mmxstate.c", - #"x86/sse2fdct.c", - #"x86/x86enc.c", - "x86/x86state.c", - ] + thirdparty_sources_x86 = [ + #"x86/mmxencfrag.c", + #"x86/mmxfdct.c", + "x86/mmxfrag.c", + "x86/mmxidct.c", + "x86/mmxstate.c", + #"x86/sse2fdct.c", + #"x86/x86enc.c", + "x86/x86state.c", + ] - thirdparty_sources_x86_vc = [ - #"x86_vc/mmxencfrag.c", - #"x86_vc/mmxfdct.c", - "x86_vc/mmxfrag.c", - "x86_vc/mmxidct.c", - "x86_vc/mmxstate.c", - #"x86_vc/x86enc.c", - "x86_vc/x86state.c", - ] + thirdparty_sources_x86_vc = [ + #"x86_vc/mmxencfrag.c", + #"x86_vc/mmxfdct.c", + "x86_vc/mmxfrag.c", + "x86_vc/mmxidct.c", + "x86_vc/mmxstate.c", + #"x86_vc/x86enc.c", + "x86_vc/x86state.c", + ] - if (env["x86_libtheora_opt_gcc"]): - thirdparty_sources += thirdparty_sources_x86 + if (env["x86_libtheora_opt_gcc"]): + thirdparty_sources += thirdparty_sources_x86 - if (env["x86_libtheora_opt_vc"]): - thirdparty_sources += thirdparty_sources_x86_vc + if (env["x86_libtheora_opt_vc"]): + thirdparty_sources += thirdparty_sources_x86_vc - if (env["x86_libtheora_opt_gcc"] or env["x86_libtheora_opt_vc"]): - env_theora.Append(CCFLAGS = ["-DOC_X86_ASM"]) + if (env["x86_libtheora_opt_gcc"] or env["x86_libtheora_opt_vc"]): + env_theora.Append(CCFLAGS = ["-DOC_X86_ASM"]) - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_theora.add_source_files(env.modules_sources, thirdparty_sources) - env_theora.Append(CPPPATH = [thirdparty_dir]) + env_theora.add_source_files(env.modules_sources, thirdparty_sources) + env_theora.Append(CPPPATH = [thirdparty_dir]) - # also requires libogg and libvorbis - if (env["libogg"] != "system"): # builtin - env_theora.Append(CPPPATH = ["#thirdparty/libogg"]) - if (env["libvorbis"] != "system"): # builtin - env_theora.Append(CPPPATH = ["#thirdparty/libvorbis"]) + # also requires libogg and libvorbis + if (env["libogg"] != "system"): # builtin + env_theora.Append(CPPPATH = ["#thirdparty/libogg"]) + if (env["libvorbis"] != "system"): # builtin + env_theora.Append(CPPPATH = ["#thirdparty/libvorbis"]) # Godot source files env_theora.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/theora/config.py b/modules/theora/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/theora/config.py +++ b/modules/theora/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/visual_script/config.py b/modules/visual_script/config.py index ea7e83378a9..ae0a3d75eed 100644 --- a/modules/visual_script/config.py +++ b/modules/visual_script/config.py @@ -1,11 +1,11 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/vorbis/SCsub b/modules/vorbis/SCsub index 3220cb454c8..4189e119cfd 100644 --- a/modules/vorbis/SCsub +++ b/modules/vorbis/SCsub @@ -7,43 +7,43 @@ env_vorbis = env_modules.Clone() # Thirdparty source files if (env["libvorbis"] != "system"): # builtin - thirdparty_dir = "#thirdparty/libvorbis/" - thirdparty_sources = [ - #"analysis.c", - #"barkmel.c", - "bitrate.c", - "block.c", - "codebook.c", - "envelope.c", - "floor0.c", - "floor1.c", - "info.c", - "lookup.c", - "lpc.c", - "lsp.c", - "mapping0.c", - "mdct.c", - "psy.c", - #"psytune.c", - "registry.c", - "res0.c", - "sharedbook.c", - "smallft.c", - "synthesis.c", - #"tone.c", - #"vorbisenc.c", - "vorbisfile.c", - "window.c", - ] + thirdparty_dir = "#thirdparty/libvorbis/" + thirdparty_sources = [ + #"analysis.c", + #"barkmel.c", + "bitrate.c", + "block.c", + "codebook.c", + "envelope.c", + "floor0.c", + "floor1.c", + "info.c", + "lookup.c", + "lpc.c", + "lsp.c", + "mapping0.c", + "mdct.c", + "psy.c", + #"psytune.c", + "registry.c", + "res0.c", + "sharedbook.c", + "smallft.c", + "synthesis.c", + #"tone.c", + #"vorbisenc.c", + "vorbisfile.c", + "window.c", + ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_vorbis.add_source_files(env.modules_sources, thirdparty_sources) - env_vorbis.Append(CPPPATH = [thirdparty_dir]) + env_vorbis.add_source_files(env.modules_sources, thirdparty_sources) + env_vorbis.Append(CPPPATH = [thirdparty_dir]) - # also requires libogg - if (env["libogg"] != "system"): # builtin - env_vorbis.Append(CPPPATH = ["#thirdparty/libogg"]) + # also requires libogg + if (env["libogg"] != "system"): # builtin + env_vorbis.Append(CPPPATH = ["#thirdparty/libogg"]) # Godot source files env_vorbis.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/vorbis/config.py b/modules/vorbis/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/vorbis/config.py +++ b/modules/vorbis/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/webm/SCsub b/modules/webm/SCsub index fbd936a2c39..bb87e73980e 100644 --- a/modules/webm/SCsub +++ b/modules/webm/SCsub @@ -8,10 +8,10 @@ env_webm = env_modules.Clone() # Thirdparty source files thirdparty_libsimplewebm_dir = "#thirdparty/libsimplewebm/" thirdparty_libsimplewebm_sources = [ - "libwebm/mkvparser/mkvparser.cc", - "OpusVorbisDecoder.cpp", - "VPXDecoder.cpp", - "WebMDemuxer.cpp", + "libwebm/mkvparser/mkvparser.cc", + "OpusVorbisDecoder.cpp", + "VPXDecoder.cpp", + "WebMDemuxer.cpp", ] thirdparty_libsimplewebm_sources = [thirdparty_libsimplewebm_dir + file for file in thirdparty_libsimplewebm_sources] @@ -20,15 +20,15 @@ env_webm.Append(CPPPATH = [thirdparty_libsimplewebm_dir, thirdparty_libsimpleweb # also requires libogg, libvorbis and libopus if (env["libogg"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty/libogg"]) + env_webm.Append(CPPPATH = ["#thirdparty/libogg"]) if (env["libvorbis"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty/libvorbis"]) + env_webm.Append(CPPPATH = ["#thirdparty/libvorbis"]) if (env["opus"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty"]) + env_webm.Append(CPPPATH = ["#thirdparty"]) if (env["libvpx"] != "system"): # builtin - Export('env_webm') - SConscript("libvpx/SCsub") + Export('env_webm') + SConscript("libvpx/SCsub") # Godot source files env_webm.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/webm/config.py b/modules/webm/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/webm/config.py +++ b/modules/webm/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/modules/webm/libvpx/SCsub b/modules/webm/libvpx/SCsub index 706db972065..12e37886f4d 100644 --- a/modules/webm/libvpx/SCsub +++ b/modules/webm/libvpx/SCsub @@ -3,231 +3,231 @@ libvpx_dir = "#thirdparty/libvpx/" libvpx_sources = [ - "vp8/vp8_dx_iface.c", + "vp8/vp8_dx_iface.c", - "vp8/common/generic/systemdependent.c", + "vp8/common/generic/systemdependent.c", - "vp8/common/alloccommon.c", - "vp8/common/blockd.c", - "vp8/common/copy_c.c", - "vp8/common/debugmodes.c", - "vp8/common/dequantize.c", - "vp8/common/entropy.c", - "vp8/common/entropymode.c", - "vp8/common/entropymv.c", - "vp8/common/extend.c", - "vp8/common/filter.c", - "vp8/common/findnearmv.c", - "vp8/common/idct_blk.c", - "vp8/common/idctllm.c", - "vp8/common/loopfilter_filters.c", - "vp8/common/mbpitch.c", - "vp8/common/modecont.c", - "vp8/common/quant_common.c", - "vp8/common/reconinter.c", - "vp8/common/reconintra.c", - "vp8/common/reconintra4x4.c", - "vp8/common/rtcd.c", - "vp8/common/setupintrarecon.c", - "vp8/common/swapyv12buffer.c", - "vp8/common/treecoder.c", - "vp8/common/vp8_loopfilter.c", + "vp8/common/alloccommon.c", + "vp8/common/blockd.c", + "vp8/common/copy_c.c", + "vp8/common/debugmodes.c", + "vp8/common/dequantize.c", + "vp8/common/entropy.c", + "vp8/common/entropymode.c", + "vp8/common/entropymv.c", + "vp8/common/extend.c", + "vp8/common/filter.c", + "vp8/common/findnearmv.c", + "vp8/common/idct_blk.c", + "vp8/common/idctllm.c", + "vp8/common/loopfilter_filters.c", + "vp8/common/mbpitch.c", + "vp8/common/modecont.c", + "vp8/common/quant_common.c", + "vp8/common/reconinter.c", + "vp8/common/reconintra.c", + "vp8/common/reconintra4x4.c", + "vp8/common/rtcd.c", + "vp8/common/setupintrarecon.c", + "vp8/common/swapyv12buffer.c", + "vp8/common/treecoder.c", + "vp8/common/vp8_loopfilter.c", - "vp8/decoder/dboolhuff.c", - "vp8/decoder/decodeframe.c", - "vp8/decoder/decodemv.c", - "vp8/decoder/detokenize.c", - "vp8/decoder/onyxd_if.c", - "vp8/decoder/threading.c", + "vp8/decoder/dboolhuff.c", + "vp8/decoder/decodeframe.c", + "vp8/decoder/decodemv.c", + "vp8/decoder/detokenize.c", + "vp8/decoder/onyxd_if.c", + "vp8/decoder/threading.c", - "vp9/vp9_dx_iface.c", + "vp9/vp9_dx_iface.c", - "vp9/common/vp9_alloccommon.c", - "vp9/common/vp9_blockd.c", - "vp9/common/vp9_common_data.c", - "vp9/common/vp9_debugmodes.c", - "vp9/common/vp9_entropy.c", - "vp9/common/vp9_entropymode.c", - "vp9/common/vp9_entropymv.c", - "vp9/common/vp9_filter.c", - "vp9/common/vp9_frame_buffers.c", - "vp9/common/vp9_idct.c", - "vp9/common/vp9_loopfilter.c", - "vp9/common/vp9_mvref_common.c", - "vp9/common/vp9_pred_common.c", - "vp9/common/vp9_quant_common.c", - "vp9/common/vp9_reconinter.c", - "vp9/common/vp9_reconintra.c", - "vp9/common/vp9_rtcd.c", - "vp9/common/vp9_scale.c", - "vp9/common/vp9_scan.c", - "vp9/common/vp9_seg_common.c", - "vp9/common/vp9_thread_common.c", - "vp9/common/vp9_tile_common.c", + "vp9/common/vp9_alloccommon.c", + "vp9/common/vp9_blockd.c", + "vp9/common/vp9_common_data.c", + "vp9/common/vp9_debugmodes.c", + "vp9/common/vp9_entropy.c", + "vp9/common/vp9_entropymode.c", + "vp9/common/vp9_entropymv.c", + "vp9/common/vp9_filter.c", + "vp9/common/vp9_frame_buffers.c", + "vp9/common/vp9_idct.c", + "vp9/common/vp9_loopfilter.c", + "vp9/common/vp9_mvref_common.c", + "vp9/common/vp9_pred_common.c", + "vp9/common/vp9_quant_common.c", + "vp9/common/vp9_reconinter.c", + "vp9/common/vp9_reconintra.c", + "vp9/common/vp9_rtcd.c", + "vp9/common/vp9_scale.c", + "vp9/common/vp9_scan.c", + "vp9/common/vp9_seg_common.c", + "vp9/common/vp9_thread_common.c", + "vp9/common/vp9_tile_common.c", - "vp9/decoder/vp9_decodeframe.c", - "vp9/decoder/vp9_decodemv.c", - "vp9/decoder/vp9_decoder.c", - "vp9/decoder/vp9_detokenize.c", - "vp9/decoder/vp9_dsubexp.c", - "vp9/decoder/vp9_dthread.c", + "vp9/decoder/vp9_decodeframe.c", + "vp9/decoder/vp9_decodemv.c", + "vp9/decoder/vp9_decoder.c", + "vp9/decoder/vp9_detokenize.c", + "vp9/decoder/vp9_dsubexp.c", + "vp9/decoder/vp9_dthread.c", - "vpx/src/vpx_codec.c", - "vpx/src/vpx_decoder.c", - "vpx/src/vpx_image.c", - "vpx/src/vpx_psnr.c", + "vpx/src/vpx_codec.c", + "vpx/src/vpx_decoder.c", + "vpx/src/vpx_image.c", + "vpx/src/vpx_psnr.c", - "vpx_dsp/bitreader.c", - "vpx_dsp/bitreader_buffer.c", - "vpx_dsp/intrapred.c", - "vpx_dsp/inv_txfm.c", - "vpx_dsp/loopfilter.c", - "vpx_dsp/prob.c", - "vpx_dsp/vpx_convolve.c", - "vpx_dsp/vpx_dsp_rtcd.c", + "vpx_dsp/bitreader.c", + "vpx_dsp/bitreader_buffer.c", + "vpx_dsp/intrapred.c", + "vpx_dsp/inv_txfm.c", + "vpx_dsp/loopfilter.c", + "vpx_dsp/prob.c", + "vpx_dsp/vpx_convolve.c", + "vpx_dsp/vpx_dsp_rtcd.c", - "vpx_mem/vpx_mem.c", + "vpx_mem/vpx_mem.c", - "vpx_scale/vpx_scale_rtcd.c", + "vpx_scale/vpx_scale_rtcd.c", - "vpx_scale/generic/yv12config.c", - "vpx_scale/generic/yv12extend.c", + "vpx_scale/generic/yv12config.c", + "vpx_scale/generic/yv12extend.c", - "vpx_util/vpx_thread.c" + "vpx_util/vpx_thread.c" ] libvpx_sources_intrin_x86 = [ - "vp8/common/x86/filter_x86.c", - "vp8/common/x86/loopfilter_x86.c", - "vp8/common/x86/vp8_asm_stubs.c", + "vp8/common/x86/filter_x86.c", + "vp8/common/x86/loopfilter_x86.c", + "vp8/common/x86/vp8_asm_stubs.c", - "vpx_dsp/x86/vpx_asm_stubs.c" + "vpx_dsp/x86/vpx_asm_stubs.c" ] libvpx_sources_intrin_x86_mmx = [ - "vp8/common/x86/idct_blk_mmx.c", + "vp8/common/x86/idct_blk_mmx.c", ] libvpx_sources_intrin_x86_sse2 = [ - "vp8/common/x86/idct_blk_sse2.c", + "vp8/common/x86/idct_blk_sse2.c", - "vp9/common/x86/vp9_idct_intrin_sse2.c", + "vp9/common/x86/vp9_idct_intrin_sse2.c", - "vpx_dsp/x86/inv_txfm_sse2.c", - "vpx_dsp/x86/loopfilter_sse2.c", + "vpx_dsp/x86/inv_txfm_sse2.c", + "vpx_dsp/x86/loopfilter_sse2.c", ] libvpx_sources_intrin_x86_ssse3 = [ - "vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c" + "vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c" ] libvpx_sources_intrin_x86_avx2 = [ - "vpx_dsp/x86/loopfilter_avx2.c", - "vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c" + "vpx_dsp/x86/loopfilter_avx2.c", + "vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c" ] libvpx_sources_x86asm = [ - "vp8/common/x86/copy_sse2.asm", - "vp8/common/x86/copy_sse3.asm", - "vp8/common/x86/dequantize_mmx.asm", - "vp8/common/x86/idctllm_mmx.asm", - "vp8/common/x86/idctllm_sse2.asm", - "vp8/common/x86/iwalsh_mmx.asm", - "vp8/common/x86/iwalsh_sse2.asm", - "vp8/common/x86/loopfilter_sse2.asm", - "vp8/common/x86/recon_mmx.asm", - "vp8/common/x86/recon_sse2.asm", - "vp8/common/x86/subpixel_mmx.asm", - "vp8/common/x86/subpixel_sse2.asm", - "vp8/common/x86/subpixel_ssse3.asm", - "vp8/common/x86/vp8_loopfilter_mmx.asm", + "vp8/common/x86/copy_sse2.asm", + "vp8/common/x86/copy_sse3.asm", + "vp8/common/x86/dequantize_mmx.asm", + "vp8/common/x86/idctllm_mmx.asm", + "vp8/common/x86/idctllm_sse2.asm", + "vp8/common/x86/iwalsh_mmx.asm", + "vp8/common/x86/iwalsh_sse2.asm", + "vp8/common/x86/loopfilter_sse2.asm", + "vp8/common/x86/recon_mmx.asm", + "vp8/common/x86/recon_sse2.asm", + "vp8/common/x86/subpixel_mmx.asm", + "vp8/common/x86/subpixel_sse2.asm", + "vp8/common/x86/subpixel_ssse3.asm", + "vp8/common/x86/vp8_loopfilter_mmx.asm", - "vpx_dsp/x86/intrapred_sse2.asm", - "vpx_dsp/x86/intrapred_ssse3.asm", - "vpx_dsp/x86/inv_wht_sse2.asm", - "vpx_dsp/x86/vpx_convolve_copy_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm", + "vpx_dsp/x86/intrapred_sse2.asm", + "vpx_dsp/x86/intrapred_ssse3.asm", + "vpx_dsp/x86/inv_wht_sse2.asm", + "vpx_dsp/x86/vpx_convolve_copy_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_8t_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm", + "vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm", + "vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm", - "vpx_ports/emms.asm" + "vpx_ports/emms.asm" ] libvpx_sources_x86_64asm = [ - "vp8/common/x86/loopfilter_block_sse2_x86_64.asm", + "vp8/common/x86/loopfilter_block_sse2_x86_64.asm", - "vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm" + "vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm" ] libvpx_sources_arm = [ - "vpx_ports/arm_cpudetect.c", + "vpx_ports/arm_cpudetect.c", - "vp8/common/arm/loopfilter_arm.c", + "vp8/common/arm/loopfilter_arm.c", ] libvpx_sources_arm_neon = [ - "vp8/common/arm/neon/bilinearpredict_neon.c", - "vp8/common/arm/neon/copymem_neon.c", - "vp8/common/arm/neon/dc_only_idct_add_neon.c", - "vp8/common/arm/neon/dequant_idct_neon.c", - "vp8/common/arm/neon/dequantizeb_neon.c", - "vp8/common/arm/neon/idct_blk_neon.c", - "vp8/common/arm/neon/idct_dequant_0_2x_neon.c", - "vp8/common/arm/neon/idct_dequant_full_2x_neon.c", - "vp8/common/arm/neon/iwalsh_neon.c", - "vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c", - "vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c", - "vp8/common/arm/neon/mbloopfilter_neon.c", - "vp8/common/arm/neon/shortidct4x4llm_neon.c", - "vp8/common/arm/neon/sixtappredict_neon.c", - "vp8/common/arm/neon/vp8_loopfilter_neon.c", + "vp8/common/arm/neon/bilinearpredict_neon.c", + "vp8/common/arm/neon/copymem_neon.c", + "vp8/common/arm/neon/dc_only_idct_add_neon.c", + "vp8/common/arm/neon/dequant_idct_neon.c", + "vp8/common/arm/neon/dequantizeb_neon.c", + "vp8/common/arm/neon/idct_blk_neon.c", + "vp8/common/arm/neon/idct_dequant_0_2x_neon.c", + "vp8/common/arm/neon/idct_dequant_full_2x_neon.c", + "vp8/common/arm/neon/iwalsh_neon.c", + "vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c", + "vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c", + "vp8/common/arm/neon/mbloopfilter_neon.c", + "vp8/common/arm/neon/shortidct4x4llm_neon.c", + "vp8/common/arm/neon/sixtappredict_neon.c", + "vp8/common/arm/neon/vp8_loopfilter_neon.c", - "vp9/common/arm/neon/vp9_iht4x4_add_neon.c", - "vp9/common/arm/neon/vp9_iht8x8_add_neon.c", + "vp9/common/arm/neon/vp9_iht4x4_add_neon.c", + "vp9/common/arm/neon/vp9_iht8x8_add_neon.c", - "vpx_dsp/arm/idct16x16_1_add_neon.c", - "vpx_dsp/arm/idct16x16_add_neon.c", - "vpx_dsp/arm/idct16x16_neon.c", - "vpx_dsp/arm/idct32x32_1_add_neon.c", - "vpx_dsp/arm/idct32x32_add_neon.c", - "vpx_dsp/arm/idct4x4_1_add_neon.c", - "vpx_dsp/arm/idct4x4_add_neon.c", - "vpx_dsp/arm/idct8x8_1_add_neon.c", - "vpx_dsp/arm/idct8x8_add_neon.c", - "vpx_dsp/arm/intrapred_neon.c", - "vpx_dsp/arm/loopfilter_16_neon.c", - "vpx_dsp/arm/loopfilter_4_neon.c", - "vpx_dsp/arm/loopfilter_8_neon.c", - "vpx_dsp/arm/loopfilter_neon.c", - "vpx_dsp/arm/vpx_convolve8_avg_neon.c", - "vpx_dsp/arm/vpx_convolve8_neon.c", - "vpx_dsp/arm/vpx_convolve_avg_neon.c", - "vpx_dsp/arm/vpx_convolve_copy_neon.c", - "vpx_dsp/arm/vpx_convolve_neon.c" + "vpx_dsp/arm/idct16x16_1_add_neon.c", + "vpx_dsp/arm/idct16x16_add_neon.c", + "vpx_dsp/arm/idct16x16_neon.c", + "vpx_dsp/arm/idct32x32_1_add_neon.c", + "vpx_dsp/arm/idct32x32_add_neon.c", + "vpx_dsp/arm/idct4x4_1_add_neon.c", + "vpx_dsp/arm/idct4x4_add_neon.c", + "vpx_dsp/arm/idct8x8_1_add_neon.c", + "vpx_dsp/arm/idct8x8_add_neon.c", + "vpx_dsp/arm/intrapred_neon.c", + "vpx_dsp/arm/loopfilter_16_neon.c", + "vpx_dsp/arm/loopfilter_4_neon.c", + "vpx_dsp/arm/loopfilter_8_neon.c", + "vpx_dsp/arm/loopfilter_neon.c", + "vpx_dsp/arm/vpx_convolve8_avg_neon.c", + "vpx_dsp/arm/vpx_convolve8_neon.c", + "vpx_dsp/arm/vpx_convolve_avg_neon.c", + "vpx_dsp/arm/vpx_convolve_copy_neon.c", + "vpx_dsp/arm/vpx_convolve_neon.c" ] libvpx_sources_arm_neon_gas = [ - "vpx_dsp/arm/gas/intrapred_neon_asm.s", - "vpx_dsp/arm/gas/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas/save_reg_neon.s" + "vpx_dsp/arm/gas/intrapred_neon_asm.s", + "vpx_dsp/arm/gas/loopfilter_mb_neon.s", + "vpx_dsp/arm/gas/save_reg_neon.s" ] libvpx_sources_arm_neon_armasm_ms = [ - "vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm", - "vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm", - "vpx_dsp/arm/armasm_ms/save_reg_neon.asm" + "vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm", + "vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm", + "vpx_dsp/arm/armasm_ms/save_reg_neon.asm" ] libvpx_sources_arm_neon_gas_apple = [ - "vpx_dsp/arm/gas_apple/intrapred_neon_asm.s", - "vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas_apple/save_reg_neon.s" + "vpx_dsp/arm/gas_apple/intrapred_neon_asm.s", + "vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s", + "vpx_dsp/arm/gas_apple/save_reg_neon.s" ] libvpx_sources = [libvpx_dir + file for file in libvpx_sources] @@ -258,133 +258,133 @@ osx_fat = (env["platform"] == 'osx' and cpu_bits == 'fat') webm_cpu_x86 = False webm_cpu_arm = False if env["platform"] == 'winrt': - if 'arm' in env["PROGSUFFIX"]: - webm_cpu_arm = True - else: - webm_cpu_x86 = True + if 'arm' in env["PROGSUFFIX"]: + webm_cpu_arm = True + else: + webm_cpu_x86 = True else: - is_android_x86 = (env["platform"] == 'android' and env["android_arch"] == 'x86') - if is_android_x86: - cpu_bits = '32' - if osx_fat: - webm_cpu_x86 = True - else: - webm_cpu_x86 = (cpu_bits == '32' or cpu_bits == '64') and (env["platform"] == 'windows' or env["platform"] == 'x11' or env["platform"] == 'osx' or env["platform"] == 'haiku' or is_android_x86) - webm_cpu_arm = env["platform"] == 'iphone' or env["platform"] == 'bb10' or (env["platform"] == 'android' and env["android_arch"] != 'x86') + is_android_x86 = (env["platform"] == 'android' and env["android_arch"] == 'x86') + if is_android_x86: + cpu_bits = '32' + if osx_fat: + webm_cpu_x86 = True + else: + webm_cpu_x86 = (cpu_bits == '32' or cpu_bits == '64') and (env["platform"] == 'windows' or env["platform"] == 'x11' or env["platform"] == 'osx' or env["platform"] == 'haiku' or is_android_x86) + webm_cpu_arm = env["platform"] == 'iphone' or env["platform"] == 'bb10' or (env["platform"] == 'android' and env["android_arch"] != 'x86') if webm_cpu_x86: - import subprocess - import os + import subprocess + import os - yasm_paths = [ - "yasm", - "../../../yasm", - ] + yasm_paths = [ + "yasm", + "../../../yasm", + ] - yasm_found = False + yasm_found = False - devnull = open(os.devnull) - for yasm_path in yasm_paths: - try: - yasm_found = True - subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate() - except: - yasm_found = False - if yasm_found: - break + devnull = open(os.devnull) + for yasm_path in yasm_paths: + try: + yasm_found = True + subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate() + except: + yasm_found = False + if yasm_found: + break - if not yasm_found: - webm_cpu_x86 = False - print "YASM is necessary for WebM SIMD optimizations." + if not yasm_found: + webm_cpu_x86 = False + print "YASM is necessary for WebM SIMD optimizations." webm_simd_optimizations = False if webm_cpu_x86: - if osx_fat: - #'osx' platform only: run python script which will compile using 'yasm' command and then merge 32-bit and 64-bit using 'lipo' command - env_libvpx["AS"] = 'python modules/webm/libvpx/yasm_osx_fat.py' - env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] - env_libvpx["ASCOM"] = '$AS $ASFLAGS $TARGET $SOURCES' - else: - if env["platform"] == 'windows' or env["platform"] == 'winrt': - env_libvpx["ASFORMAT"] = 'win' - elif env["platform"] == 'osx': - env_libvpx["ASFORMAT"] = 'macho' - else: - env_libvpx["ASFORMAT"] = 'elf' - env_libvpx["ASFORMAT"] += cpu_bits + if osx_fat: + #'osx' platform only: run python script which will compile using 'yasm' command and then merge 32-bit and 64-bit using 'lipo' command + env_libvpx["AS"] = 'python modules/webm/libvpx/yasm_osx_fat.py' + env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + env_libvpx["ASCOM"] = '$AS $ASFLAGS $TARGET $SOURCES' + else: + if env["platform"] == 'windows' or env["platform"] == 'winrt': + env_libvpx["ASFORMAT"] = 'win' + elif env["platform"] == 'osx': + env_libvpx["ASFORMAT"] = 'macho' + else: + env_libvpx["ASFORMAT"] = 'elf' + env_libvpx["ASFORMAT"] += cpu_bits - env_libvpx["AS"] = 'yasm' - env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + ' -f $ASFORMAT -D $ASCPU' - env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' + env_libvpx["AS"] = 'yasm' + env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + ' -f $ASFORMAT -D $ASCPU' + env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' - if cpu_bits == '32': - env_libvpx["ASCPU"] = 'X86_32' - elif cpu_bits == '64': - env_libvpx["ASCPU"] = 'X86_64' + if cpu_bits == '32': + env_libvpx["ASCPU"] = 'X86_32' + elif cpu_bits == '64': + env_libvpx["ASCPU"] = 'X86_64' - env_libvpx.Append(CCFLAGS=['-DWEBM_X86ASM']) + env_libvpx.Append(CCFLAGS=['-DWEBM_X86ASM']) - webm_simd_optimizations = True + webm_simd_optimizations = True if webm_cpu_arm: - if env["platform"] == 'iphone': - env_libvpx["ASFLAGS"] = '-arch armv7' - elif env["platform"] == 'android': - env_libvpx["ASFLAGS"] = '-mfpu=neon' - elif env["platform"] == 'winrt': - env_libvpx["AS"] = 'armasm' - env_libvpx["ASFLAGS"] = '' - env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' + if env["platform"] == 'iphone': + env_libvpx["ASFLAGS"] = '-arch armv7' + elif env["platform"] == 'android': + env_libvpx["ASFLAGS"] = '-mfpu=neon' + elif env["platform"] == 'winrt': + env_libvpx["AS"] = 'armasm' + env_libvpx["ASFLAGS"] = '' + env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES' - env_libvpx.Append(CCFLAGS=['-DWEBM_ARMASM']) + env_libvpx.Append(CCFLAGS=['-DWEBM_ARMASM']) - webm_simd_optimizations = True + webm_simd_optimizations = True if webm_simd_optimizations == False: - print "WebM SIMD optimizations are disabled. Check if your CPU architecture, CPU bits or platform are supported!" + print "WebM SIMD optimizations are disabled. Check if your CPU architecture, CPU bits or platform are supported!" env_libvpx.add_source_files(env.modules_sources, libvpx_sources) if webm_cpu_x86: - is_clang_or_gcc = ('gcc' in env["CC"]) or ('clang' in env["CC"]) + is_clang_or_gcc = ('gcc' in env["CC"]) or ('clang' in env["CC"]) - env_libvpx_mmx = env_libvpx.Clone() - if cpu_bits == '32' and is_clang_or_gcc: - env_libvpx_mmx.Append(CCFLAGS=['-mmmx']) - env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx) + env_libvpx_mmx = env_libvpx.Clone() + if cpu_bits == '32' and is_clang_or_gcc: + env_libvpx_mmx.Append(CCFLAGS=['-mmmx']) + env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx) - env_libvpx_sse2 = env_libvpx.Clone() - if cpu_bits == '32' and is_clang_or_gcc: - env_libvpx_sse2.Append(CCFLAGS=['-msse2']) - env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2) + env_libvpx_sse2 = env_libvpx.Clone() + if cpu_bits == '32' and is_clang_or_gcc: + env_libvpx_sse2.Append(CCFLAGS=['-msse2']) + env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2) - env_libvpx_ssse3 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_ssse3.Append(CCFLAGS=['-mssse3']) - env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3) + env_libvpx_ssse3 = env_libvpx.Clone() + if is_clang_or_gcc: + env_libvpx_ssse3.Append(CCFLAGS=['-mssse3']) + env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3) - env_libvpx_avx2 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_avx2.Append(CCFLAGS=['-mavx2']) - env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2) + env_libvpx_avx2 = env_libvpx.Clone() + if is_clang_or_gcc: + env_libvpx_avx2.Append(CCFLAGS=['-mavx2']) + env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2) - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86) + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86) - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm) - if cpu_bits == '64' or osx_fat: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm) + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm) + if cpu_bits == '64' or osx_fat: + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm) elif webm_cpu_arm: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm) + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm) - env_libvpx_neon = env_libvpx.Clone() - if env["platform"] == 'android' and env["android_arch"] == 'armv6': - env_libvpx_neon.Append(CCFLAGS=['-mfpu=neon']) - env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon) + env_libvpx_neon = env_libvpx.Clone() + if env["platform"] == 'android' and env["android_arch"] == 'armv6': + env_libvpx_neon.Append(CCFLAGS=['-mfpu=neon']) + env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon) - if env["platform"] == 'winrt': - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_armasm_ms) - elif env["platform"] == 'iphone': - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple) - else: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas) + if env["platform"] == 'winrt': + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_armasm_ms) + elif env["platform"] == 'iphone': + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple) + else: + env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas) diff --git a/modules/webm/libvpx/yasm_osx_fat.py b/modules/webm/libvpx/yasm_osx_fat.py index bcee50b3891..c39b95c7803 100644 --- a/modules/webm/libvpx/yasm_osx_fat.py +++ b/modules/webm/libvpx/yasm_osx_fat.py @@ -14,25 +14,25 @@ lipo_command = '' exit_code = 1 for arch in ['32', '64']: - if arch == '32' and input_file.endswith('x86_64.asm'): - can_remove[arch] = False - else: - command = 'yasm ' + includes + ' -f macho' + arch + ' -D X86_' + arch + ' -o ' + output_file + '.' + arch + ' ' + input_file - print(command) - if os.system(command) == 0: - lipo_command += output_file + '.' + arch + ' ' - can_remove[arch] = True - else: - can_remove[arch] = False + if arch == '32' and input_file.endswith('x86_64.asm'): + can_remove[arch] = False + else: + command = 'yasm ' + includes + ' -f macho' + arch + ' -D X86_' + arch + ' -o ' + output_file + '.' + arch + ' ' + input_file + print(command) + if os.system(command) == 0: + lipo_command += output_file + '.' + arch + ' ' + can_remove[arch] = True + else: + can_remove[arch] = False if lipo_command != '': - lipo_command = 'lipo -create ' + lipo_command + '-output ' + output_file - print(lipo_command) - if os.system(lipo_command) == 0: - exit_code = 0 + lipo_command = 'lipo -create ' + lipo_command + '-output ' + output_file + print(lipo_command) + if os.system(lipo_command) == 0: + exit_code = 0 for arch in ['32', '64']: - if can_remove[arch]: - os.remove(output_file + '.' + arch) + if can_remove[arch]: + os.remove(output_file + '.' + arch) sys.exit(exit_code) diff --git a/modules/webp/SCsub b/modules/webp/SCsub index 38585a1ff2c..57dde509e3b 100644 --- a/modules/webp/SCsub +++ b/modules/webp/SCsub @@ -7,115 +7,115 @@ env_webp = env_modules.Clone() # Thirdparty source files if (env["libwebp"] != "system"): # builtin - thirdparty_dir = "#thirdparty/libwebp/" - thirdparty_sources = [ - "enc/webpenc.c", - "enc/near_lossless.c", - "enc/frame.c", - "enc/alpha.c", - "enc/picture_csp.c", - "enc/vp8l.c", - "enc/picture_psnr.c", - "enc/delta_palettization.c", - "enc/syntax.c", - "enc/backward_references.c", - "enc/token.c", - "enc/analysis.c", - "enc/iterator.c", - "enc/picture_tools.c", - "enc/picture_rescale.c", - "enc/config.c", - "enc/tree.c", - "enc/cost.c", - "enc/picture.c", - "enc/quant.c", - "enc/filter.c", - "enc/histogram.c", - "utils/rescaler.c", - "utils/filters.c", - "utils/quant_levels_dec.c", - "utils/huffman.c", - "utils/thread.c", - "utils/quant_levels.c", - "utils/bit_writer.c", - "utils/bit_reader.c", - "utils/random.c", - "utils/utils.c", - "utils/huffman_encode.c", - "utils/color_cache.c", - "mux/muxinternal.c", - "mux/muxread.c", - "mux/anim_encode.c", - "mux/muxedit.c", - "dec/webp.c", - "dec/frame.c", - "dec/alpha.c", - "dec/vp8l.c", - "dec/io.c", - "dec/vp8.c", - "dec/idec.c", - "dec/tree.c", - "dec/buffer.c", - "dec/quant.c", - "demux/demux.c", - "demux/anim_decode.c", - "dsp/yuv.c", - "dsp/filters_sse2.c", - "dsp/dec_sse41.c", - "dsp/rescaler.c", - "dsp/lossless_sse2.c", - "dsp/alpha_processing_sse41.c", - "dsp/alpha_processing_sse2.c", - "dsp/filters.c", - "dsp/upsampling_mips_dsp_r2.c", - "dsp/dec_neon.c", - "dsp/enc_neon.c", - "dsp/lossless_enc_mips32.c", - "dsp/lossless_enc_sse2.c", - "dsp/upsampling.c", - "dsp/lossless_enc_neon.c", - "dsp/alpha_processing.c", - "dsp/cost_sse2.c", - "dsp/dec_mips32.c", - "dsp/enc_avx2.c", - "dsp/rescaler_mips32.c", - "dsp/enc.c", - "dsp/lossless_enc_sse41.c", - "dsp/cost_mips32.c", - "dsp/lossless_mips_dsp_r2.c", - "dsp/filters_mips_dsp_r2.c", - "dsp/upsampling_neon.c", - "dsp/alpha_processing_mips_dsp_r2.c", - "dsp/enc_mips_dsp_r2.c", - "dsp/lossless.c", - "dsp/yuv_mips_dsp_r2.c", - "dsp/cost_mips_dsp_r2.c", - "dsp/argb.c", - "dsp/dec_sse2.c", - "dsp/rescaler_sse2.c", - "dsp/enc_sse41.c", - "dsp/argb_mips_dsp_r2.c", - "dsp/lossless_enc_mips_dsp_r2.c", - "dsp/dec_clip_tables.c", - "dsp/yuv_mips32.c", - "dsp/cpu.c", - "dsp/dec.c", - "dsp/argb_sse2.c", - "dsp/lossless_neon.c", - "dsp/lossless_enc.c", - "dsp/enc_mips32.c", - "dsp/cost.c", - "dsp/rescaler_mips_dsp_r2.c", - "dsp/dec_mips_dsp_r2.c", - "dsp/rescaler_neon.c", - "dsp/yuv_sse2.c", - "dsp/enc_sse2.c", - "dsp/upsampling_sse2.c", - ] - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_dir = "#thirdparty/libwebp/" + thirdparty_sources = [ + "enc/webpenc.c", + "enc/near_lossless.c", + "enc/frame.c", + "enc/alpha.c", + "enc/picture_csp.c", + "enc/vp8l.c", + "enc/picture_psnr.c", + "enc/delta_palettization.c", + "enc/syntax.c", + "enc/backward_references.c", + "enc/token.c", + "enc/analysis.c", + "enc/iterator.c", + "enc/picture_tools.c", + "enc/picture_rescale.c", + "enc/config.c", + "enc/tree.c", + "enc/cost.c", + "enc/picture.c", + "enc/quant.c", + "enc/filter.c", + "enc/histogram.c", + "utils/rescaler.c", + "utils/filters.c", + "utils/quant_levels_dec.c", + "utils/huffman.c", + "utils/thread.c", + "utils/quant_levels.c", + "utils/bit_writer.c", + "utils/bit_reader.c", + "utils/random.c", + "utils/utils.c", + "utils/huffman_encode.c", + "utils/color_cache.c", + "mux/muxinternal.c", + "mux/muxread.c", + "mux/anim_encode.c", + "mux/muxedit.c", + "dec/webp.c", + "dec/frame.c", + "dec/alpha.c", + "dec/vp8l.c", + "dec/io.c", + "dec/vp8.c", + "dec/idec.c", + "dec/tree.c", + "dec/buffer.c", + "dec/quant.c", + "demux/demux.c", + "demux/anim_decode.c", + "dsp/yuv.c", + "dsp/filters_sse2.c", + "dsp/dec_sse41.c", + "dsp/rescaler.c", + "dsp/lossless_sse2.c", + "dsp/alpha_processing_sse41.c", + "dsp/alpha_processing_sse2.c", + "dsp/filters.c", + "dsp/upsampling_mips_dsp_r2.c", + "dsp/dec_neon.c", + "dsp/enc_neon.c", + "dsp/lossless_enc_mips32.c", + "dsp/lossless_enc_sse2.c", + "dsp/upsampling.c", + "dsp/lossless_enc_neon.c", + "dsp/alpha_processing.c", + "dsp/cost_sse2.c", + "dsp/dec_mips32.c", + "dsp/enc_avx2.c", + "dsp/rescaler_mips32.c", + "dsp/enc.c", + "dsp/lossless_enc_sse41.c", + "dsp/cost_mips32.c", + "dsp/lossless_mips_dsp_r2.c", + "dsp/filters_mips_dsp_r2.c", + "dsp/upsampling_neon.c", + "dsp/alpha_processing_mips_dsp_r2.c", + "dsp/enc_mips_dsp_r2.c", + "dsp/lossless.c", + "dsp/yuv_mips_dsp_r2.c", + "dsp/cost_mips_dsp_r2.c", + "dsp/argb.c", + "dsp/dec_sse2.c", + "dsp/rescaler_sse2.c", + "dsp/enc_sse41.c", + "dsp/argb_mips_dsp_r2.c", + "dsp/lossless_enc_mips_dsp_r2.c", + "dsp/dec_clip_tables.c", + "dsp/yuv_mips32.c", + "dsp/cpu.c", + "dsp/dec.c", + "dsp/argb_sse2.c", + "dsp/lossless_neon.c", + "dsp/lossless_enc.c", + "dsp/enc_mips32.c", + "dsp/cost.c", + "dsp/rescaler_mips_dsp_r2.c", + "dsp/dec_mips_dsp_r2.c", + "dsp/rescaler_neon.c", + "dsp/yuv_sse2.c", + "dsp/enc_sse2.c", + "dsp/upsampling_sse2.c", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - env_webp.add_source_files(env.modules_sources, thirdparty_sources) - env_webp.Append(CPPPATH = [thirdparty_dir]) + env_webp.add_source_files(env.modules_sources, thirdparty_sources) + env_webp.Append(CPPPATH = [thirdparty_dir]) # Godot source files env_webp.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/webp/config.py b/modules/webp/config.py index 368e97e152c..3de0425119c 100644 --- a/modules/webp/config.py +++ b/modules/webp/config.py @@ -1,6 +1,6 @@ def can_build(platform): - return True + return True def configure(env): - pass + pass diff --git a/platform/android/SCsub b/platform/android/SCsub index 4f9a9709cb8..9db6636f8b3 100644 --- a/platform/android/SCsub +++ b/platform/android/SCsub @@ -6,20 +6,20 @@ Import('env') android_files = [ - 'os_android.cpp', - 'godot_android.cpp', - 'file_access_android.cpp', - 'dir_access_android.cpp', - 'audio_driver_opensl.cpp', - 'file_access_jandroid.cpp', - 'dir_access_jandroid.cpp', - 'thread_jandroid.cpp', - 'audio_driver_jandroid.cpp', - 'ifaddrs_android.cpp', - 'android_native_app_glue.c', - 'java_glue.cpp', - 'cpu-features.c', - 'java_class_wrapper.cpp' + 'os_android.cpp', + 'godot_android.cpp', + 'file_access_android.cpp', + 'dir_access_android.cpp', + 'audio_driver_opensl.cpp', + 'file_access_jandroid.cpp', + 'dir_access_jandroid.cpp', + 'thread_jandroid.cpp', + 'audio_driver_jandroid.cpp', + 'ifaddrs_android.cpp', + 'android_native_app_glue.c', + 'java_glue.cpp', + 'cpu-features.c', + 'java_class_wrapper.cpp' ] #env.Depends('#core/math/vector3.h', 'vector3_psp.h') @@ -28,11 +28,11 @@ android_files = [ env_android = env.Clone() if env['target'] == "profile": - env_android.Append(CPPFLAGS=['-DPROFILER_ENABLED']) + env_android.Append(CPPFLAGS=['-DPROFILER_ENABLED']) android_objects=[] for x in android_files: - android_objects.append( env_android.SharedObject( x ) ) + android_objects.append( env_android.SharedObject( x ) ) prog = None @@ -48,43 +48,43 @@ gradle_text = gradle_basein.read() gradle_maven_repos_text="" if len(env.android_maven_repos) > 0: - gradle_maven_repos_text+="maven {\n" - for x in env.android_maven_repos: - gradle_maven_repos_text+="\t\t"+x+"\n" - gradle_maven_repos_text+="\t}\n" + gradle_maven_repos_text+="maven {\n" + for x in env.android_maven_repos: + gradle_maven_repos_text+="\t\t"+x+"\n" + gradle_maven_repos_text+="\t}\n" gradle_maven_dependencies_text="" for x in env.android_dependencies: - gradle_maven_dependencies_text+=x+"\n" + gradle_maven_dependencies_text+=x+"\n" gradle_java_dirs_text="" for x in env.android_java_dirs: - gradle_java_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_java_dirs_text+=",'"+x.replace("\\","/")+"'" gradle_res_dirs_text="" for x in env.android_res_dirs: - gradle_res_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_res_dirs_text+=",'"+x.replace("\\","/")+"'" gradle_aidl_dirs_text="" for x in env.android_aidl_dirs: - gradle_aidl_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_aidl_dirs_text+=",'"+x.replace("\\","/")+"'" gradle_jni_dirs_text="" for x in env.android_jni_dirs: - gradle_jni_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_jni_dirs_text+=",'"+x.replace("\\","/")+"'" gradle_asset_dirs_text="" gradle_default_config_text="" for x in env.android_default_config: - gradle_default_config_text+=x+"\n\t\t" + gradle_default_config_text+=x+"\n\t\t" gradle_text = gradle_text.replace("$$GRADLE_REPOSITORY_URLS$$",gradle_maven_repos_text) gradle_text = gradle_text.replace("$$GRADLE_DEPENDENCIES$$",gradle_maven_dependencies_text) @@ -114,19 +114,19 @@ env_android.SharedLibrary("#bin/libgodot",[android_objects],SHLIBSUFFIX=env["SHL lib_arch_dir = '' if env['android_arch'] == 'armv6': - lib_arch_dir = 'armeabi' + lib_arch_dir = 'armeabi' elif env['android_arch'] == 'armv7': - lib_arch_dir = 'armeabi-v7a' + lib_arch_dir = 'armeabi-v7a' elif env['android_arch'] == 'x86': - lib_arch_dir = 'x86' + lib_arch_dir = 'x86' else: - print 'WARN: Architecture not suitable for embedding into APK; keeping .so at \\bin' + print 'WARN: Architecture not suitable for embedding into APK; keeping .so at \\bin' if lib_arch_dir != '': - if env['target'] == 'release': - lib_type_dir = 'release' - else: # release_debug, debug - lib_type_dir = 'debug' - - out_dir = '#platform/android/java/libs/'+lib_type_dir+'/'+lib_arch_dir - env_android.Command(out_dir+'/libgodot_android.so', '#bin/libgodot'+env['SHLIBSUFFIX'], Move("$TARGET", "$SOURCE")) + if env['target'] == 'release': + lib_type_dir = 'release' + else: # release_debug, debug + lib_type_dir = 'debug' + + out_dir = '#platform/android/java/libs/'+lib_type_dir+'/'+lib_arch_dir + env_android.Command(out_dir+'/libgodot_android.so', '#bin/libgodot'+env['SHLIBSUFFIX'], Move("$TARGET", "$SOURCE")) diff --git a/platform/android/detect.py b/platform/android/detect.py index 842036f9869..9a144528621 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -4,247 +4,247 @@ import string import platform def is_active(): - return True + return True def get_name(): - return "Android" + return "Android" def can_build(): - import os - if (not os.environ.has_key("ANDROID_NDK_ROOT")): - return False + import os + if (not os.environ.has_key("ANDROID_NDK_ROOT")): + return False - return True + return True def get_opts(): - return [ - ('ANDROID_NDK_ROOT', 'the path to Android NDK', os.environ.get("ANDROID_NDK_ROOT", 0)), - ('NDK_TARGET', 'toolchain to use for the NDK',os.environ.get("NDK_TARGET", "arm-linux-androideabi-4.9")), - ('NDK_TARGET_X86', 'toolchain to use for the NDK x86',os.environ.get("NDK_TARGET_X86", "x86-4.9")), - ('ndk_platform', 'compile for platform: (android- , example: android-14)',"android-14"), - ('android_arch', 'select compiler architecture: (armv7/armv6/x86)',"armv7"), - ('android_neon','enable neon (armv7 only)',"yes"), - ('android_stl','enable STL support in android port (for modules)',"no") - ] + return [ + ('ANDROID_NDK_ROOT', 'the path to Android NDK', os.environ.get("ANDROID_NDK_ROOT", 0)), + ('NDK_TARGET', 'toolchain to use for the NDK',os.environ.get("NDK_TARGET", "arm-linux-androideabi-4.9")), + ('NDK_TARGET_X86', 'toolchain to use for the NDK x86',os.environ.get("NDK_TARGET_X86", "x86-4.9")), + ('ndk_platform', 'compile for platform: (android- , example: android-14)',"android-14"), + ('android_arch', 'select compiler architecture: (armv7/armv6/x86)',"armv7"), + ('android_neon','enable neon (armv7 only)',"yes"), + ('android_stl','enable STL support in android port (for modules)',"no") + ] def get_flags(): - return [ - ('tools', 'no'), - ('openssl', 'builtin'), #use builtin openssl - ] + return [ + ('tools', 'no'), + ('openssl', 'builtin'), #use builtin openssl + ] def create(env): - tools = env['TOOLS'] - if "mingw" in tools: - tools.remove('mingw') - if "applelink" in tools: - tools.remove("applelink") - env.Tool('gcc') - return env.Clone(tools=tools); + tools = env['TOOLS'] + if "mingw" in tools: + tools.remove('mingw') + if "applelink" in tools: + tools.remove("applelink") + env.Tool('gcc') + return env.Clone(tools=tools); def configure(env): - # Workaround for MinGW. See: - # http://www.scons.org/wiki/LongCmdLinesOnWin32 - import os - if (os.name=="nt"): + # Workaround for MinGW. See: + # http://www.scons.org/wiki/LongCmdLinesOnWin32 + import os + if (os.name=="nt"): - import subprocess + import subprocess - def mySubProcess(cmdline,env): - #print "SPAWNED : " + cmdline - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) - data, err = proc.communicate() - rv = proc.wait() - if rv: - print "=====" - print err - print "=====" - return rv + def mySubProcess(cmdline,env): + #print "SPAWNED : " + cmdline + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + data, err = proc.communicate() + rv = proc.wait() + if rv: + print "=====" + print err + print "=====" + return rv - def mySpawn(sh, escape, cmd, args, env): + def mySpawn(sh, escape, cmd, args, env): - newargs = ' '.join(args[1:]) - cmdline = cmd + " " + newargs + newargs = ' '.join(args[1:]) + cmdline = cmd + " " + newargs - rv=0 - if len(cmdline) > 32000 and cmd.endswith("ar") : - cmdline = cmd + " " + args[1] + " " + args[2] + " " - for i in range(3,len(args)) : - rv = mySubProcess( cmdline + args[i], env ) - if rv : - break - else: - rv = mySubProcess( cmdline, env ) + rv=0 + if len(cmdline) > 32000 and cmd.endswith("ar") : + cmdline = cmd + " " + args[1] + " " + args[2] + " " + for i in range(3,len(args)) : + rv = mySubProcess( cmdline + args[i], env ) + if rv : + break + else: + rv = mySubProcess( cmdline, env ) - return rv + return rv - env['SPAWN'] = mySpawn + env['SPAWN'] = mySpawn - ndk_platform=env['ndk_platform'] + ndk_platform=env['ndk_platform'] - if env['android_arch'] not in ['armv7','armv6','x86']: - env['android_arch']='armv7' + if env['android_arch'] not in ['armv7','armv6','x86']: + env['android_arch']='armv7' - if env['android_arch']=='x86': - env['NDK_TARGET']=env['NDK_TARGET_X86'] - env["x86_libtheora_opt_gcc"]=True + if env['android_arch']=='x86': + env['NDK_TARGET']=env['NDK_TARGET_X86'] + env["x86_libtheora_opt_gcc"]=True - if env['PLATFORM'] == 'win32': - env.Tool('gcc') - env['SHLIBSUFFIX'] = '.so' + if env['PLATFORM'] == 'win32': + env.Tool('gcc') + env['SHLIBSUFFIX'] = '.so' - neon_text="" - if env["android_arch"]=="armv7" and env['android_neon']=='yes': - neon_text=" (with neon)" - print("Godot Android!!!!! ("+env['android_arch']+")"+neon_text) + neon_text="" + if env["android_arch"]=="armv7" and env['android_neon']=='yes': + neon_text=" (with neon)" + print("Godot Android!!!!! ("+env['android_arch']+")"+neon_text) - env.Append(CPPPATH=['#platform/android']) + env.Append(CPPPATH=['#platform/android']) - if env['android_arch']=='x86': - env.extra_suffix=".x86"+env.extra_suffix - elif env['android_arch']=='armv6': - env.extra_suffix=".armv6"+env.extra_suffix - elif env["android_arch"]=="armv7": - if env['android_neon']=='yes': - env.extra_suffix=".armv7.neon"+env.extra_suffix - else: - env.extra_suffix=".armv7"+env.extra_suffix + if env['android_arch']=='x86': + env.extra_suffix=".x86"+env.extra_suffix + elif env['android_arch']=='armv6': + env.extra_suffix=".armv6"+env.extra_suffix + elif env["android_arch"]=="armv7": + if env['android_neon']=='yes': + env.extra_suffix=".armv7.neon"+env.extra_suffix + else: + env.extra_suffix=".armv7"+env.extra_suffix - gcc_path=env["ANDROID_NDK_ROOT"]+"/toolchains/"+env["NDK_TARGET"]+"/prebuilt/"; + gcc_path=env["ANDROID_NDK_ROOT"]+"/toolchains/"+env["NDK_TARGET"]+"/prebuilt/"; - if (sys.platform.startswith("linux")): - if (platform.machine().endswith('64')): - gcc_path=gcc_path+"/linux-x86_64/bin" - else: - gcc_path=gcc_path+"/linux-x86/bin" - elif (sys.platform.startswith("darwin")): - gcc_path=gcc_path+"/darwin-x86_64/bin" - env['SHLINKFLAGS'][1] = '-shared' - env['SHLIBSUFFIX'] = '.so' - elif (sys.platform.startswith('win')): - if (platform.machine().endswith('64')): - gcc_path=gcc_path+"/windows-x86_64/bin" - else: - gcc_path=gcc_path+"/windows-x86/bin" + if (sys.platform.startswith("linux")): + if (platform.machine().endswith('64')): + gcc_path=gcc_path+"/linux-x86_64/bin" + else: + gcc_path=gcc_path+"/linux-x86/bin" + elif (sys.platform.startswith("darwin")): + gcc_path=gcc_path+"/darwin-x86_64/bin" + env['SHLINKFLAGS'][1] = '-shared' + env['SHLIBSUFFIX'] = '.so' + elif (sys.platform.startswith('win')): + if (platform.machine().endswith('64')): + gcc_path=gcc_path+"/windows-x86_64/bin" + else: + gcc_path=gcc_path+"/windows-x86/bin" - env['ENV']['PATH'] = gcc_path+":"+env['ENV']['PATH'] - if env['android_arch']=='x86': - env['CC'] = gcc_path+'/i686-linux-android-gcc' - env['CXX'] = gcc_path+'/i686-linux-android-g++' - env['AR'] = gcc_path+"/i686-linux-android-ar" - env['RANLIB'] = gcc_path+"/i686-linux-android-ranlib" - env['AS'] = gcc_path+"/i686-linux-android-as" - else: - env['CC'] = gcc_path+'/arm-linux-androideabi-gcc' - env['CXX'] = gcc_path+'/arm-linux-androideabi-g++' - env['AR'] = gcc_path+"/arm-linux-androideabi-ar" - env['RANLIB'] = gcc_path+"/arm-linux-androideabi-ranlib" - env['AS'] = gcc_path+"/arm-linux-androideabi-as" + env['ENV']['PATH'] = gcc_path+":"+env['ENV']['PATH'] + if env['android_arch']=='x86': + env['CC'] = gcc_path+'/i686-linux-android-gcc' + env['CXX'] = gcc_path+'/i686-linux-android-g++' + env['AR'] = gcc_path+"/i686-linux-android-ar" + env['RANLIB'] = gcc_path+"/i686-linux-android-ranlib" + env['AS'] = gcc_path+"/i686-linux-android-as" + else: + env['CC'] = gcc_path+'/arm-linux-androideabi-gcc' + env['CXX'] = gcc_path+'/arm-linux-androideabi-g++' + env['AR'] = gcc_path+"/arm-linux-androideabi-ar" + env['RANLIB'] = gcc_path+"/arm-linux-androideabi-ranlib" + env['AS'] = gcc_path+"/arm-linux-androideabi-as" - if env['android_arch']=='x86': - env['ARCH'] = 'arch-x86' - else: - env['ARCH'] = 'arch-arm' + if env['android_arch']=='x86': + env['ARCH'] = 'arch-x86' + else: + env['ARCH'] = 'arch-arm' - import string - #include path - gcc_include=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] +"/usr/include" - ld_sysroot=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] - #glue_include=env["ANDROID_NDK_ROOT"]+"/sources/android/native_app_glue" - ld_path=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH']+"/usr/lib" - env.Append(CPPPATH=[gcc_include]) + import string + #include path + gcc_include=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] +"/usr/include" + ld_sysroot=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] + #glue_include=env["ANDROID_NDK_ROOT"]+"/sources/android/native_app_glue" + ld_path=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH']+"/usr/lib" + env.Append(CPPPATH=[gcc_include]) # env['CCFLAGS'] = string.split('-DNO_THREADS -MMD -MP -MF -fpic -ffunction-sections -funwind-tables -fstack-protector -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -fno-exceptions -mthumb -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED ') - env['neon_enabled']=False - if env['android_arch']=='x86': - env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__GLIBC__ -Wno-psabi -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - elif env["android_arch"]=="armv6": - env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_6__ -D__GLIBC__ -Wno-psabi -march=armv6 -mfpu=vfp -mfloat-abi=softfp -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - elif env["android_arch"]=="armv7": - env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_7__ -D__ARM_ARCH_7A__ -D__GLIBC__ -Wno-psabi -march=armv7-a -mfloat-abi=softfp -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - if env['android_neon']=='yes': - env['neon_enabled']=True - env.Append(CCFLAGS=['-mfpu=neon','-D__ARM_NEON__']) - else: - env.Append(CCFLAGS=['-mfpu=vfpv3-d16']) + env['neon_enabled']=False + if env['android_arch']=='x86': + env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__GLIBC__ -Wno-psabi -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) + elif env["android_arch"]=="armv6": + env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_6__ -D__GLIBC__ -Wno-psabi -march=armv6 -mfpu=vfp -mfloat-abi=softfp -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) + elif env["android_arch"]=="armv7": + env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_7__ -D__ARM_ARCH_7A__ -D__GLIBC__ -Wno-psabi -march=armv7-a -mfloat-abi=softfp -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) + if env['android_neon']=='yes': + env['neon_enabled']=True + env.Append(CCFLAGS=['-mfpu=neon','-D__ARM_NEON__']) + else: + env.Append(CCFLAGS=['-mfpu=vfpv3-d16']) - env.Append(LDPATH=[ld_path]) - env.Append(LIBS=['OpenSLES']) + env.Append(LDPATH=[ld_path]) + env.Append(LIBS=['OpenSLES']) # env.Append(LIBS=['c','m','stdc++','log','EGL','GLESv1_CM','GLESv2','OpenSLES','supc++','android']) - env.Append(LIBS=['EGL','OpenSLES','android']) - env.Append(LIBS=['c','m','stdc++','log','GLESv1_CM','GLESv2', 'z']) + env.Append(LIBS=['EGL','OpenSLES','android']) + env.Append(LIBS=['c','m','stdc++','log','GLESv1_CM','GLESv2', 'z']) - env["LINKFLAGS"]= string.split(" -g --sysroot="+ld_sysroot+" -Wl,--no-undefined -Wl,-z,noexecstack ") - env.Append(LINKFLAGS=["-Wl,-soname,libgodot_android.so"]) + env["LINKFLAGS"]= string.split(" -g --sysroot="+ld_sysroot+" -Wl,--no-undefined -Wl,-z,noexecstack ") + env.Append(LINKFLAGS=["-Wl,-soname,libgodot_android.so"]) - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-O2', '-ffast-math','-fomit-frame-pointer']) + env.Append(CCFLAGS=['-O2', '-ffast-math','-fomit-frame-pointer']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2', '-ffast-math','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2', '-ffast-math','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-D_DEBUG', '-g1', '-Wall', '-O0', '-DDEBUG_ENABLED']) - env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) + env.Append(CCFLAGS=['-D_DEBUG', '-g1', '-Wall', '-O0', '-DDEBUG_ENABLED']) + env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) - env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED', '-DNO_FCNTL','-DMPC_FIXED_POINT']) + env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED', '-DNO_FCNTL','-DMPC_FIXED_POINT']) # env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT']) - # TODO: Move that to opus module's config - if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): - if (env["android_arch"]=="armv6" or env["android_arch"]=="armv7"): - env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) - env.opus_fixed_point="yes" + # TODO: Move that to opus module's config + if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): + if (env["android_arch"]=="armv6" or env["android_arch"]=="armv7"): + env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) + env.opus_fixed_point="yes" - if (env['android_stl']=='yes'): - #env.Append(CCFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/system/include"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) - if env['android_arch']=='x86': - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) - elif env['android_arch']=='armv6': - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) - elif env["android_arch"]=="armv7": - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) + if (env['android_stl']=='yes'): + #env.Append(CCFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/system/include"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) + if env['android_arch']=='x86': + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) + elif env['android_arch']=='armv6': + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) + elif env["android_arch"]=="armv7": + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) - env.Append(LIBS=["gnustl_static","supc++"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) + env.Append(LIBS=["gnustl_static","supc++"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) - #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/stlport/stlport"]) - #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/include"]) - #env.Append(LINKFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/libstdc++.a"]) - else: + #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/stlport/stlport"]) + #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/include"]) + #env.Append(LINKFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/libstdc++.a"]) + else: - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) - if env['android_arch']=='x86': - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) - elif env["android_arch"]=="armv6": - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) - elif env["android_arch"]=="armv7": - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) - env.Append(LIBS=['gnustl_static']) - env.Append(CCFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST']) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) + if env['android_arch']=='x86': + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) + elif env["android_arch"]=="armv6": + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) + elif env["android_arch"]=="armv7": + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) + env.Append(LIBS=['gnustl_static']) + env.Append(CCFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST']) - import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + import methods + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.use_windows_spawn_fix() + env.use_windows_spawn_fix() diff --git a/platform/bb10/SCsub b/platform/bb10/SCsub index 84fff0828b6..c19f46d5795 100644 --- a/platform/bb10/SCsub +++ b/platform/bb10/SCsub @@ -4,19 +4,19 @@ Import('env') bb10_lib = [ - 'bbutil.c', - 'os_bb10.cpp', - 'audio_driver_bb10.cpp', - 'godot_bb10.cpp', - 'payment_service.cpp', + 'bbutil.c', + 'os_bb10.cpp', + 'audio_driver_bb10.cpp', + 'godot_bb10.cpp', + 'payment_service.cpp', ] env_bps = env.Clone() if env['bb10_payment_service'] == "yes": - env_bps.Append(CPPFLAGS=['-DPAYMENT_SERVICE_ENABLED']) + env_bps.Append(CPPFLAGS=['-DPAYMENT_SERVICE_ENABLED']) if env['bb10_lgles_override'] == "yes": - env_bps.Append(CPPFLAGS=['-DBB10_LGLES_OVERRIDE']) + env_bps.Append(CPPFLAGS=['-DBB10_LGLES_OVERRIDE']) prog = None diff --git a/platform/bb10/detect.py b/platform/bb10/detect.py index 2b76aa34967..76ccbe3af6d 100644 --- a/platform/bb10/detect.py +++ b/platform/bb10/detect.py @@ -5,85 +5,85 @@ import methods def is_active(): - return True + return True def get_name(): - return "BlackBerry 10" + return "BlackBerry 10" def can_build(): - import os - if (not os.environ.has_key("QNX_TARGET")): - return False - return True + import os + if (not os.environ.has_key("QNX_TARGET")): + return False + return True def get_opts(): - return [ - ('QNX_HOST', 'path to qnx host', os.environ.get("QNX_HOST", 0)), - ('QNX_TARGET', 'path to qnx target', os.environ.get("QNX_TARGET", 0)), - ('QNX_CONFIGURATION', 'path to qnx configuration', os.environ.get("QNX_CONFIGURATION", 0)), - ('qnx_target', 'Qnx target (armle or x86', 'armle'), - ('bb10_payment_service', 'Enable Payment Service for BlackBerry10', 'yes'), - ('bb10_lgles_override', 'Force legacy GLES (1.1) on iOS', 'no'), - ('bb10_exceptions', 'Use exceptions when compiling on bb10', 'no'), - ] + return [ + ('QNX_HOST', 'path to qnx host', os.environ.get("QNX_HOST", 0)), + ('QNX_TARGET', 'path to qnx target', os.environ.get("QNX_TARGET", 0)), + ('QNX_CONFIGURATION', 'path to qnx configuration', os.environ.get("QNX_CONFIGURATION", 0)), + ('qnx_target', 'Qnx target (armle or x86', 'armle'), + ('bb10_payment_service', 'Enable Payment Service for BlackBerry10', 'yes'), + ('bb10_lgles_override', 'Force legacy GLES (1.1) on iOS', 'no'), + ('bb10_exceptions', 'Use exceptions when compiling on bb10', 'no'), + ] def get_flags(): - return [ - ('tools', 'no'), - ('builtin_zlib', 'yes'), - ('module_theora_enabled', 'no'), - ] + return [ + ('tools', 'no'), + ('builtin_zlib', 'yes'), + ('module_theora_enabled', 'no'), + ] def configure(env): - if env['PLATFORM'] == 'win32': - env.Tool('mingw') - env['SPAWN'] = methods.win32_spawn + if env['PLATFORM'] == 'win32': + env.Tool('mingw') + env['SPAWN'] = methods.win32_spawn - env['qnx_target_ver'] = env['qnx_target'] - if env['qnx_target'] == "armle": - env['qnx_prefix'] = 'ntoarmv7' - env['qnx_target_ver'] = 'armle-v7' - else: - env['qnx_prefix'] = 'ntox86' + env['qnx_target_ver'] = env['qnx_target'] + if env['qnx_target'] == "armle": + env['qnx_prefix'] = 'ntoarmv7' + env['qnx_target_ver'] = 'armle-v7' + else: + env['qnx_prefix'] = 'ntox86' - env['OBJSUFFIX'] = ".qnx.${qnx_target}.o" - env['LIBSUFFIX'] = ".qnx.${qnx_target}.a" - env['PROGSUFFIX'] = ".qnx.${qnx_target}" - print("PROGSUFFIX: "+env['PROGSUFFIX']+" target: "+env['qnx_target']) + env['OBJSUFFIX'] = ".qnx.${qnx_target}.o" + env['LIBSUFFIX'] = ".qnx.${qnx_target}.a" + env['PROGSUFFIX'] = ".qnx.${qnx_target}" + print("PROGSUFFIX: "+env['PROGSUFFIX']+" target: "+env['qnx_target']) - env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/bin') - env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/usr/bin') - env['ENV']['QNX_HOST'] = env['QNX_HOST'] - env['ENV']['QNX_TARGET'] = env['QNX_TARGET'] - env['ENV']['QNX_CONFIGURATION'] = env['QNX_CONFIGURATION'] + env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/bin') + env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/usr/bin') + env['ENV']['QNX_HOST'] = env['QNX_HOST'] + env['ENV']['QNX_TARGET'] = env['QNX_TARGET'] + env['ENV']['QNX_CONFIGURATION'] = env['QNX_CONFIGURATION'] - env['CC'] = '$qnx_prefix-gcc' - env['CXX'] = '$qnx_prefix-g++' - env['AR'] = '$qnx_prefix-ar' - env['RANLIB'] = '$qnx_prefix-ranlib' + env['CC'] = '$qnx_prefix-gcc' + env['CXX'] = '$qnx_prefix-g++' + env['AR'] = '$qnx_prefix-ar' + env['RANLIB'] = '$qnx_prefix-ranlib' - env.Append(CPPPATH = ['#platform/bb10']) - env.Append(LIBPATH = ['#platform/bb10/lib/$qnx_target', '#platform/bb10/lib/$qnx_target_ver']) - env.Append(CCFLAGS = string.split('-DBB10_ENABLED -DUNIX_ENABLED -DGLES2_ENABLED -DGLES1_ENABLED -D_LITTLE_ENDIAN -DNO_THREADS -DNO_FCNTL')) - if env['bb10_exceptions']=="yes": - env.Append(CCFLAGS = ['-fexceptions']) - else: - env.Append(CCFLAGS = ['-fno-exceptions']) + env.Append(CPPPATH = ['#platform/bb10']) + env.Append(LIBPATH = ['#platform/bb10/lib/$qnx_target', '#platform/bb10/lib/$qnx_target_ver']) + env.Append(CCFLAGS = string.split('-DBB10_ENABLED -DUNIX_ENABLED -DGLES2_ENABLED -DGLES1_ENABLED -D_LITTLE_ENDIAN -DNO_THREADS -DNO_FCNTL')) + if env['bb10_exceptions']=="yes": + env.Append(CCFLAGS = ['-fexceptions']) + else: + env.Append(CCFLAGS = ['-fno-exceptions']) - #env.Append(LINKFLAGS = string.split() + #env.Append(LINKFLAGS = string.split() - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-O3','-DRELEASE_BUILD']) + env.Append(CCFLAGS=['-O3','-DRELEASE_BUILD']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g', '-O0','-DDEBUG_ENABLED', '-D_DEBUG']) - env.Append(LINKFLAGS=['-g']) + env.Append(CCFLAGS=['-g', '-O0','-DDEBUG_ENABLED', '-D_DEBUG']) + env.Append(LINKFLAGS=['-g']) - env.Append(LIBS=['bps', 'pps', 'screen', 'socket', 'EGL', 'GLESv2', 'GLESv1_CM', 'm', 'asound']) + env.Append(LIBS=['bps', 'pps', 'screen', 'socket', 'EGL', 'GLESv2', 'GLESv1_CM', 'm', 'asound']) diff --git a/platform/haiku/SCsub b/platform/haiku/SCsub index b5a584baa47..bac287ef62c 100644 --- a/platform/haiku/SCsub +++ b/platform/haiku/SCsub @@ -3,25 +3,25 @@ Import('env') common_haiku = [ - 'os_haiku.cpp', - 'context_gl_haiku.cpp', - 'haiku_application.cpp', - 'haiku_direct_window.cpp', - 'haiku_gl_view.cpp', - 'key_mapping_haiku.cpp', - 'audio_driver_media_kit.cpp' + 'os_haiku.cpp', + 'context_gl_haiku.cpp', + 'haiku_application.cpp', + 'haiku_direct_window.cpp', + 'haiku_gl_view.cpp', + 'key_mapping_haiku.cpp', + 'audio_driver_media_kit.cpp' ] target = env.Program( - '#bin/godot', - ['godot_haiku.cpp'] + common_haiku + '#bin/godot', + ['godot_haiku.cpp'] + common_haiku ) command = env.Command('#bin/godot.rsrc', '#platform/haiku/godot.rdef', - ['rc -o $TARGET $SOURCE']) + ['rc -o $TARGET $SOURCE']) def addResourcesAction(target = None, source = None, env = None): - return env.Execute('xres -o ' + File(target)[0].path + ' bin/godot.rsrc') + return env.Execute('xres -o ' + File(target)[0].path + ' bin/godot.rsrc') env.AddPostAction(target, addResourcesAction) env.Depends(target, command) diff --git a/platform/haiku/detect.py b/platform/haiku/detect.py index af997a57372..aef423b404b 100644 --- a/platform/haiku/detect.py +++ b/platform/haiku/detect.py @@ -2,60 +2,60 @@ import os import sys def is_active(): - return True + return True def get_name(): - return "Haiku" + return "Haiku" def can_build(): - if (os.name != "posix"): - return False + if (os.name != "posix"): + return False - if (sys.platform == "darwin"): - return False + if (sys.platform == "darwin"): + return False - return True + return True def get_opts(): - return [ - ('debug_release', 'Add debug symbols to release version','no') - ] + return [ + ('debug_release', 'Add debug symbols to release version','no') + ] def get_flags(): - return [ - ] + return [ + ] def configure(env): - is64 = sys.maxsize > 2**32 + is64 = sys.maxsize > 2**32 - if (env["bits"]=="default"): - if (is64): - env["bits"]="64" - else: - env["bits"]="32" + if (env["bits"]=="default"): + if (is64): + env["bits"]="64" + else: + env["bits"]="32" - env.Append(CPPPATH = ['#platform/haiku']) + env.Append(CPPPATH = ['#platform/haiku']) - env["CC"] = "gcc-x86" - env["CXX"] = "g++-x86" + env["CC"] = "gcc-x86" + env["CXX"] = "g++-x86" - if (env["target"]=="release"): - if (env["debug_release"]=="yes"): - env.Append(CCFLAGS=['-g2']) - else: - env.Append(CCFLAGS=['-O3','-ffast-math']) - elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + if (env["target"]=="release"): + if (env["debug_release"]=="yes"): + env.Append(CCFLAGS=['-g2']) + else: + env.Append(CCFLAGS=['-O3','-ffast-math']) + elif (env["target"]=="release_debug"): + env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) + elif (env["target"]=="debug"): + env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) - #env.Append(CCFLAGS=['-DFREETYPE_ENABLED']) - env.Append(CPPFLAGS = ['-DPTHREAD_NO_RENAME']) # TODO: enable when we have pthread_setname_np - env.Append(CPPFLAGS = ['-DOPENGL_ENABLED', '-DMEDIA_KIT_ENABLED']) - env.Append(CPPFLAGS = ['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DGLES_OVER_GL']) - env.Append(LIBS = ['be', 'game', 'media', 'network', 'bnetapi', 'z', 'GL']) + #env.Append(CCFLAGS=['-DFREETYPE_ENABLED']) + env.Append(CPPFLAGS = ['-DPTHREAD_NO_RENAME']) # TODO: enable when we have pthread_setname_np + env.Append(CPPFLAGS = ['-DOPENGL_ENABLED', '-DMEDIA_KIT_ENABLED']) + env.Append(CPPFLAGS = ['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DGLES_OVER_GL']) + env.Append(LIBS = ['be', 'game', 'media', 'network', 'bnetapi', 'z', 'GL']) - import methods - env.Append(BUILDERS = {'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) - env.Append(BUILDERS = {'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) - env.Append(BUILDERS = {'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) + import methods + env.Append(BUILDERS = {'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) + env.Append(BUILDERS = {'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) + env.Append(BUILDERS = {'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 236630bef41..1d7797ebd3f 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -4,19 +4,19 @@ Import('env') iphone_lib = [ - 'os_iphone.cpp', - #'rasterizer_iphone.cpp', - 'audio_driver_iphone.cpp', - 'sem_iphone.cpp', - 'gl_view.mm', - 'main.m', - 'app_delegate.mm', - 'view_controller.mm', - 'game_center.mm', - 'in_app_store.mm', - 'icloud.mm', - #'Appirater.m', - 'ios.mm', + 'os_iphone.cpp', + #'rasterizer_iphone.cpp', + 'audio_driver_iphone.cpp', + 'sem_iphone.cpp', + 'gl_view.mm', + 'main.m', + 'app_delegate.mm', + 'view_controller.mm', + 'game_center.mm', + 'in_app_store.mm', + 'icloud.mm', + #'Appirater.m', + 'ios.mm', ] #env.Depends('#core/math/vector3.h', 'vector3_psp.h') @@ -27,7 +27,7 @@ env_ios = env.Clone(); if env['ios_gles22_override'] == "yes": - env_ios.Append(CPPFLAGS=['-DGLES2_OVERRIDE']) + env_ios.Append(CPPFLAGS=['-DGLES2_OVERRIDE']) #if env['ios_appirater'] == "yes": diff --git a/platform/iphone/detect.py b/platform/iphone/detect.py index 1be63891b16..cea79d0f212 100644 --- a/platform/iphone/detect.py +++ b/platform/iphone/detect.py @@ -3,187 +3,187 @@ import sys def is_active(): - return True + return True def get_name(): - return "iOS" + return "iOS" def can_build(): - import sys - import os - if sys.platform == 'darwin' or os.environ.has_key("OSXCROSS_IOS"): - return True + import sys + import os + if sys.platform == 'darwin' or os.environ.has_key("OSXCROSS_IOS"): + return True - return False + return False def get_opts(): - return [ - ('IPHONEPLATFORM', 'name of the iphone platform', 'iPhoneOS'), - ('IPHONEPATH', 'the path to iphone toolchain', '/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain'), - ('IPHONESDK', 'path to the iphone SDK', '/Applications/Xcode.app/Contents/Developer/Platforms/${IPHONEPLATFORM}.platform/Developer/SDKs/${IPHONEPLATFORM}.sdk/'), - ('game_center', 'Support for game center', 'yes'), - ('store_kit', 'Support for in-app store', 'yes'), - ('icloud', 'Support for iCloud', 'yes'), - ('ios_gles22_override', 'Force GLES2.0 on iOS', 'yes'), - ('ios_appirater', 'Enable Appirater', 'no'), - ('ios_exceptions', 'Use exceptions when compiling on playbook', 'yes'), - ('ios_triple', 'Triple for ios toolchain', ''), - ('ios_sim', 'Build simulator binary', 'no'), - ] + return [ + ('IPHONEPLATFORM', 'name of the iphone platform', 'iPhoneOS'), + ('IPHONEPATH', 'the path to iphone toolchain', '/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain'), + ('IPHONESDK', 'path to the iphone SDK', '/Applications/Xcode.app/Contents/Developer/Platforms/${IPHONEPLATFORM}.platform/Developer/SDKs/${IPHONEPLATFORM}.sdk/'), + ('game_center', 'Support for game center', 'yes'), + ('store_kit', 'Support for in-app store', 'yes'), + ('icloud', 'Support for iCloud', 'yes'), + ('ios_gles22_override', 'Force GLES2.0 on iOS', 'yes'), + ('ios_appirater', 'Enable Appirater', 'no'), + ('ios_exceptions', 'Use exceptions when compiling on playbook', 'yes'), + ('ios_triple', 'Triple for ios toolchain', ''), + ('ios_sim', 'Build simulator binary', 'no'), + ] def get_flags(): - return [ - ('tools', 'no'), - ('webp', 'yes'), - ('builtin_zlib', 'yes'), - ('openssl','builtin'), #use builtin openssl - ] + return [ + ('tools', 'no'), + ('webp', 'yes'), + ('builtin_zlib', 'yes'), + ('openssl','builtin'), #use builtin openssl + ] def configure(env): - env.Append(CPPPATH=['#platform/iphone']) + env.Append(CPPPATH=['#platform/iphone']) - env['ENV']['PATH'] = env['IPHONEPATH']+"/Developer/usr/bin/:"+env['ENV']['PATH'] + env['ENV']['PATH'] = env['IPHONEPATH']+"/Developer/usr/bin/:"+env['ENV']['PATH'] - env['CC'] = '$IPHONEPATH/usr/bin/${ios_triple}clang' - env['CXX'] = '$IPHONEPATH/usr/bin/${ios_triple}clang++' - env['AR'] = '$IPHONEPATH/usr/bin/${ios_triple}ar' - env['RANLIB'] = '$IPHONEPATH/usr/bin/${ios_triple}ranlib' + env['CC'] = '$IPHONEPATH/usr/bin/${ios_triple}clang' + env['CXX'] = '$IPHONEPATH/usr/bin/${ios_triple}clang++' + env['AR'] = '$IPHONEPATH/usr/bin/${ios_triple}ar' + env['RANLIB'] = '$IPHONEPATH/usr/bin/${ios_triple}ranlib' - import string - if (env["ios_sim"]=="yes" or env["arch"] == "x86"): # i386, simulator - env["arch"]="x86" - env["bits"]="32" - env['CCFLAGS'] = string.split('-arch i386 -fobjc-abi-version=2 -fobjc-legacy-dispatch -fmessage-length=0 -fpascal-strings -fasm-blocks -Wall -D__IPHONE_OS_VERSION_MIN_REQUIRED=40100 -isysroot $IPHONESDK -mios-simulator-version-min=4.3 -DCUSTOM_MATRIX_TRANSFORM_H=\\\"build/iphone/matrix4_iphone.h\\\" -DCUSTOM_VECTOR3_TRANSFORM_H=\\\"build/iphone/vector3_iphone.h\\\"') - elif (env["arch"]=="arm64"): # arm64 - env["bits"] = "64" - env['CCFLAGS'] = string.split('-fno-objc-arc -arch arm64 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -fvisibility=hidden -Wno-sign-conversion -MMD -MT dependencies -miphoneos-version-min=5.1.1 -isysroot $IPHONESDK') - env.Append(CPPFLAGS=['-DNEED_LONG_INT']) - env.Append(CPPFLAGS=['-DLIBYUV_DISABLE_NEON']) - else: # armv7 - env["arch"] = "arm" - env["bits"] = "32" - env['CCFLAGS'] = string.split('-fno-objc-arc -arch armv7 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.sdk -fvisibility=hidden -Wno-sign-conversion -mthumb "-DIBOutlet=__attribute__((iboutlet))" "-DIBOutletCollection(ClassName)=__attribute__((iboutletcollection(ClassName)))" "-DIBAction=void)__attribute__((ibaction)" -miphoneos-version-min=5.1.1 -MMD -MT dependencies -isysroot $IPHONESDK') + import string + if (env["ios_sim"]=="yes" or env["arch"] == "x86"): # i386, simulator + env["arch"]="x86" + env["bits"]="32" + env['CCFLAGS'] = string.split('-arch i386 -fobjc-abi-version=2 -fobjc-legacy-dispatch -fmessage-length=0 -fpascal-strings -fasm-blocks -Wall -D__IPHONE_OS_VERSION_MIN_REQUIRED=40100 -isysroot $IPHONESDK -mios-simulator-version-min=4.3 -DCUSTOM_MATRIX_TRANSFORM_H=\\\"build/iphone/matrix4_iphone.h\\\" -DCUSTOM_VECTOR3_TRANSFORM_H=\\\"build/iphone/vector3_iphone.h\\\"') + elif (env["arch"]=="arm64"): # arm64 + env["bits"] = "64" + env['CCFLAGS'] = string.split('-fno-objc-arc -arch arm64 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -fvisibility=hidden -Wno-sign-conversion -MMD -MT dependencies -miphoneos-version-min=5.1.1 -isysroot $IPHONESDK') + env.Append(CPPFLAGS=['-DNEED_LONG_INT']) + env.Append(CPPFLAGS=['-DLIBYUV_DISABLE_NEON']) + else: # armv7 + env["arch"] = "arm" + env["bits"] = "32" + env['CCFLAGS'] = string.split('-fno-objc-arc -arch armv7 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.sdk -fvisibility=hidden -Wno-sign-conversion -mthumb "-DIBOutlet=__attribute__((iboutlet))" "-DIBOutletCollection(ClassName)=__attribute__((iboutletcollection(ClassName)))" "-DIBAction=void)__attribute__((ibaction)" -miphoneos-version-min=5.1.1 -MMD -MT dependencies -isysroot $IPHONESDK') - if (env["arch"]=="x86"): - env['IPHONEPLATFORM'] = 'iPhoneSimulator' - env.Append(LINKFLAGS=['-arch', 'i386', '-mios-simulator-version-min=4.3', - '-isysroot', '$IPHONESDK', - #'-mmacosx-version-min=10.6', - '-Xlinker', - '-objc_abi_version', - '-Xlinker', '2', - '-framework', 'AudioToolbox', - '-framework', 'AVFoundation', - '-framework', 'CoreAudio', - '-framework', 'CoreGraphics', - '-framework', 'CoreMedia', - '-framework', 'Foundation', - '-framework', 'Security', - '-framework', 'UIKit', - '-framework', 'MediaPlayer', - '-framework', 'OpenGLES', - '-framework', 'QuartzCore', - '-framework', 'SystemConfiguration', - '-F$IPHONESDK', - ]) - elif (env["arch"]=="arm64"): - env.Append(LINKFLAGS=['-arch', 'arm64', '-Wl,-dead_strip', '-miphoneos-version-min=5.1.1', - '-isysroot', '$IPHONESDK', - #'-stdlib=libc++', - '-framework', 'Foundation', - '-framework', 'UIKit', - '-framework', 'CoreGraphics', - '-framework', 'OpenGLES', - '-framework', 'QuartzCore', - '-framework', 'CoreAudio', - '-framework', 'AudioToolbox', - '-framework', 'SystemConfiguration', - '-framework', 'Security', - #'-framework', 'AdSupport', - '-framework', 'MediaPlayer', - '-framework', 'AVFoundation', - '-framework', 'CoreMedia', - ]) - else: - env.Append(LINKFLAGS=['-arch', 'armv7', '-Wl,-dead_strip', '-miphoneos-version-min=5.1.1', - '-isysroot', '$IPHONESDK', - '-framework', 'Foundation', - '-framework', 'UIKit', - '-framework', 'CoreGraphics', - '-framework', 'OpenGLES', - '-framework', 'QuartzCore', - '-framework', 'CoreAudio', - '-framework', 'AudioToolbox', - '-framework', 'SystemConfiguration', - '-framework', 'Security', - #'-framework', 'AdSupport', - '-framework', 'MediaPlayer', - '-framework', 'AVFoundation', - '-framework', 'CoreMedia', - ]) + if (env["arch"]=="x86"): + env['IPHONEPLATFORM'] = 'iPhoneSimulator' + env.Append(LINKFLAGS=['-arch', 'i386', '-mios-simulator-version-min=4.3', + '-isysroot', '$IPHONESDK', + #'-mmacosx-version-min=10.6', + '-Xlinker', + '-objc_abi_version', + '-Xlinker', '2', + '-framework', 'AudioToolbox', + '-framework', 'AVFoundation', + '-framework', 'CoreAudio', + '-framework', 'CoreGraphics', + '-framework', 'CoreMedia', + '-framework', 'Foundation', + '-framework', 'Security', + '-framework', 'UIKit', + '-framework', 'MediaPlayer', + '-framework', 'OpenGLES', + '-framework', 'QuartzCore', + '-framework', 'SystemConfiguration', + '-F$IPHONESDK', + ]) + elif (env["arch"]=="arm64"): + env.Append(LINKFLAGS=['-arch', 'arm64', '-Wl,-dead_strip', '-miphoneos-version-min=5.1.1', + '-isysroot', '$IPHONESDK', + #'-stdlib=libc++', + '-framework', 'Foundation', + '-framework', 'UIKit', + '-framework', 'CoreGraphics', + '-framework', 'OpenGLES', + '-framework', 'QuartzCore', + '-framework', 'CoreAudio', + '-framework', 'AudioToolbox', + '-framework', 'SystemConfiguration', + '-framework', 'Security', + #'-framework', 'AdSupport', + '-framework', 'MediaPlayer', + '-framework', 'AVFoundation', + '-framework', 'CoreMedia', + ]) + else: + env.Append(LINKFLAGS=['-arch', 'armv7', '-Wl,-dead_strip', '-miphoneos-version-min=5.1.1', + '-isysroot', '$IPHONESDK', + '-framework', 'Foundation', + '-framework', 'UIKit', + '-framework', 'CoreGraphics', + '-framework', 'OpenGLES', + '-framework', 'QuartzCore', + '-framework', 'CoreAudio', + '-framework', 'AudioToolbox', + '-framework', 'SystemConfiguration', + '-framework', 'Security', + #'-framework', 'AdSupport', + '-framework', 'MediaPlayer', + '-framework', 'AVFoundation', + '-framework', 'CoreMedia', + ]) - if env['game_center'] == 'yes': - env.Append(CPPFLAGS=['-fblocks', '-DGAME_CENTER_ENABLED']) - env.Append(LINKFLAGS=['-framework', 'GameKit']) + if env['game_center'] == 'yes': + env.Append(CPPFLAGS=['-fblocks', '-DGAME_CENTER_ENABLED']) + env.Append(LINKFLAGS=['-framework', 'GameKit']) - if env['store_kit'] == 'yes': - env.Append(CPPFLAGS=['-DSTOREKIT_ENABLED']) - env.Append(LINKFLAGS=['-framework', 'StoreKit']) + if env['store_kit'] == 'yes': + env.Append(CPPFLAGS=['-DSTOREKIT_ENABLED']) + env.Append(LINKFLAGS=['-framework', 'StoreKit']) - if env['icloud'] == 'yes': - env.Append(CPPFLAGS=['-DICLOUD_ENABLED']) + if env['icloud'] == 'yes': + env.Append(CPPFLAGS=['-DICLOUD_ENABLED']) - env.Append(CPPPATH = ['$IPHONESDK/usr/include', '$IPHONESDK/System/Library/Frameworks/OpenGLES.framework/Headers', '$IPHONESDK/System/Library/Frameworks/AudioUnit.framework/Headers']) + env.Append(CPPPATH = ['$IPHONESDK/usr/include', '$IPHONESDK/System/Library/Frameworks/OpenGLES.framework/Headers', '$IPHONESDK/System/Library/Frameworks/AudioUnit.framework/Headers']) - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-O3', '-DNS_BLOCK_ASSERTIONS=1','-Wall', '-gdwarf-2']) # removed -ffast-math - env.Append(LINKFLAGS=['-O3']) # + env.Append(CCFLAGS=['-O3', '-DNS_BLOCK_ASSERTIONS=1','-Wall', '-gdwarf-2']) # removed -ffast-math + env.Append(LINKFLAGS=['-O3']) # - elif env["target"] == "release_debug": - env.Append(CCFLAGS=['-Os', '-DNS_BLOCK_ASSERTIONS=1','-Wall','-DDEBUG_ENABLED']) - env.Append(LINKFLAGS=['-Os']) - env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) + elif env["target"] == "release_debug": + env.Append(CCFLAGS=['-Os', '-DNS_BLOCK_ASSERTIONS=1','-Wall','-DDEBUG_ENABLED']) + env.Append(LINKFLAGS=['-Os']) + env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-D_DEBUG', '-DDEBUG=1', '-gdwarf-2', '-Wall', '-O0', '-DDEBUG_ENABLED']) - env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-D_DEBUG', '-DDEBUG=1', '-gdwarf-2', '-Wall', '-O0', '-DDEBUG_ENABLED']) + env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) - elif (env["target"]=="profile"): + elif (env["target"]=="profile"): - env.Append(CCFLAGS=['-g','-pg', '-Os']) - env.Append(LINKFLAGS=['-pg']) + env.Append(CCFLAGS=['-g','-pg', '-Os']) + env.Append(LINKFLAGS=['-pg']) - if (env["ios_sim"]=="yes"): #TODO: Check if needed? - env['ENV']['MACOSX_DEPLOYMENT_TARGET'] = '10.6' - env['ENV']['CODESIGN_ALLOCATE'] = '/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/codesign_allocate' - env.Append(CPPFLAGS=['-DIPHONE_ENABLED', '-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DMPC_FIXED_POINT']) + if (env["ios_sim"]=="yes"): #TODO: Check if needed? + env['ENV']['MACOSX_DEPLOYMENT_TARGET'] = '10.6' + env['ENV']['CODESIGN_ALLOCATE'] = '/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/codesign_allocate' + env.Append(CPPFLAGS=['-DIPHONE_ENABLED', '-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DMPC_FIXED_POINT']) - # TODO: Move that to opus module's config - if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): - env.opus_fixed_point="yes" - if env["arch"]=="x86": - pass - elif(env["arch"]=="arm64"): - env.Append(CFLAGS=["-DOPUS_ARM64_OPT"]) - else: - env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) + # TODO: Move that to opus module's config + if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): + env.opus_fixed_point="yes" + if env["arch"]=="x86": + pass + elif(env["arch"]=="arm64"): + env.Append(CFLAGS=["-DOPUS_ARM64_OPT"]) + else: + env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) - if env['ios_exceptions'] == 'yes': - env.Append(CPPFLAGS=['-fexceptions']) - else: - env.Append(CPPFLAGS=['-fno-exceptions']) - #env['neon_enabled']=True - env['S_compiler'] = '$IPHONEPATH/Developer/usr/bin/gcc' + if env['ios_exceptions'] == 'yes': + env.Append(CPPFLAGS=['-fexceptions']) + else: + env.Append(CPPFLAGS=['-fno-exceptions']) + #env['neon_enabled']=True + env['S_compiler'] = '$IPHONEPATH/Developer/usr/bin/gcc' - import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + import methods + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) diff --git a/platform/javascript/SCsub b/platform/javascript/SCsub index 59e5aa175d2..07852ac2065 100644 --- a/platform/javascript/SCsub +++ b/platform/javascript/SCsub @@ -3,20 +3,20 @@ Import('env') javascript_files = [ - "os_javascript.cpp", - "audio_driver_javascript.cpp", - "javascript_main.cpp", - "audio_server_javascript.cpp", - "javascript_eval.cpp" + "os_javascript.cpp", + "audio_driver_javascript.cpp", + "javascript_main.cpp", + "audio_server_javascript.cpp", + "javascript_eval.cpp" ] env_javascript = env.Clone() if env['target'] == "profile": - env_javascript.Append(CPPFLAGS=['-DPROFILER_ENABLED']) + env_javascript.Append(CPPFLAGS=['-DPROFILER_ENABLED']) javascript_objects=[] for x in javascript_files: - javascript_objects.append( env_javascript.Object( x ) ) + javascript_objects.append( env_javascript.Object( x ) ) env.Append(LINKFLAGS=["-s","EXPORTED_FUNCTIONS=\"['_main','_audio_server_mix_function','_main_after_fs_sync']\""]) env.Append(LINKFLAGS=["--shell-file",'"platform/javascript/godot_shell.html"']) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index b6a6a453b33..ce6149c2503 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -3,103 +3,103 @@ import sys import string def is_active(): - return True + return True def get_name(): - return "JavaScript" + return "JavaScript" def can_build(): - import os - if (not os.environ.has_key("EMSCRIPTEN_ROOT")): - return False - return True + import os + if (not os.environ.has_key("EMSCRIPTEN_ROOT")): + return False + return True def get_opts(): - return [ - ['wasm','Compile to WebAssembly','no'], - ['javascript_eval','Enable JavaScript eval interface','yes'], - ] + return [ + ['wasm','Compile to WebAssembly','no'], + ['javascript_eval','Enable JavaScript eval interface','yes'], + ] def get_flags(): - return [ - ('tools', 'no'), - ('builtin_zlib', 'yes'), - ('module_etc1_enabled', 'no'), - ('module_mpc_enabled', 'no'), - ('module_theora_enabled', 'no'), - ] + return [ + ('tools', 'no'), + ('builtin_zlib', 'yes'), + ('module_etc1_enabled', 'no'), + ('module_mpc_enabled', 'no'), + ('module_theora_enabled', 'no'), + ] def configure(env): - env['ENV'] = os.environ; - env.use_windows_spawn_fix('javascript') + env['ENV'] = os.environ; + env.use_windows_spawn_fix('javascript') - env.Append(CPPPATH=['#platform/javascript']) + env.Append(CPPPATH=['#platform/javascript']) - em_path=os.environ["EMSCRIPTEN_ROOT"] + em_path=os.environ["EMSCRIPTEN_ROOT"] - env['ENV']['PATH'] = em_path+":"+env['ENV']['PATH'] - env['CC'] = em_path+'/emcc' - env['CXX'] = em_path+'/emcc' - #env['AR'] = em_path+"/emar" - env['AR'] = em_path+"/emcc" - env['ARFLAGS'] = "-o" + env['ENV']['PATH'] = em_path+":"+env['ENV']['PATH'] + env['CC'] = em_path+'/emcc' + env['CXX'] = em_path+'/emcc' + #env['AR'] = em_path+"/emar" + env['AR'] = em_path+"/emcc" + env['ARFLAGS'] = "-o" # env['RANLIB'] = em_path+"/emranlib" - env['RANLIB'] = em_path + "/emcc" - env['OBJSUFFIX'] = '.bc' - env['LIBSUFFIX'] = '.bc' - env['CCCOM'] = "$CC -o $TARGET $CFLAGS $CCFLAGS $_CCCOMCOM $SOURCES" - env['CXXCOM'] = "$CC -o $TARGET $CFLAGS $CCFLAGS $_CCCOMCOM $SOURCES" + env['RANLIB'] = em_path + "/emcc" + env['OBJSUFFIX'] = '.bc' + env['LIBSUFFIX'] = '.bc' + env['CCCOM'] = "$CC -o $TARGET $CFLAGS $CCFLAGS $_CCCOMCOM $SOURCES" + env['CXXCOM'] = "$CC -o $TARGET $CFLAGS $CCFLAGS $_CCCOMCOM $SOURCES" # env.Append(LIBS=['c','m','stdc++','log','GLESv1_CM','GLESv2']) # env["LINKFLAGS"]= string.split(" -g --sysroot="+ld_sysroot+" -Wl,--no-undefined -Wl,-z,noexecstack ") - if (env["target"]=="release"): - env.Append(CCFLAGS=['-O2']) - elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-O2', '-DDEBUG_ENABLED']) - #env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-g4', '-DDEBUG_ENABLED']) - env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) + if (env["target"]=="release"): + env.Append(CCFLAGS=['-O2']) + elif (env["target"]=="release_debug"): + env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) + elif (env["target"]=="debug"): + env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-O2', '-DDEBUG_ENABLED']) + #env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-g4', '-DDEBUG_ENABLED']) + env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) - # TODO: Move that to opus module's config - if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): - env.opus_fixed_point = "yes" + # TODO: Move that to opus module's config + if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): + env.opus_fixed_point = "yes" - env.Append(CPPFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST','-fno-rtti']) - env.Append(CPPFLAGS=['-DJAVASCRIPT_ENABLED', '-DUNIX_ENABLED', '-DPTHREAD_NO_RENAME', '-DNO_FCNTL','-DMPC_FIXED_POINT','-DTYPED_METHOD_BIND','-DNO_THREADS']) - env.Append(CPPFLAGS=['-DGLES2_ENABLED']) - env.Append(CPPFLAGS=['-DGLES_NO_CLIENT_ARRAYS']) - env.Append(CPPFLAGS=['-s','FULL_ES2=1']) + env.Append(CPPFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST','-fno-rtti']) + env.Append(CPPFLAGS=['-DJAVASCRIPT_ENABLED', '-DUNIX_ENABLED', '-DPTHREAD_NO_RENAME', '-DNO_FCNTL','-DMPC_FIXED_POINT','-DTYPED_METHOD_BIND','-DNO_THREADS']) + env.Append(CPPFLAGS=['-DGLES2_ENABLED']) + env.Append(CPPFLAGS=['-DGLES_NO_CLIENT_ARRAYS']) + env.Append(CPPFLAGS=['-s','FULL_ES2=1']) # env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT']) - if env['wasm'] == 'yes': - env.Append(LINKFLAGS=['-s','BINARYEN=1']) - env.Append(LINKFLAGS=['-s','\'BINARYEN_METHOD="native-wasm"\'']) - env["PROGSUFFIX"]+=".webassembly" - else: - env.Append(CPPFLAGS=['-s','ASM_JS=1']) - env.Append(LINKFLAGS=['-s','ASM_JS=1']) + if env['wasm'] == 'yes': + env.Append(LINKFLAGS=['-s','BINARYEN=1']) + env.Append(LINKFLAGS=['-s','\'BINARYEN_METHOD="native-wasm"\'']) + env["PROGSUFFIX"]+=".webassembly" + else: + env.Append(CPPFLAGS=['-s','ASM_JS=1']) + env.Append(LINKFLAGS=['-s','ASM_JS=1']) - if env['javascript_eval'] == 'yes': - env.Append(CPPFLAGS=['-DJAVASCRIPT_EVAL_ENABLED']) + if env['javascript_eval'] == 'yes': + env.Append(CPPFLAGS=['-DJAVASCRIPT_EVAL_ENABLED']) - env.Append(LINKFLAGS=['-O2']) - #env.Append(LINKFLAGS=['-g4']) + env.Append(LINKFLAGS=['-O2']) + #env.Append(LINKFLAGS=['-g4']) - #print "CCCOM is:", env.subst('$CCCOM') - #print "P: ", env['p'], " Platofrm: ", env['platform'] + #print "CCCOM is:", env.subst('$CCCOM') + #print "P: ", env['p'], " Platofrm: ", env['platform'] - import methods + import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) diff --git a/platform/osx/SCsub b/platform/osx/SCsub index 217dee81eb1..68484b72886 100644 --- a/platform/osx/SCsub +++ b/platform/osx/SCsub @@ -3,13 +3,13 @@ Import('env') files = [ - 'os_osx.mm', - 'godot_main_osx.mm', - 'audio_driver_osx.cpp', - 'sem_osx.cpp', -# 'context_gl_osx.cpp', - 'dir_access_osx.mm', - 'joystick_osx.cpp', - ] + 'os_osx.mm', + 'godot_main_osx.mm', + 'audio_driver_osx.cpp', + 'sem_osx.cpp', + # 'context_gl_osx.cpp', + 'dir_access_osx.mm', + 'joystick_osx.cpp', +] env.Program('#bin/godot',files) diff --git a/platform/osx/detect.py b/platform/osx/detect.py index 4e772e37ebb..358c4ef0152 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -4,99 +4,99 @@ import sys def is_active(): - return True + return True def get_name(): - return "OSX" + return "OSX" def can_build(): - if (sys.platform == "darwin" or os.environ.has_key("OSXCROSS_ROOT")): - return True + if (sys.platform == "darwin" or os.environ.has_key("OSXCROSS_ROOT")): + return True - return False + return False def get_opts(): - return [ - ('force_64_bits','Force 64 bits binary','no'), - ('osxcross_sdk','OSXCross SDK version','darwin14'), + return [ + ('force_64_bits','Force 64 bits binary','no'), + ('osxcross_sdk','OSXCross SDK version','darwin14'), - ] + ] def get_flags(): - return [ - ] + return [ + ] def configure(env): - env.Append(CPPPATH=['#platform/osx']) + env.Append(CPPPATH=['#platform/osx']) - if (env["bits"]=="default"): - env["bits"]="32" + if (env["bits"]=="default"): + env["bits"]="32" - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer','-ftree-vectorize','-msse2']) + env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer','-ftree-vectorize','-msse2']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g3', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g3', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) - if (not os.environ.has_key("OSXCROSS_ROOT")): - #regular native build - if (env["bits"]=="64"): - env.Append(CCFLAGS=['-arch', 'x86_64']) - env.Append(LINKFLAGS=['-arch', 'x86_64']) - elif (env["bits"]=="32"): - env.Append(CCFLAGS=['-arch', 'i386']) - env.Append(LINKFLAGS=['-arch', 'i386']) - else: - env.Append(CCFLAGS=['-arch', 'i386', '-arch', 'x86_64']) - env.Append(LINKFLAGS=['-arch', 'i386', '-arch', 'x86_64']) - else: - #osxcross build - root=os.environ.get("OSXCROSS_ROOT",0) - if env["bits"]=="64": - basecmd=root+"/target/bin/x86_64-apple-"+env["osxcross_sdk"]+"-" - else: - basecmd=root+"/target/bin/i386-apple-"+env["osxcross_sdk"]+"-" + if (not os.environ.has_key("OSXCROSS_ROOT")): + #regular native build + if (env["bits"]=="64"): + env.Append(CCFLAGS=['-arch', 'x86_64']) + env.Append(LINKFLAGS=['-arch', 'x86_64']) + elif (env["bits"]=="32"): + env.Append(CCFLAGS=['-arch', 'i386']) + env.Append(LINKFLAGS=['-arch', 'i386']) + else: + env.Append(CCFLAGS=['-arch', 'i386', '-arch', 'x86_64']) + env.Append(LINKFLAGS=['-arch', 'i386', '-arch', 'x86_64']) + else: + #osxcross build + root=os.environ.get("OSXCROSS_ROOT",0) + if env["bits"]=="64": + basecmd=root+"/target/bin/x86_64-apple-"+env["osxcross_sdk"]+"-" + else: + basecmd=root+"/target/bin/i386-apple-"+env["osxcross_sdk"]+"-" - env['CC'] = basecmd+"cc" - env['CXX'] = basecmd+"c++" - env['AR'] = basecmd+"ar" - env['RANLIB'] = basecmd+"ranlib" - env['AS'] = basecmd+"as" + env['CC'] = basecmd+"cc" + env['CXX'] = basecmd+"c++" + env['AR'] = basecmd+"ar" + env['RANLIB'] = basecmd+"ranlib" + env['AS'] = basecmd+"as" - env.Append(CPPFLAGS=["-DAPPLE_STYLE_KEYS"]) - env.Append(CPPFLAGS=['-DUNIX_ENABLED','-DGLES2_ENABLED','-DOSX_ENABLED']) - env.Append(LIBS=['pthread']) - #env.Append(CPPFLAGS=['-F/Developer/SDKs/MacOSX10.4u.sdk/System/Library/Frameworks', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-mmacosx-version-min=10.4']) - #env.Append(LINKFLAGS=['-mmacosx-version-min=10.4', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-Wl,-syslibroot,/Developer/SDKs/MacOSX10.4u.sdk']) - env.Append(LINKFLAGS=['-framework', 'Cocoa', '-framework', 'Carbon', '-framework', 'OpenGL', '-framework', 'AGL', '-framework', 'AudioUnit','-lz', '-framework', 'IOKit', '-framework', 'ForceFeedback']) + env.Append(CPPFLAGS=["-DAPPLE_STYLE_KEYS"]) + env.Append(CPPFLAGS=['-DUNIX_ENABLED','-DGLES2_ENABLED','-DOSX_ENABLED']) + env.Append(LIBS=['pthread']) + #env.Append(CPPFLAGS=['-F/Developer/SDKs/MacOSX10.4u.sdk/System/Library/Frameworks', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-mmacosx-version-min=10.4']) + #env.Append(LINKFLAGS=['-mmacosx-version-min=10.4', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-Wl,-syslibroot,/Developer/SDKs/MacOSX10.4u.sdk']) + env.Append(LINKFLAGS=['-framework', 'Cocoa', '-framework', 'Carbon', '-framework', 'OpenGL', '-framework', 'AGL', '-framework', 'AudioUnit','-lz', '-framework', 'IOKit', '-framework', 'ForceFeedback']) - if (env["CXX"]=="clang++"): - env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env["CC"]="clang" - env["LD"]="clang++" + if (env["CXX"]=="clang++"): + env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) + env["CC"]="clang" + env["LD"]="clang++" - import methods + import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env["x86_libtheora_opt_gcc"]=True + env["x86_libtheora_opt_gcc"]=True diff --git a/platform/server/SCsub b/platform/server/SCsub index 12758c5db07..d53a003288f 100644 --- a/platform/server/SCsub +++ b/platform/server/SCsub @@ -4,7 +4,7 @@ Import('env') common_server=[\ - "os_server.cpp",\ + "os_server.cpp",\ ] env.Program('#bin/godot_server',['godot_server.cpp']+common_server) diff --git a/platform/server/detect.py b/platform/server/detect.py index ce14100fd09..c6e9d45f93f 100644 --- a/platform/server/detect.py +++ b/platform/server/detect.py @@ -4,73 +4,73 @@ import sys def is_active(): - return True + return True def get_name(): - return "Server" + return "Server" def can_build(): - if (os.name!="posix"): - return False + if (os.name!="posix"): + return False - return True # enabled + return True # enabled def get_opts(): - return [ - ('use_llvm','Use llvm compiler','no'), - ('force_32_bits','Force 32 bits binary','no') - ] + return [ + ('use_llvm','Use llvm compiler','no'), + ('force_32_bits','Force 32 bits binary','no') + ] def get_flags(): - return [ - ] + return [ + ] def configure(env): - env.Append(CPPPATH=['#platform/server']) - if (env["use_llvm"]=="yes"): - env["CC"]="clang" - env["CXX"]="clang++" - env["LD"]="clang++" + env.Append(CPPPATH=['#platform/server']) + if (env["use_llvm"]=="yes"): + env["CC"]="clang" + env["CXX"]="clang++" + env["LD"]="clang++" - is64=sys.maxsize > 2**32 + is64=sys.maxsize > 2**32 - if (env["bits"]=="default"): - if (is64): - env["bits"]="64" - else: - env["bits"]="32" + if (env["bits"]=="default"): + if (is64): + env["bits"]="64" + else: + env["bits"]="32" - #if (env["tools"]=="no"): - # #no tools suffix - # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] - # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] + #if (env["tools"]=="no"): + # #no tools suffix + # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] + # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer']) + env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) - env.Append(CPPFLAGS=['-DSERVER_ENABLED','-DUNIX_ENABLED']) - env.Append(LIBS=['pthread','z']) #TODO detect linux/BSD! + env.Append(CPPFLAGS=['-DSERVER_ENABLED','-DUNIX_ENABLED']) + env.Append(LIBS=['pthread','z']) #TODO detect linux/BSD! - if (env["CXX"]=="clang++"): - env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env["CC"]="clang" - env["LD"]="clang++" + if (env["CXX"]=="clang++"): + env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) + env["CC"]="clang" + env["LD"]="clang++" diff --git a/platform/windows/SCsub b/platform/windows/SCsub index e53eb7af345..00568952ea1 100644 --- a/platform/windows/SCsub +++ b/platform/windows/SCsub @@ -4,14 +4,14 @@ Import('env') common_win=[ - "context_gl_win.cpp", - "os_windows.cpp", - "ctxgl_procaddr.cpp", - "key_mapping_win.cpp", - "tcp_server_winsock.cpp", - "packet_peer_udp_winsock.cpp", - "stream_peer_winsock.cpp", - "joystick.cpp", + "context_gl_win.cpp", + "os_windows.cpp", + "ctxgl_procaddr.cpp", + "key_mapping_win.cpp", + "tcp_server_winsock.cpp", + "packet_peer_udp_winsock.cpp", + "stream_peer_winsock.cpp", + "joystick.cpp", ] restarget="godot_res"+env["OBJSUFFIX"] @@ -24,6 +24,6 @@ env.Program('#bin/godot',['godot_win.cpp']+common_win,PROGSUFFIX=env["PROGSUFFIX # Microsoft Visual Studio Project Generation if (env['vsproj'])=="yes": - env.vs_srcs = env.vs_srcs + ["platform/windows/godot_win.cpp"] - for x in common_win: - env.vs_srcs = env.vs_srcs + ["platform/windows/" + str(x)] + env.vs_srcs = env.vs_srcs + ["platform/windows/godot_win.cpp"] + for x in common_win: + env.vs_srcs = env.vs_srcs + ["platform/windows/" + str(x)] diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 7ae01007628..325f2838b23 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -100,299 +100,299 @@ import sys import methods def is_active(): - return True + return True def get_name(): - return "Windows" + return "Windows" def can_build(): - if (os.name=="nt"): - #building natively on windows! - if ( os.getenv("VCINSTALLDIR") ): - return True - else: - print("\nMSVC not detected, attempting Mingw.") - mingw32 = "" - mingw64 = "" - if ( os.getenv("MINGW32_PREFIX") ) : - mingw32 = os.getenv("MINGW32_PREFIX") - if ( os.getenv("MINGW64_PREFIX") ) : - mingw64 = os.getenv("MINGW64_PREFIX") + if (os.name=="nt"): + #building natively on windows! + if ( os.getenv("VCINSTALLDIR") ): + return True + else: + print("\nMSVC not detected, attempting Mingw.") + mingw32 = "" + mingw64 = "" + if ( os.getenv("MINGW32_PREFIX") ) : + mingw32 = os.getenv("MINGW32_PREFIX") + if ( os.getenv("MINGW64_PREFIX") ) : + mingw64 = os.getenv("MINGW64_PREFIX") - test = "gcc --version > NUL 2>&1" - if os.system(test)!= 0 and os.system(mingw32+test)!=0 and os.system(mingw64+test)!=0 : - print("- could not detect gcc.") - print("Please, make sure a path to a Mingw /bin directory is accessible into the environment PATH.\n") - return False - else: - print("- gcc detected.") + test = "gcc --version > NUL 2>&1" + if os.system(test)!= 0 and os.system(mingw32+test)!=0 and os.system(mingw64+test)!=0 : + print("- could not detect gcc.") + print("Please, make sure a path to a Mingw /bin directory is accessible into the environment PATH.\n") + return False + else: + print("- gcc detected.") - return True + return True - if (os.name=="posix"): + if (os.name=="posix"): - mingw = "i586-mingw32msvc-" - mingw64 = "x86_64-w64-mingw32-" - mingw32 = "i686-w64-mingw32-" + mingw = "i586-mingw32msvc-" + mingw64 = "x86_64-w64-mingw32-" + mingw32 = "i686-w64-mingw32-" - if (os.getenv("MINGW32_PREFIX")): - mingw32=os.getenv("MINGW32_PREFIX") - mingw = mingw32 - if (os.getenv("MINGW64_PREFIX")): - mingw64=os.getenv("MINGW64_PREFIX") + if (os.getenv("MINGW32_PREFIX")): + mingw32=os.getenv("MINGW32_PREFIX") + mingw = mingw32 + if (os.getenv("MINGW64_PREFIX")): + mingw64=os.getenv("MINGW64_PREFIX") - test = "gcc --version &>/dev/null" - if (os.system(mingw+test) == 0 or os.system(mingw64+test) == 0 or os.system(mingw32+test) == 0): - return True + test = "gcc --version &>/dev/null" + if (os.system(mingw+test) == 0 or os.system(mingw64+test) == 0 or os.system(mingw32+test) == 0): + return True - return False + return False def get_opts(): - mingw="" - mingw32="" - mingw64="" - if ( os.name == "posix" ): - mingw = "i586-mingw32msvc-" - mingw32 = "i686-w64-mingw32-" - mingw64 = "x86_64-w64-mingw32-" + mingw="" + mingw32="" + mingw64="" + if ( os.name == "posix" ): + mingw = "i586-mingw32msvc-" + mingw32 = "i686-w64-mingw32-" + mingw64 = "x86_64-w64-mingw32-" - if os.system(mingw32+"gcc --version &>/dev/null") != 0 : - mingw32 = mingw + if os.system(mingw32+"gcc --version &>/dev/null") != 0 : + mingw32 = mingw - if (os.getenv("MINGW32_PREFIX")): - mingw32=os.getenv("MINGW32_PREFIX") - mingw = mingw32 - if (os.getenv("MINGW64_PREFIX")): - mingw64=os.getenv("MINGW64_PREFIX") + if (os.getenv("MINGW32_PREFIX")): + mingw32=os.getenv("MINGW32_PREFIX") + mingw = mingw32 + if (os.getenv("MINGW64_PREFIX")): + mingw64=os.getenv("MINGW64_PREFIX") - return [ - ('mingw_prefix','Mingw Prefix',mingw32), - ('mingw_prefix_64','Mingw Prefix 64 bits',mingw64), - ] + return [ + ('mingw_prefix','Mingw Prefix',mingw32), + ('mingw_prefix_64','Mingw Prefix 64 bits',mingw64), + ] def get_flags(): - return [ - ('builtin_zlib', 'yes'), - ('openssl','builtin'), #use builtin openssl - ] + return [ + ('builtin_zlib', 'yes'), + ('openssl','builtin'), #use builtin openssl + ] def build_res_file( target, source, env ): - cmdbase = "" - if (env["bits"] == "32"): - cmdbase = env['mingw_prefix'] - else: - cmdbase = env['mingw_prefix_64'] - CPPPATH = env['CPPPATH'] - cmdbase = cmdbase + 'windres --include-dir . ' - import subprocess - for x in range(len(source)): - cmd = cmdbase + '-i ' + str(source[x]) + ' -o ' + str(target[x]) - try: - out = subprocess.Popen(cmd,shell = True,stderr = subprocess.PIPE).communicate() - if len(out[1]): - return 1 - except: - return 1 - return 0 + cmdbase = "" + if (env["bits"] == "32"): + cmdbase = env['mingw_prefix'] + else: + cmdbase = env['mingw_prefix_64'] + CPPPATH = env['CPPPATH'] + cmdbase = cmdbase + 'windres --include-dir . ' + import subprocess + for x in range(len(source)): + cmd = cmdbase + '-i ' + str(source[x]) + ' -o ' + str(target[x]) + try: + out = subprocess.Popen(cmd,shell = True,stderr = subprocess.PIPE).communicate() + if len(out[1]): + return 1 + except: + return 1 + return 0 def configure(env): - env.Append(CPPPATH=['#platform/windows']) - env['is_mingw']=False - if (os.name=="nt" and os.getenv("VCINSTALLDIR") ): - #build using visual studio - env['ENV']['TMP'] = os.environ['TMP'] - env.Append(CPPPATH=['#platform/windows/include']) - env.Append(LIBPATH=['#platform/windows/lib']) + env.Append(CPPPATH=['#platform/windows']) + env['is_mingw']=False + if (os.name=="nt" and os.getenv("VCINSTALLDIR") ): + #build using visual studio + env['ENV']['TMP'] = os.environ['TMP'] + env.Append(CPPPATH=['#platform/windows/include']) + env.Append(LIBPATH=['#platform/windows/lib']) - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['/O2']) - env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) - env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) + env.Append(CCFLAGS=['/O2']) + env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) + env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['/O2','/DDEBUG_ENABLED']) - env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - elif (env["target"]=="debug_release"): + env.Append(CCFLAGS=['/O2','/DDEBUG_ENABLED']) + env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) + elif (env["target"]=="debug_release"): - env.Append(CCFLAGS=['/Z7','/Od']) - env.Append(LINKFLAGS=['/DEBUG']) - env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) - env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) + env.Append(CCFLAGS=['/Z7','/Od']) + env.Append(LINKFLAGS=['/DEBUG']) + env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) + env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['/Z7','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED','/DD3D_DEBUG_INFO','/Od']) - env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - env.Append(LINKFLAGS=['/DEBUG']) + env.Append(CCFLAGS=['/Z7','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED','/DD3D_DEBUG_INFO','/Od']) + env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) + env.Append(LINKFLAGS=['/DEBUG']) - env.Append(CCFLAGS=['/MT','/Gd','/GR','/nologo']) - env.Append(CXXFLAGS=['/TP']) - env.Append(CPPFLAGS=['/DMSVC', '/GR', ]) - env.Append(CCFLAGS=['/I'+os.getenv("WindowsSdkDir")+"/Include"]) - env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) - env.Append(CCFLAGS=['/DRTAUDIO_ENABLED']) - env.Append(CCFLAGS=['/DWIN32']) - env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) + env.Append(CCFLAGS=['/MT','/Gd','/GR','/nologo']) + env.Append(CXXFLAGS=['/TP']) + env.Append(CPPFLAGS=['/DMSVC', '/GR', ]) + env.Append(CCFLAGS=['/I'+os.getenv("WindowsSdkDir")+"/Include"]) + env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) + env.Append(CCFLAGS=['/DRTAUDIO_ENABLED']) + env.Append(CCFLAGS=['/DWIN32']) + env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) - env.Append(CCFLAGS=['/DGLES2_ENABLED']) - LIBS=['winmm','opengl32','dsound','kernel32','ole32','oleaut32','user32','gdi32', 'IPHLPAPI','Shlwapi', 'wsock32','Ws2_32', 'shell32','advapi32','dinput8','dxguid'] - env.Append(LINKFLAGS=[p+env["LIBSUFFIX"] for p in LIBS]) + env.Append(CCFLAGS=['/DGLES2_ENABLED']) + LIBS=['winmm','opengl32','dsound','kernel32','ole32','oleaut32','user32','gdi32', 'IPHLPAPI','Shlwapi', 'wsock32','Ws2_32', 'shell32','advapi32','dinput8','dxguid'] + env.Append(LINKFLAGS=[p+env["LIBSUFFIX"] for p in LIBS]) - env.Append(LIBPATH=[os.getenv("WindowsSdkDir")+"/Lib"]) - if (os.getenv("DXSDK_DIR")): - DIRECTX_PATH=os.getenv("DXSDK_DIR") - else: - DIRECTX_PATH="C:/Program Files/Microsoft DirectX SDK (March 2009)" + env.Append(LIBPATH=[os.getenv("WindowsSdkDir")+"/Lib"]) + if (os.getenv("DXSDK_DIR")): + DIRECTX_PATH=os.getenv("DXSDK_DIR") + else: + DIRECTX_PATH="C:/Program Files/Microsoft DirectX SDK (March 2009)" - if (os.getenv("VCINSTALLDIR")): - VC_PATH=os.getenv("VCINSTALLDIR") - else: - VC_PATH="" + if (os.getenv("VCINSTALLDIR")): + VC_PATH=os.getenv("VCINSTALLDIR") + else: + VC_PATH="" - env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")]) - env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")]) - env.Append(CCFLAGS=["/I"+DIRECTX_PATH+"/Include"]) - env.Append(LIBPATH=[DIRECTX_PATH+"/Lib/x86"]) - env['ENV'] = os.environ; + env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")]) + env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")]) + env.Append(CCFLAGS=["/I"+DIRECTX_PATH+"/Include"]) + env.Append(LIBPATH=[DIRECTX_PATH+"/Lib/x86"]) + env['ENV'] = os.environ; - # This detection function needs the tools env (that is env['ENV'], not SCons's env), and that is why it's this far bellow in the code - compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) + # This detection function needs the tools env (that is env['ENV'], not SCons's env), and that is why it's this far bellow in the code + compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) - # Note: this detection/override code from here onward should be here instead of in SConstruct because it's platform and compiler specific (MSVC/Windows) - if(env["bits"] != "default"): - print "Error: bits argument is disabled for MSVC" - print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" - +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=windows) and SCons will attempt to detect what MSVC compiler" - +" will be executed and inform you.") - sys.exit() + # Note: this detection/override code from here onward should be here instead of in SConstruct because it's platform and compiler specific (MSVC/Windows) + if(env["bits"] != "default"): + print "Error: bits argument is disabled for MSVC" + print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" + +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=windows) and SCons will attempt to detect what MSVC compiler" + +" will be executed and inform you.") + sys.exit() - # Forcing bits argument because MSVC does not have a flag to set this through SCons... it's different compilers (cl.exe's) called from the propper command prompt - # that decide the architecture that is build for. Scons can only detect the os.getenviron (because vsvarsall.bat sets a lot of stuff for cl.exe to work with) - env["bits"]="32" - env["x86_libtheora_opt_vc"]=True + # Forcing bits argument because MSVC does not have a flag to set this through SCons... it's different compilers (cl.exe's) called from the propper command prompt + # that decide the architecture that is build for. Scons can only detect the os.getenviron (because vsvarsall.bat sets a lot of stuff for cl.exe to work with) + env["bits"]="32" + env["x86_libtheora_opt_vc"]=True - print "Detected MSVC compiler: "+compiler_version_str - # If building for 64bit architecture, disable assembly optimisations for 32 bit builds (theora as of writting)... vc compiler for 64bit can not compile _asm - if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): - env["bits"]="64" - env["x86_libtheora_opt_vc"]=False - print "Compiled program architecture will be a 64 bit executable (forcing bits=64)." - elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): - print "Compiled program architecture will be a 32 bit executable. (forcing bits=32)." - else: - print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." - if env["bits"]=="64": - env.Append(CCFLAGS=['/D_WIN64']) + print "Detected MSVC compiler: "+compiler_version_str + # If building for 64bit architecture, disable assembly optimisations for 32 bit builds (theora as of writting)... vc compiler for 64bit can not compile _asm + if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): + env["bits"]="64" + env["x86_libtheora_opt_vc"]=False + print "Compiled program architecture will be a 64 bit executable (forcing bits=64)." + elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): + print "Compiled program architecture will be a 32 bit executable. (forcing bits=32)." + else: + print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." + if env["bits"]=="64": + env.Append(CCFLAGS=['/D_WIN64']) - # Incremental linking fix - env['BUILDERS']['ProgramOriginal'] = env['BUILDERS']['Program'] - env['BUILDERS']['Program'] = methods.precious_program + # Incremental linking fix + env['BUILDERS']['ProgramOriginal'] = env['BUILDERS']['Program'] + env['BUILDERS']['Program'] = methods.precious_program - else: + else: - # Workaround for MinGW. See: - # http://www.scons.org/wiki/LongCmdLinesOnWin32 - env.use_windows_spawn_fix() + # Workaround for MinGW. See: + # http://www.scons.org/wiki/LongCmdLinesOnWin32 + env.use_windows_spawn_fix() - #build using mingw - if (os.name=="nt"): - env['ENV']['TMP'] = os.environ['TMP'] #way to go scons, you can be so stupid sometimes - else: - env["PROGSUFFIX"]=env["PROGSUFFIX"]+".exe" # for linux cross-compilation + #build using mingw + if (os.name=="nt"): + env['ENV']['TMP'] = os.environ['TMP'] #way to go scons, you can be so stupid sometimes + else: + env["PROGSUFFIX"]=env["PROGSUFFIX"]+".exe" # for linux cross-compilation - mingw_prefix="" + mingw_prefix="" - if (env["bits"]=="default"): - env["bits"]="32" + if (env["bits"]=="default"): + env["bits"]="32" - if (env["bits"]=="32"): - env.Append(LINKFLAGS=['-static']) - env.Append(LINKFLAGS=['-static-libgcc']) - env.Append(LINKFLAGS=['-static-libstdc++']) - mingw_prefix=env["mingw_prefix"]; - else: - env.Append(LINKFLAGS=['-static']) - mingw_prefix=env["mingw_prefix_64"]; + if (env["bits"]=="32"): + env.Append(LINKFLAGS=['-static']) + env.Append(LINKFLAGS=['-static-libgcc']) + env.Append(LINKFLAGS=['-static-libstdc++']) + mingw_prefix=env["mingw_prefix"]; + else: + env.Append(LINKFLAGS=['-static']) + mingw_prefix=env["mingw_prefix_64"]; - nulstr="" + nulstr="" - if (os.name=="posix"): - nulstr=">/dev/null" - else: - nulstr=">nul" + if (os.name=="posix"): + nulstr=">/dev/null" + else: + nulstr=">nul" - # if os.system(mingw_prefix+"gcc --version"+nulstr)!=0: - # #not really super consistent but.. - # print("Can't find Windows compiler: "+mingw_prefix) - # sys.exit(255) + # if os.system(mingw_prefix+"gcc --version"+nulstr)!=0: + # #not really super consistent but.. + # print("Can't find Windows compiler: "+mingw_prefix) + # sys.exit(255) - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CCFLAGS=['-msse2']) + env.Append(CCFLAGS=['-msse2']) - if (env["bits"]=="64"): - env.Append(CCFLAGS=['-O3']) - else: - env.Append(CCFLAGS=['-O2']) + if (env["bits"]=="64"): + env.Append(CCFLAGS=['-O3']) + else: + env.Append(CCFLAGS=['-O2']) - env.Append(LINKFLAGS=['-Wl,--subsystem,windows']) + env.Append(LINKFLAGS=['-Wl,--subsystem,windows']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) - env["CC"]=mingw_prefix+"gcc" - env['AS']=mingw_prefix+"as" - env['CXX'] = mingw_prefix+"g++" - env['AR'] = mingw_prefix+"ar" - env['RANLIB'] = mingw_prefix+"ranlib" - env['LD'] = mingw_prefix+"g++" - env["x86_libtheora_opt_gcc"]=True + env["CC"]=mingw_prefix+"gcc" + env['AS']=mingw_prefix+"as" + env['CXX'] = mingw_prefix+"g++" + env['AR'] = mingw_prefix+"ar" + env['RANLIB'] = mingw_prefix+"ranlib" + env['LD'] = mingw_prefix+"g++" + env["x86_libtheora_opt_gcc"]=True - #env['CC'] = "winegcc" - #env['CXX'] = "wineg++" + #env['CC'] = "winegcc" + #env['CXX'] = "wineg++" - env.Append(CCFLAGS=['-DWINDOWS_ENABLED','-mwindows']) - env.Append(CPPFLAGS=['-DRTAUDIO_ENABLED']) - env.Append(CCFLAGS=['-DGLES2_ENABLED']) - env.Append(LIBS=['mingw32','opengl32', 'dsound', 'ole32', 'd3d9','winmm','gdi32','iphlpapi','shlwapi','wsock32','ws2_32','kernel32', 'oleaut32', 'dinput8', 'dxguid']) + env.Append(CCFLAGS=['-DWINDOWS_ENABLED','-mwindows']) + env.Append(CPPFLAGS=['-DRTAUDIO_ENABLED']) + env.Append(CCFLAGS=['-DGLES2_ENABLED']) + env.Append(LIBS=['mingw32','opengl32', 'dsound', 'ole32', 'd3d9','winmm','gdi32','iphlpapi','shlwapi','wsock32','ws2_32','kernel32', 'oleaut32', 'dinput8', 'dxguid']) - # if (env["bits"]=="32"): - # env.Append(LIBS=['gcc_s']) - # #--with-arch=i686 - # env.Append(CPPFLAGS=['-march=i686']) - # env.Append(LINKFLAGS=['-march=i686']) + # if (env["bits"]=="32"): + # env.Append(LIBS=['gcc_s']) + # #--with-arch=i686 + # env.Append(CPPFLAGS=['-march=i686']) + # env.Append(LINKFLAGS=['-march=i686']) - #'d3dx9d' - env.Append(CPPFLAGS=['-DMINGW_ENABLED']) - #env.Append(LINKFLAGS=['-g']) + #'d3dx9d' + env.Append(CPPFLAGS=['-DMINGW_ENABLED']) + #env.Append(LINKFLAGS=['-g']) - # resrc - env['is_mingw']=True - env.Append( BUILDERS = { 'RES' : env.Builder(action = build_res_file, suffix = '.o',src_suffix = '.rc') } ) + # resrc + env['is_mingw']=True + env.Append( BUILDERS = { 'RES' : env.Builder(action = build_res_file, suffix = '.o',src_suffix = '.rc') } ) - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) diff --git a/platform/winrt/SCsub b/platform/winrt/SCsub index 91a179084d9..5259e7dc226 100644 --- a/platform/winrt/SCsub +++ b/platform/winrt/SCsub @@ -3,21 +3,21 @@ Import('env') files = [ - 'thread_winrt.cpp', - '#platform/windows/tcp_server_winsock.cpp', - '#platform/windows/packet_peer_udp_winsock.cpp', - '#platform/windows/stream_peer_winsock.cpp', - '#platform/windows/key_mapping_win.cpp', - 'joystick_winrt.cpp', - 'gl_context_egl.cpp', - 'app.cpp', - 'os_winrt.cpp', + 'thread_winrt.cpp', + '#platform/windows/tcp_server_winsock.cpp', + '#platform/windows/packet_peer_udp_winsock.cpp', + '#platform/windows/stream_peer_winsock.cpp', + '#platform/windows/key_mapping_win.cpp', + 'joystick_winrt.cpp', + 'gl_context_egl.cpp', + 'app.cpp', + 'os_winrt.cpp', ] if "build_angle" in env and env["build_angle"]: - cmd = env.AlwaysBuild(env.ANGLE('libANGLE.lib', None)) + cmd = env.AlwaysBuild(env.ANGLE('libANGLE.lib', None)) prog = env.Program('#bin/godot', files) if "build_angle" in env and env["build_angle"]: - env.Depends(prog, [cmd]) + env.Depends(prog, [cmd]) diff --git a/platform/winrt/detect.py b/platform/winrt/detect.py index a7bc62f6856..06bae980e95 100644 --- a/platform/winrt/detect.py +++ b/platform/winrt/detect.py @@ -6,161 +6,161 @@ import methods def is_active(): - return True + return True def get_name(): - return "WinRT" + return "WinRT" def can_build(): - if (os.name=="nt"): - #building natively on windows! - if (os.getenv("VSINSTALLDIR")): + if (os.name=="nt"): + #building natively on windows! + if (os.getenv("VSINSTALLDIR")): - if (os.getenv("ANGLE_SRC_PATH") == None): - return False + if (os.getenv("ANGLE_SRC_PATH") == None): + return False - return True - return False + return True + return False def get_opts(): - return [] + return [] def get_flags(): - return [ - ('tools', 'no'), - ('builtin_zlib', 'yes'), - ('openssl', 'builtin'), - ('xaudio2', 'yes'), - ] + return [ + ('tools', 'no'), + ('builtin_zlib', 'yes'), + ('openssl', 'builtin'), + ('xaudio2', 'yes'), + ] def configure(env): - if(env["bits"] != "default"): - print "Error: bits argument is disabled for MSVC" - print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" - +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=winrt) and SCons will attempt to detect what MSVC compiler" - +" will be executed and inform you.") - sys.exit() + if(env["bits"] != "default"): + print "Error: bits argument is disabled for MSVC" + print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" + +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=winrt) and SCons will attempt to detect what MSVC compiler" + +" will be executed and inform you.") + sys.exit() - arch = "" - env['ENV'] = os.environ; + arch = "" + env['ENV'] = os.environ; - # ANGLE - angle_root = os.getenv("ANGLE_SRC_PATH") - env.Append(CPPPATH=[angle_root + '/include']) - jobs = str(env.GetOption("num_jobs")) - angle_build_cmd = "msbuild.exe " + angle_root + "/winrt/10/src/angle.sln /nologo /v:m /m:" + jobs + " /p:Configuration=Release /p:Platform=" + # ANGLE + angle_root = os.getenv("ANGLE_SRC_PATH") + env.Append(CPPPATH=[angle_root + '/include']) + jobs = str(env.GetOption("num_jobs")) + angle_build_cmd = "msbuild.exe " + angle_root + "/winrt/10/src/angle.sln /nologo /v:m /m:" + jobs + " /p:Configuration=Release /p:Platform=" - if os.path.isfile(str(os.getenv("ANGLE_SRC_PATH")) + "/winrt/10/src/angle.sln"): - env["build_angle"] = True + if os.path.isfile(str(os.getenv("ANGLE_SRC_PATH")) + "/winrt/10/src/angle.sln"): + env["build_angle"] = True - if os.getenv('Platform') == "ARM": + if os.getenv('Platform') == "ARM": - print "Compiled program architecture will be an ARM executable. (forcing bits=32)." + print "Compiled program architecture will be an ARM executable. (forcing bits=32)." - arch="arm" - env["bits"]="32" - env.Append(LINKFLAGS=['/MACHINE:ARM']) - env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/arm']) + arch="arm" + env["bits"]="32" + env.Append(LINKFLAGS=['/MACHINE:ARM']) + env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/arm']) - angle_build_cmd += "ARM" + angle_build_cmd += "ARM" - env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_ARM/lib']) + env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_ARM/lib']) - else: + else: - compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) + compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) - if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): - env["bits"]="64" - print "Compiled program architecture will be a x64 executable (forcing bits=64)." - elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): - env["bits"]="32" - print "Compiled program architecture will be a x86 executable. (forcing bits=32)." - else: - print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." - env["bits"]="32" + if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): + env["bits"]="64" + print "Compiled program architecture will be a x64 executable (forcing bits=64)." + elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): + env["bits"]="32" + print "Compiled program architecture will be a x86 executable. (forcing bits=32)." + else: + print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." + env["bits"]="32" - if (env["bits"] == "32"): - arch = "x86" + if (env["bits"] == "32"): + arch = "x86" - angle_build_cmd += "Win32" + angle_build_cmd += "Win32" - env.Append(CPPFLAGS=['/DPNG_ABORT=abort']) - env.Append(LINKFLAGS=['/MACHINE:X86']) - env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store']) - env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_Win32/lib']) + env.Append(CPPFLAGS=['/DPNG_ABORT=abort']) + env.Append(LINKFLAGS=['/MACHINE:X86']) + env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store']) + env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_Win32/lib']) - else: - arch = "x64" + else: + arch = "x64" - angle_build_cmd += "x64" + angle_build_cmd += "x64" - env.Append(LINKFLAGS=['/MACHINE:X64']) - env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/amd64']) - env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_x64/lib']) + env.Append(LINKFLAGS=['/MACHINE:X64']) + env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/amd64']) + env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_x64/lib']) - env.Append(CPPPATH=['#platform/winrt','#drivers/windows']) - env.Append(LINKFLAGS=['/MANIFEST:NO', '/NXCOMPAT', '/DYNAMICBASE', '/WINMD', '/APPCONTAINER', '/ERRORREPORT:PROMPT', '/NOLOGO', '/TLBID:1', '/NODEFAULTLIB:"kernel32.lib"', '/NODEFAULTLIB:"ole32.lib"']) - env.Append(CPPFLAGS=['/D','__WRL_NO_DEFAULT_LIB__','/D','WIN32']) - env.Append(CPPFLAGS=['/FU', os.environ['VCINSTALLDIR'] + 'lib/store/references/platform.winmd']) - env.Append(CPPFLAGS=['/AI', os.environ['VCINSTALLDIR'] + 'lib/store/references']) + env.Append(CPPPATH=['#platform/winrt','#drivers/windows']) + env.Append(LINKFLAGS=['/MANIFEST:NO', '/NXCOMPAT', '/DYNAMICBASE', '/WINMD', '/APPCONTAINER', '/ERRORREPORT:PROMPT', '/NOLOGO', '/TLBID:1', '/NODEFAULTLIB:"kernel32.lib"', '/NODEFAULTLIB:"ole32.lib"']) + env.Append(CPPFLAGS=['/D','__WRL_NO_DEFAULT_LIB__','/D','WIN32']) + env.Append(CPPFLAGS=['/FU', os.environ['VCINSTALLDIR'] + 'lib/store/references/platform.winmd']) + env.Append(CPPFLAGS=['/AI', os.environ['VCINSTALLDIR'] + 'lib/store/references']) - env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/references']) + env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/references']) - if (env["target"]=="release"): + if (env["target"]=="release"): - env.Append(CPPFLAGS=['/O2', '/GL']) - env.Append(CPPFLAGS=['/MD']) - env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS', '/LTCG']) + env.Append(CPPFLAGS=['/O2', '/GL']) + env.Append(CPPFLAGS=['/MD']) + env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS', '/LTCG']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['/O2','/Zi','/DDEBUG_ENABLED']) - env.Append(CPPFLAGS=['/MD']) - env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) + env.Append(CCFLAGS=['/O2','/Zi','/DDEBUG_ENABLED']) + env.Append(CPPFLAGS=['/MD']) + env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['/Zi','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED']) - env.Append(CPPFLAGS=['/MDd']) - env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - env.Append(LINKFLAGS=['/DEBUG']) + env.Append(CCFLAGS=['/Zi','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED']) + env.Append(CPPFLAGS=['/MDd']) + env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) + env.Append(LINKFLAGS=['/DEBUG']) - env.Append(CCFLAGS=string.split('/FS /MP /GS /wd"4453" /wd"28204" /wd"4291" /Zc:wchar_t /Gm- /fp:precise /D "_UNICODE" /D "UNICODE" /D "WINAPI_FAMILY=WINAPI_FAMILY_APP" /errorReport:prompt /WX- /Zc:forScope /Gd /EHsc /nologo')) - env.Append(CXXFLAGS=string.split('/ZW /FS')) - env.Append(CCFLAGS=['/AI', os.environ['VCINSTALLDIR']+'\\vcpackages', '/AI', os.environ['WINDOWSSDKDIR']+'\\References\\CommonConfiguration\\Neutral']) + env.Append(CCFLAGS=string.split('/FS /MP /GS /wd"4453" /wd"28204" /wd"4291" /Zc:wchar_t /Gm- /fp:precise /D "_UNICODE" /D "UNICODE" /D "WINAPI_FAMILY=WINAPI_FAMILY_APP" /errorReport:prompt /WX- /Zc:forScope /Gd /EHsc /nologo')) + env.Append(CXXFLAGS=string.split('/ZW /FS')) + env.Append(CCFLAGS=['/AI', os.environ['VCINSTALLDIR']+'\\vcpackages', '/AI', os.environ['WINDOWSSDKDIR']+'\\References\\CommonConfiguration\\Neutral']) - env["PROGSUFFIX"]="."+arch+env["PROGSUFFIX"] - env["OBJSUFFIX"]="."+arch+env["OBJSUFFIX"] - env["LIBSUFFIX"]="."+arch+env["LIBSUFFIX"] + env["PROGSUFFIX"]="."+arch+env["PROGSUFFIX"] + env["OBJSUFFIX"]="."+arch+env["OBJSUFFIX"] + env["LIBSUFFIX"]="."+arch+env["LIBSUFFIX"] - env.Append(CCFLAGS=['/DWINRT_ENABLED']) - env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) - env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) + env.Append(CCFLAGS=['/DWINRT_ENABLED']) + env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) + env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) - env.Append(CCFLAGS=['/DGLES2_ENABLED','/DGL_GLEXT_PROTOTYPES','/DEGL_EGLEXT_PROTOTYPES','/DANGLE_ENABLED']) + env.Append(CCFLAGS=['/DGLES2_ENABLED','/DGL_GLEXT_PROTOTYPES','/DEGL_EGLEXT_PROTOTYPES','/DANGLE_ENABLED']) - LIBS = [ - 'WindowsApp', - 'mincore', - 'libANGLE', - 'libEGL', - 'libGLESv2', - ] - env.Append(LINKFLAGS=[p+".lib" for p in LIBS]) + LIBS = [ + 'WindowsApp', + 'mincore', + 'libANGLE', + 'libEGL', + 'libGLESv2', + ] + env.Append(LINKFLAGS=[p+".lib" for p in LIBS]) - # Incremental linking fix - env['BUILDERS']['ProgramOriginal'] = env['BUILDERS']['Program'] - env['BUILDERS']['Program'] = methods.precious_program + # Incremental linking fix + env['BUILDERS']['ProgramOriginal'] = env['BUILDERS']['Program'] + env['BUILDERS']['Program'] = methods.precious_program - env.Append( BUILDERS = { 'ANGLE' : env.Builder(action = angle_build_cmd) } ) + env.Append( BUILDERS = { 'ANGLE' : env.Builder(action = angle_build_cmd) } ) - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) diff --git a/platform/x11/SCsub b/platform/x11/SCsub index e20bd44f8d4..93cd2ca267b 100644 --- a/platform/x11/SCsub +++ b/platform/x11/SCsub @@ -4,10 +4,10 @@ Import('env') common_x11=[\ - "context_gl_x11.cpp",\ - "os_x11.cpp",\ - "key_mapping_x11.cpp",\ - "joystick_linux.cpp",\ + "context_gl_x11.cpp",\ + "os_x11.cpp",\ + "key_mapping_x11.cpp",\ + "joystick_linux.cpp",\ ] env.Program('#bin/godot',['godot_x11.cpp']+common_x11) diff --git a/platform/x11/detect.py b/platform/x11/detect.py index e90af2c6970..593e3fbb9d5 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -5,236 +5,236 @@ import platform def is_active(): - return True + return True def get_name(): - return "X11" + return "X11" def can_build(): - if (os.name!="posix"): - return False + if (os.name!="posix"): + return False - if sys.platform == "darwin": - return False # no x11 on mac for now + if sys.platform == "darwin": + return False # no x11 on mac for now - errorval=os.system("pkg-config --version > /dev/null") + errorval=os.system("pkg-config --version > /dev/null") - if (errorval): - print("pkg-config not found.. x11 disabled.") - return False + if (errorval): + print("pkg-config not found.. x11 disabled.") + return False - x11_error=os.system("pkg-config x11 --modversion > /dev/null ") - if (x11_error): - print("X11 not found.. x11 disabled.") - return False + x11_error=os.system("pkg-config x11 --modversion > /dev/null ") + if (x11_error): + print("X11 not found.. x11 disabled.") + return False - ssl_error=os.system("pkg-config openssl --modversion > /dev/null ") - if (ssl_error): - print("OpenSSL not found.. x11 disabled.") - return False + ssl_error=os.system("pkg-config openssl --modversion > /dev/null ") + if (ssl_error): + print("OpenSSL not found.. x11 disabled.") + return False - x11_error=os.system("pkg-config xcursor --modversion > /dev/null ") - if (x11_error): - print("xcursor not found.. x11 disabled.") - return False + x11_error=os.system("pkg-config xcursor --modversion > /dev/null ") + if (x11_error): + print("xcursor not found.. x11 disabled.") + return False - x11_error=os.system("pkg-config xinerama --modversion > /dev/null ") - if (x11_error): - print("xinerama not found.. x11 disabled.") - return False + x11_error=os.system("pkg-config xinerama --modversion > /dev/null ") + if (x11_error): + print("xinerama not found.. x11 disabled.") + return False - x11_error=os.system("pkg-config xrandr --modversion > /dev/null ") - if (x11_error): - print("xrandr not found.. x11 disabled.") - return False + x11_error=os.system("pkg-config xrandr --modversion > /dev/null ") + if (x11_error): + print("xrandr not found.. x11 disabled.") + return False - return True # X11 enabled + return True # X11 enabled def get_opts(): - return [ - ('use_llvm','Use llvm compiler','no'), - ('use_static_cpp','link stdc++ statically','no'), - ('use_sanitizer','Use llvm compiler sanitize address','no'), - ('use_leak_sanitizer','Use llvm compiler sanitize memory leaks','no'), - ('pulseaudio','Detect & Use pulseaudio','yes'), - ('udev','Use udev for gamepad connection callbacks','no'), - ('debug_release', 'Add debug symbols to release version','no'), - ] + return [ + ('use_llvm','Use llvm compiler','no'), + ('use_static_cpp','link stdc++ statically','no'), + ('use_sanitizer','Use llvm compiler sanitize address','no'), + ('use_leak_sanitizer','Use llvm compiler sanitize memory leaks','no'), + ('pulseaudio','Detect & Use pulseaudio','yes'), + ('udev','Use udev for gamepad connection callbacks','no'), + ('debug_release', 'Add debug symbols to release version','no'), + ] def get_flags(): - return [ - ("openssl", "system"), - ('freetype', 'system'), - ('libpng', 'system'), - ] + return [ + ("openssl", "system"), + ('freetype', 'system'), + ('libpng', 'system'), + ] def configure(env): - is64=sys.maxsize > 2**32 + is64=sys.maxsize > 2**32 - if (env["bits"]=="default"): - if (is64): - env["bits"]="64" - else: - env["bits"]="32" + if (env["bits"]=="default"): + if (is64): + env["bits"]="64" + else: + env["bits"]="32" - env.Append(CPPPATH=['#platform/x11']) - if (env["use_llvm"]=="yes"): - if 'clang++' not in env['CXX']: - env["CC"]="clang" - env["CXX"]="clang++" - env["LD"]="clang++" - env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env.extra_suffix=".llvm" + env.Append(CPPPATH=['#platform/x11']) + if (env["use_llvm"]=="yes"): + if 'clang++' not in env['CXX']: + env["CC"]="clang" + env["CXX"]="clang++" + env["LD"]="clang++" + env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) + env.extra_suffix=".llvm" - if (env["use_sanitizer"]=="yes"): - env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) - env.Append(LINKFLAGS=['-fsanitize=address']) - env.extra_suffix+="s" + if (env["use_sanitizer"]=="yes"): + env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) + env.Append(LINKFLAGS=['-fsanitize=address']) + env.extra_suffix+="s" - if (env["use_leak_sanitizer"]=="yes"): - env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) - env.Append(LINKFLAGS=['-fsanitize=address']) - env.extra_suffix+="s" + if (env["use_leak_sanitizer"]=="yes"): + env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) + env.Append(LINKFLAGS=['-fsanitize=address']) + env.extra_suffix+="s" - #if (env["tools"]=="no"): - # #no tools suffix - # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] - # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] + #if (env["tools"]=="no"): + # #no tools suffix + # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] + # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"]=="release"): + if (env["target"]=="release"): - if (env["debug_release"]=="yes"): - env.Append(CCFLAGS=['-g2']) - else: - env.Append(CCFLAGS=['-O3','-ffast-math']) + if (env["debug_release"]=="yes"): + env.Append(CCFLAGS=['-g2']) + else: + env.Append(CCFLAGS=['-O3','-ffast-math']) - elif (env["target"]=="release_debug"): + elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) - if (env["debug_release"]=="yes"): - env.Append(CCFLAGS=['-g2']) + env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) + if (env["debug_release"]=="yes"): + env.Append(CCFLAGS=['-g2']) - elif (env["target"]=="debug"): + elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) - env.ParseConfig('pkg-config x11 --cflags --libs') - env.ParseConfig('pkg-config xinerama --cflags --libs') - env.ParseConfig('pkg-config xcursor --cflags --libs') - env.ParseConfig('pkg-config xrandr --cflags --libs') + env.ParseConfig('pkg-config x11 --cflags --libs') + env.ParseConfig('pkg-config xinerama --cflags --libs') + env.ParseConfig('pkg-config xcursor --cflags --libs') + env.ParseConfig('pkg-config xrandr --cflags --libs') - if (env["openssl"] == "system"): - env.ParseConfig('pkg-config openssl --cflags --libs') + if (env["openssl"] == "system"): + env.ParseConfig('pkg-config openssl --cflags --libs') - if (env["libwebp"] == "system"): - env.ParseConfig('pkg-config libwebp --cflags --libs') + if (env["libwebp"] == "system"): + env.ParseConfig('pkg-config libwebp --cflags --libs') - if (env["freetype"] == "system"): - env["libpng"] = "system" # Freetype links against libpng - env.ParseConfig('pkg-config freetype2 --cflags --libs') + if (env["freetype"] == "system"): + env["libpng"] = "system" # Freetype links against libpng + env.ParseConfig('pkg-config freetype2 --cflags --libs') - if (env["libpng"] == "system"): - env.ParseConfig('pkg-config libpng --cflags --libs') + if (env["libpng"] == "system"): + env.ParseConfig('pkg-config libpng --cflags --libs') - if (env["enet"] == "system"): - env.ParseConfig('pkg-config libenet --cflags --libs') + if (env["enet"] == "system"): + env.ParseConfig('pkg-config libenet --cflags --libs') - if (env["squish"] == "system" and env["tools"] == "yes"): - env.ParseConfig('pkg-config libsquish --cflags --libs') + if (env["squish"] == "system" and env["tools"] == "yes"): + env.ParseConfig('pkg-config libsquish --cflags --libs') - # Sound and video libraries - # Keep the order as it triggers chained dependencies (ogg needed by others, etc.) + # Sound and video libraries + # Keep the order as it triggers chained dependencies (ogg needed by others, etc.) - if (env["libtheora"] == "system"): - env["libogg"] = "system" # Needed to link against system libtheora - env["libvorbis"] = "system" # Needed to link against system libtheora - env.ParseConfig('pkg-config theora theoradec --cflags --libs') + if (env["libtheora"] == "system"): + env["libogg"] = "system" # Needed to link against system libtheora + env["libvorbis"] = "system" # Needed to link against system libtheora + env.ParseConfig('pkg-config theora theoradec --cflags --libs') - if (env["libvpx"] == "system"): - env.ParseConfig('pkg-config vpx --cflags --libs') + if (env["libvpx"] == "system"): + env.ParseConfig('pkg-config vpx --cflags --libs') - if (env["libvorbis"] == "system"): - env["libogg"] = "system" # Needed to link against system libvorbis - env.ParseConfig('pkg-config vorbis vorbisfile --cflags --libs') + if (env["libvorbis"] == "system"): + env["libogg"] = "system" # Needed to link against system libvorbis + env.ParseConfig('pkg-config vorbis vorbisfile --cflags --libs') - if (env["opus"] == "system"): - env["libogg"] = "system" # Needed to link against system opus - env.ParseConfig('pkg-config opus opusfile --cflags --libs') + if (env["opus"] == "system"): + env["libogg"] = "system" # Needed to link against system opus + env.ParseConfig('pkg-config opus opusfile --cflags --libs') - if (env["libogg"] == "system"): - env.ParseConfig('pkg-config ogg --cflags --libs') + if (env["libogg"] == "system"): + env.ParseConfig('pkg-config ogg --cflags --libs') - env.Append(CPPFLAGS=['-DOPENGL_ENABLED']) + env.Append(CPPFLAGS=['-DOPENGL_ENABLED']) - if (env["glew"] == "system"): - env.ParseConfig('pkg-config glew --cflags --libs') + if (env["glew"] == "system"): + env.ParseConfig('pkg-config glew --cflags --libs') - if os.system("pkg-config --exists alsa")==0: - print("Enabling ALSA") - env.Append(CPPFLAGS=["-DALSA_ENABLED"]) - env.ParseConfig('pkg-config alsa --cflags --libs') - else: - print("ALSA libraries not found, disabling driver") + if os.system("pkg-config --exists alsa")==0: + print("Enabling ALSA") + env.Append(CPPFLAGS=["-DALSA_ENABLED"]) + env.ParseConfig('pkg-config alsa --cflags --libs') + else: + print("ALSA libraries not found, disabling driver") - if (platform.system() == "Linux"): - env.Append(CPPFLAGS=["-DJOYDEV_ENABLED"]) - if (env["udev"]=="yes"): - # pkg-config returns 0 when the lib exists... - found_udev = not os.system("pkg-config --exists libudev") + if (platform.system() == "Linux"): + env.Append(CPPFLAGS=["-DJOYDEV_ENABLED"]) + if (env["udev"]=="yes"): + # pkg-config returns 0 when the lib exists... + found_udev = not os.system("pkg-config --exists libudev") - if (found_udev): - print("Enabling udev support") - env.Append(CPPFLAGS=["-DUDEV_ENABLED"]) - env.ParseConfig('pkg-config libudev --cflags --libs') - else: - print("libudev development libraries not found, disabling udev support") + if (found_udev): + print("Enabling udev support") + env.Append(CPPFLAGS=["-DUDEV_ENABLED"]) + env.ParseConfig('pkg-config libudev --cflags --libs') + else: + print("libudev development libraries not found, disabling udev support") - if (env["pulseaudio"]=="yes"): - if not os.system("pkg-config --exists libpulse-simple"): - print("Enabling PulseAudio") - env.Append(CPPFLAGS=["-DPULSEAUDIO_ENABLED"]) - env.ParseConfig('pkg-config --cflags --libs libpulse-simple') - else: - print("PulseAudio development libraries not found, disabling driver") + if (env["pulseaudio"]=="yes"): + if not os.system("pkg-config --exists libpulse-simple"): + print("Enabling PulseAudio") + env.Append(CPPFLAGS=["-DPULSEAUDIO_ENABLED"]) + env.ParseConfig('pkg-config --cflags --libs libpulse-simple') + else: + print("PulseAudio development libraries not found, disabling driver") - env.Append(CPPFLAGS=['-DX11_ENABLED','-DUNIX_ENABLED','-DGLES2_ENABLED','-DGLES_OVER_GL']) - env.Append(LIBS=['GL', 'pthread', 'z']) - if (platform.system() == "Linux"): - env.Append(LIBS='dl') - #env.Append(CPPFLAGS=['-DMPC_FIXED_POINT']) + env.Append(CPPFLAGS=['-DX11_ENABLED','-DUNIX_ENABLED','-DGLES2_ENABLED','-DGLES_OVER_GL']) + env.Append(LIBS=['GL', 'pthread', 'z']) + if (platform.system() == "Linux"): + env.Append(LIBS='dl') + #env.Append(CPPFLAGS=['-DMPC_FIXED_POINT']) #host compiler is default.. - if (is64 and env["bits"]=="32"): - env.Append(CPPFLAGS=['-m32']) - env.Append(LINKFLAGS=['-m32','-L/usr/lib/i386-linux-gnu']) - elif (not is64 and env["bits"]=="64"): - env.Append(CPPFLAGS=['-m64']) - env.Append(LINKFLAGS=['-m64','-L/usr/lib/i686-linux-gnu']) + if (is64 and env["bits"]=="32"): + env.Append(CPPFLAGS=['-m32']) + env.Append(LINKFLAGS=['-m32','-L/usr/lib/i386-linux-gnu']) + elif (not is64 and env["bits"]=="64"): + env.Append(CPPFLAGS=['-m64']) + env.Append(LINKFLAGS=['-m64','-L/usr/lib/i686-linux-gnu']) - import methods + import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) + env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - if (env["use_static_cpp"]=="yes"): - env.Append(LINKFLAGS=['-static-libstdc++']) + if (env["use_static_cpp"]=="yes"): + env.Append(LINKFLAGS=['-static-libstdc++']) - list_of_x86 = ['x86_64', 'x86', 'i386', 'i586'] - if any(platform.machine() in s for s in list_of_x86): - env["x86_libtheora_opt_gcc"]=True + list_of_x86 = ['x86_64', 'x86', 'i386', 'i586'] + if any(platform.machine() in s for s in list_of_x86): + env["x86_libtheora_opt_gcc"]=True diff --git a/scene/3d/SCsub b/scene/3d/SCsub index 1205deb94aa..47825ea6c26 100644 --- a/scene/3d/SCsub +++ b/scene/3d/SCsub @@ -5,9 +5,9 @@ Import('env') if (env["disable_3d"]=="yes"): - env.scene_sources.append("3d/spatial.cpp") - env.scene_sources.append("3d/skeleton.cpp") + env.scene_sources.append("3d/spatial.cpp") + env.scene_sources.append("3d/skeleton.cpp") else: - env.add_source_files(env.scene_sources,"*.cpp") + env.add_source_files(env.scene_sources,"*.cpp") Export('env') diff --git a/scene/resources/default_theme/make_header.py b/scene/resources/default_theme/make_header.py index 2d3f989e015..ce053570959 100644 --- a/scene/resources/default_theme/make_header.py +++ b/scene/resources/default_theme/make_header.py @@ -22,21 +22,21 @@ f.write("\n\n\n"); for x in pixmaps: - var_str=x[:-4]+"_png"; + var_str=x[:-4]+"_png"; - f.write("static const unsigned char "+ var_str +"[]={\n"); + f.write("static const unsigned char "+ var_str +"[]={\n"); - pngf=open(x,"rb"); + pngf=open(x,"rb"); - b=pngf.read(1); - while(len(b)==1): - f.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): - f.write(",") + b=pngf.read(1); + while(len(b)==1): + f.write(hex(ord(b))) + b=pngf.read(1); + if (len(b)==1): + f.write(",") - f.write("\n};\n\n\n"); - pngf.close(); + f.write("\n};\n\n\n"); + pngf.close(); #Generate shaders block @@ -48,25 +48,25 @@ f.write("\n\n\n"); for x in shaders: - var_str=x[:-4]+"_shader_code"; + var_str=x[:-4]+"_shader_code"; - f.write("static const char *"+ var_str +"=\n"); + f.write("static const char *"+ var_str +"=\n"); - sf=open(x,"rb"); + sf=open(x,"rb"); - b=sf.readline(); - while(b!=""): - if (b.endswith("\r\n")): - b=b[:-2] - if (b.endswith("\n")): - b=b[:-1] - f.write(" \""+b) - b=sf.readline(); - if (b!=""): - f.write("\"\n") + b=sf.readline(); + while(b!=""): + if (b.endswith("\r\n")): + b=b[:-2] + if (b.endswith("\n")): + b=b[:-1] + f.write(" \""+b) + b=sf.readline(); + if (b!=""): + f.write("\"\n") - f.write("\";\n\n\n"); - sf.close(); + f.write("\";\n\n\n"); + sf.close(); f.close(); diff --git a/tools/SCsub b/tools/SCsub index b0b33d4f012..a54b8f48a21 100644 --- a/tools/SCsub +++ b/tools/SCsub @@ -11,115 +11,115 @@ Export('env') def make_translations_header(target,source,env): - dst = target[0].srcnode().abspath + dst = target[0].srcnode().abspath - g = open(dst,"wb") + g = open(dst,"wb") - - """" + + """" """ - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") - g.write("#ifndef _EDITOR_TRANSLATIONS_H\n") - g.write("#define _EDITOR_TRANSLATIONS_H\n") + g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") + g.write("#ifndef _EDITOR_TRANSLATIONS_H\n") + g.write("#define _EDITOR_TRANSLATIONS_H\n") - import zlib - import os.path + import zlib + import os.path - paths = [node.srcnode().abspath for node in source] - sorted_paths = sorted(paths, key=lambda path: os.path.splitext(os.path.basename(path))[0]) + paths = [node.srcnode().abspath for node in source] + sorted_paths = sorted(paths, key=lambda path: os.path.splitext(os.path.basename(path))[0]) - xl_names=[] - for i in range(len(sorted_paths)): - print("Appending translation: "+sorted_paths[i]) - f = open(sorted_paths[i],"rb") - buf = f.read() - decomp_size = len(buf) - buf = zlib.compress(buf) - name = os.path.splitext(os.path.basename(sorted_paths[i]))[0] + xl_names=[] + for i in range(len(sorted_paths)): + print("Appending translation: "+sorted_paths[i]) + f = open(sorted_paths[i],"rb") + buf = f.read() + decomp_size = len(buf) + buf = zlib.compress(buf) + name = os.path.splitext(os.path.basename(sorted_paths[i]))[0] - #g.write("static const int _translation_"+name+"_compressed_size="+str(len(buf))+";\n") - #g.write("static const int _translation_"+name+"_uncompressed_size="+str(decomp_size)+";\n") - g.write("static const unsigned char _translation_"+name+"_compressed[]={\n") - for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") + #g.write("static const int _translation_"+name+"_compressed_size="+str(len(buf))+";\n") + #g.write("static const int _translation_"+name+"_uncompressed_size="+str(decomp_size)+";\n") + g.write("static const unsigned char _translation_"+name+"_compressed[]={\n") + for i in range(len(buf)): + g.write(str(ord(buf[i]))+",\n") - g.write("};\n") + g.write("};\n") - xl_names.append([name,len(buf),str(decomp_size)]) + xl_names.append([name,len(buf),str(decomp_size)]) - g.write("struct EditorTranslationList {\n") - g.write("\tconst char* lang;\n"); - g.write("\tint comp_size;\n"); - g.write("\tint uncomp_size;\n"); - g.write("\tconst unsigned char* data;\n"); - g.write("};\n\n"); - g.write("static EditorTranslationList _editor_translations[]={\n") - for x in xl_names: - g.write("\t{ \""+x[0]+"\", "+str(x[1])+", "+str(x[2])+",_translation_"+x[0]+"_compressed},\n") - g.write("\t{NULL,0,0,NULL}\n") - g.write("};\n") + g.write("struct EditorTranslationList {\n") + g.write("\tconst char* lang;\n"); + g.write("\tint comp_size;\n"); + g.write("\tint uncomp_size;\n"); + g.write("\tconst unsigned char* data;\n"); + g.write("};\n\n"); + g.write("static EditorTranslationList _editor_translations[]={\n") + for x in xl_names: + g.write("\t{ \""+x[0]+"\", "+str(x[1])+", "+str(x[2])+",_translation_"+x[0]+"_compressed},\n") + g.write("\t{NULL,0,0,NULL}\n") + g.write("};\n") - g.write("#endif") + g.write("#endif") def make_fonts_header(target,source,env): - dst = target[0].srcnode().abspath + dst = target[0].srcnode().abspath - g = open(dst,"wb") + g = open(dst,"wb") - - """" + + """" """ - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") - g.write("#ifndef _EDITOR_FONTS_H\n") - g.write("#define _EDITOR_FONTS_H\n") + g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") + g.write("#ifndef _EDITOR_FONTS_H\n") + g.write("#define _EDITOR_FONTS_H\n") - #saving uncompressed, since freetype will reference from memory pointer - xl_names=[] - for i in range(len(source)): - print("Appending font: "+source[i].srcnode().abspath) - f = open(source[i].srcnode().abspath,"rb") - buf = f.read() - import os.path - - name = os.path.splitext(os.path.basename(source[i].srcnode().abspath))[0] + #saving uncompressed, since freetype will reference from memory pointer + xl_names=[] + for i in range(len(source)): + print("Appending font: "+source[i].srcnode().abspath) + f = open(source[i].srcnode().abspath,"rb") + buf = f.read() + import os.path + + name = os.path.splitext(os.path.basename(source[i].srcnode().abspath))[0] + + g.write("static const int _font_"+name+"_size="+str(len(buf))+";\n") + g.write("static const unsigned char _font_"+name+"[]={\n") + for i in range(len(buf)): + g.write(str(ord(buf[i]))+",\n") + + g.write("};\n") + + g.write("#endif") - g.write("static const int _font_"+name+"_size="+str(len(buf))+";\n") - g.write("static const unsigned char _font_"+name+"[]={\n") - for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") - g.write("};\n") - - g.write("#endif") - - if (env["tools"]!="no"): - import glob + import glob - dir = env.Dir('.').abspath - tlist = glob.glob(dir + "/translations/*.po") + dir = env.Dir('.').abspath + tlist = glob.glob(dir + "/translations/*.po") - print("translations: ",tlist) - env.Depends('#tools/editor/translations.h',tlist) - env.Command('#tools/editor/translations.h',tlist,make_translations_header) + print("translations: ",tlist) + env.Depends('#tools/editor/translations.h',tlist) + env.Command('#tools/editor/translations.h',tlist,make_translations_header) - flist = glob.glob(dir + "/editor_fonts/*.ttf") - flist.append( glob.glob(dir + "/editor_fonts/*.otf") ) + flist = glob.glob(dir + "/editor_fonts/*.ttf") + flist.append( glob.glob(dir + "/editor_fonts/*.otf") ) - print("fonts: ",flist) - env.Depends('#tools/editor/builtin_fonts.h',flist) - env.Command('#tools/editor/builtin_fonts.h',flist,make_fonts_header) + print("fonts: ",flist) + env.Depends('#tools/editor/builtin_fonts.h',flist) + env.Command('#tools/editor/builtin_fonts.h',flist,make_fonts_header) - SConscript('editor/SCsub'); - SConscript('collada/SCsub'); - SConscript('doc/SCsub') + SConscript('editor/SCsub'); + SConscript('collada/SCsub'); + SConscript('doc/SCsub') - lib = env.Library("tool",env.tool_sources) + lib = env.Library("tool",env.tool_sources) - env.Prepend(LIBS=[lib]) + env.Prepend(LIBS=[lib]) diff --git a/tools/editor/SCsub b/tools/editor/SCsub index f6cb16dc24f..41b4386e21c 100644 --- a/tools/editor/SCsub +++ b/tools/editor/SCsub @@ -5,51 +5,51 @@ Import('env') def make_doc_header(target,source,env): - src = source[0].srcnode().abspath - dst = target[0].srcnode().abspath - f = open(src,"rb") - g = open(dst,"wb") - buf = f.read() - decomp_size = len(buf) - import zlib - buf = zlib.compress(buf) + src = source[0].srcnode().abspath + dst = target[0].srcnode().abspath + f = open(src,"rb") + g = open(dst,"wb") + buf = f.read() + decomp_size = len(buf) + import zlib + buf = zlib.compress(buf) - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") - g.write("#ifndef _DOC_DATA_RAW_H\n") - g.write("#define _DOC_DATA_RAW_H\n") - g.write("static const int _doc_data_compressed_size="+str(len(buf))+";\n") - g.write("static const int _doc_data_uncompressed_size="+str(decomp_size)+";\n") - g.write("static const unsigned char _doc_data_compressed[]={\n") - for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") - g.write("};\n") - g.write("#endif") + g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") + g.write("#ifndef _DOC_DATA_RAW_H\n") + g.write("#define _DOC_DATA_RAW_H\n") + g.write("static const int _doc_data_compressed_size="+str(len(buf))+";\n") + g.write("static const int _doc_data_uncompressed_size="+str(decomp_size)+";\n") + g.write("static const unsigned char _doc_data_compressed[]={\n") + for i in range(len(buf)): + g.write(str(ord(buf[i]))+",\n") + g.write("};\n") + g.write("#endif") def make_certs_header(target,source,env): - src = source[0].srcnode().abspath - dst = target[0].srcnode().abspath - f = open(src,"rb") - g = open(dst,"wb") - buf = f.read() - decomp_size = len(buf) - import zlib - buf = zlib.compress(buf) + src = source[0].srcnode().abspath + dst = target[0].srcnode().abspath + f = open(src,"rb") + g = open(dst,"wb") + buf = f.read() + decomp_size = len(buf) + import zlib + buf = zlib.compress(buf) - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") - g.write("#ifndef _CERTS_RAW_H\n") - g.write("#define _CERTS_RAW_H\n") - g.write("static const int _certs_compressed_size="+str(len(buf))+";\n") - g.write("static const int _certs_uncompressed_size="+str(decomp_size)+";\n") - g.write("static const unsigned char _certs_compressed[]={\n") - for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") - g.write("};\n") - g.write("#endif") + g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") + g.write("#ifndef _CERTS_RAW_H\n") + g.write("#define _CERTS_RAW_H\n") + g.write("static const int _certs_compressed_size="+str(len(buf))+";\n") + g.write("static const int _certs_uncompressed_size="+str(decomp_size)+";\n") + g.write("static const unsigned char _certs_compressed[]={\n") + for i in range(len(buf)): + g.write(str(ord(buf[i]))+",\n") + g.write("};\n") + g.write("#endif") @@ -59,30 +59,30 @@ def make_certs_header(target,source,env): if (env["tools"]=="yes"): - reg_exporters_inc='#include "register_exporters.h"\n' - reg_exporters='void register_exporters() {\n' - for e in env.platform_exporters: - env.tool_sources.append("#platform/"+e+"/export/export.cpp") - reg_exporters+='\tregister_'+e+'_exporter();\n' - reg_exporters_inc+='#include "platform/'+e+'/export/export.h"\n' - reg_exporters+='}\n' - f = open("register_exporters.cpp","wb") - f.write(reg_exporters_inc) - f.write(reg_exporters) - f.close() + reg_exporters_inc='#include "register_exporters.h"\n' + reg_exporters='void register_exporters() {\n' + for e in env.platform_exporters: + env.tool_sources.append("#platform/"+e+"/export/export.cpp") + reg_exporters+='\tregister_'+e+'_exporter();\n' + reg_exporters_inc+='#include "platform/'+e+'/export/export.h"\n' + reg_exporters+='}\n' + f = open("register_exporters.cpp","wb") + f.write(reg_exporters_inc) + f.write(reg_exporters) + f.close() - env.Depends("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml") - env.Command("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml",make_doc_header) + env.Depends("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml") + env.Command("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml",make_doc_header) - env.Depends("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt") - env.Command("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt",make_certs_header) + env.Depends("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt") + env.Command("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt",make_certs_header) - #make_doc_header(env.File("#tools/editor/doc_data_raw.h").srcnode().abspath,env.File("#doc/base/classes.xml").srcnode().abspath,env) + #make_doc_header(env.File("#tools/editor/doc_data_raw.h").srcnode().abspath,env.File("#doc/base/classes.xml").srcnode().abspath,env) - env.add_source_files(env.tool_sources,"*.cpp") + env.add_source_files(env.tool_sources,"*.cpp") - Export('env') - SConscript('icons/SCsub'); - SConscript('plugins/SCsub'); - SConscript('fileserver/SCsub'); - SConscript('io_plugins/SCsub'); + Export('env') + SConscript('icons/SCsub'); + SConscript('plugins/SCsub'); + SConscript('fileserver/SCsub'); + SConscript('io_plugins/SCsub'); diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub index 9e05d8f391f..4806a89df20 100644 --- a/tools/editor/icons/SCsub +++ b/tools/editor/icons/SCsub @@ -4,93 +4,93 @@ Import('env') def make_editor_icons_action(target, source, env): - import os - import cStringIO + import os + import cStringIO - dst = target[0].srcnode().abspath - pixmaps = source + dst = target[0].srcnode().abspath + pixmaps = source - s = cStringIO.StringIO() + s = cStringIO.StringIO() - s.write("#include \"editor_icons.h\"\n\n") - s.write("#include \"editor_scale.h\"\n\n") - s.write("#include \"scene/resources/theme.h\"\n\n") + s.write("#include \"editor_icons.h\"\n\n") + s.write("#include \"editor_scale.h\"\n\n") + s.write("#include \"scene/resources/theme.h\"\n\n") - hidpi_list=[] + hidpi_list=[] - for x in pixmaps: + for x in pixmaps: - x=str(x) - var_str=os.path.basename(x)[:-4]+"_png"; - #print(var_str) + x=str(x) + var_str=os.path.basename(x)[:-4]+"_png"; + #print(var_str) - s.write("static const unsigned char "+ var_str +"[]={\n"); + s.write("static const unsigned char "+ var_str +"[]={\n"); - pngf=open(x,"rb"); + pngf=open(x,"rb"); - b=pngf.read(1); - while(len(b)==1): - s.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): - s.write(",") + b=pngf.read(1); + while(len(b)==1): + s.write(hex(ord(b))) + b=pngf.read(1); + if (len(b)==1): + s.write(",") - s.write("\n};\n\n"); + s.write("\n};\n\n"); - pngf.close(); - var_str=os.path.basename(x)[:-4]+"_hidpi_png"; - try: + pngf.close(); + var_str=os.path.basename(x)[:-4]+"_hidpi_png"; + try: - pngf = open(os.path.dirname(x)+"/2x/"+os.path.basename(x), "rb") + pngf = open(os.path.dirname(x)+"/2x/"+os.path.basename(x), "rb") - s.write("static const unsigned char "+ var_str +"[]={\n"); + s.write("static const unsigned char "+ var_str +"[]={\n"); - b=pngf.read(1); - while(len(b)==1): - s.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): - s.write(",") + b=pngf.read(1); + while(len(b)==1): + s.write(hex(ord(b))) + b=pngf.read(1); + if (len(b)==1): + s.write(",") - s.write("\n};\n\n\n"); - hidpi_list.append(x) + s.write("\n};\n\n\n"); + hidpi_list.append(x) - except: - s.write("static const unsigned char* "+ var_str +"=NULL;\n\n\n"); + except: + s.write("static const unsigned char* "+ var_str +"=NULL;\n\n\n"); - s.write("static Ref make_icon(const uint8_t* p_png,const uint8_t* p_hidpi_png) {\n") - s.write("\tRef texture( memnew( ImageTexture ) );\n") - s.write("\tbool use_hidpi_image=(editor_get_scale()>1.0&&p_hidpi_png);\n") - s.write("\tImage img(use_hidpi_image?p_hidpi_png:p_png);\n") - s.write("\tif (editor_get_scale()>1.0 && !p_hidpi_png) { img.convert(Image::FORMAT_RGBA); img.expand_x2_hq2x(); use_hidpi_image=true;}\n") - s.write("\timg.resize(img.get_width()*EDSCALE/(use_hidpi_image?2:1),img.get_height()*EDSCALE/(use_hidpi_image?2:1));\n") - s.write("\ttexture->create_from_image( img,ImageTexture::FLAG_FILTER );\n") - s.write("\treturn texture;\n") - s.write("}\n\n") + s.write("static Ref make_icon(const uint8_t* p_png,const uint8_t* p_hidpi_png) {\n") + s.write("\tRef texture( memnew( ImageTexture ) );\n") + s.write("\tbool use_hidpi_image=(editor_get_scale()>1.0&&p_hidpi_png);\n") + s.write("\tImage img(use_hidpi_image?p_hidpi_png:p_png);\n") + s.write("\tif (editor_get_scale()>1.0 && !p_hidpi_png) { img.convert(Image::FORMAT_RGBA); img.expand_x2_hq2x(); use_hidpi_image=true;}\n") + s.write("\timg.resize(img.get_width()*EDSCALE/(use_hidpi_image?2:1),img.get_height()*EDSCALE/(use_hidpi_image?2:1));\n") + s.write("\ttexture->create_from_image( img,ImageTexture::FLAG_FILTER );\n") + s.write("\treturn texture;\n") + s.write("}\n\n") - s.write("void editor_register_icons(Ref p_theme) {\n\n") + s.write("void editor_register_icons(Ref p_theme) {\n\n") - for x in pixmaps: + for x in pixmaps: - x=os.path.basename(str(x)) - type=x[5:-4].title().replace("_",""); - var_str=x[:-4]+"_png"; - var_str_hidpi=x[:-4]+"_hidpi_png"; - s.write("\tp_theme->set_icon(\""+type+"\",\"EditorIcons\",make_icon("+var_str+","+var_str_hidpi+"));\n"); + x=os.path.basename(str(x)) + type=x[5:-4].title().replace("_",""); + var_str=x[:-4]+"_png"; + var_str_hidpi=x[:-4]+"_hidpi_png"; + s.write("\tp_theme->set_icon(\""+type+"\",\"EditorIcons\",make_icon("+var_str+","+var_str_hidpi+"));\n"); - s.write("\n\n}\n\n"); + s.write("\n\n}\n\n"); - f = open(dst,"wb") - f.write(s.getvalue()) - f.close() - s.close() + f = open(dst,"wb") + f.write(s.getvalue()) + f.close() + s.close() make_editor_icons_builder = Builder(action=make_editor_icons_action, - suffix = '.cpp', - src_suffix = '.png') + suffix = '.cpp', + src_suffix = '.png') env['BUILDERS']['MakeEditorIconsBuilder']=make_editor_icons_builder env.Alias('editor_icons',[env.MakeEditorIconsBuilder('#tools/editor/editor_icons.cpp',Glob("*.png"))]) diff --git a/tools/scripts/addheader.py b/tools/scripts/addheader.py index d040d8b5d67..fac35372de7 100644 --- a/tools/scripts/addheader.py +++ b/tools/scripts/addheader.py @@ -34,39 +34,39 @@ f = open("files","rb") fname = f.readline() while (fname!=""): - fr = open(fname.strip(),"rb") - l = fr.readline() - bc=False - fsingle = fname.strip() + fr = open(fname.strip(),"rb") + l = fr.readline() + bc=False + fsingle = fname.strip() - if (fsingle.find("/")!=-1): - fsingle=fsingle[fsingle.rfind("/")+1:] - rep_fl="$filename" - rep_fi=fsingle - len_fl=len(rep_fl) - len_fi=len(rep_fi) - if (len_fi0): - cstr+=", " - cstr+=c + c=font_chars[i] + if (i>0): + cstr+=", " + cstr+=c cstr+=("};") -print(cstr) \ No newline at end of file +print(cstr) diff --git a/tools/scripts/make_glwrapper.py b/tools/scripts/make_glwrapper.py index b4c582f1eb4..8da4e28c89c 100644 --- a/tools/scripts/make_glwrapper.py +++ b/tools/scripts/make_glwrapper.py @@ -2,8 +2,8 @@ import sys if (len(sys.argv)<2): - print("usage: make_glwrapper.py ") - sys.exit(255) + print("usage: make_glwrapper.py ") + sys.exit(255) functions=[] @@ -17,16 +17,16 @@ READ_CONSTANTS=2 read_what=READ_TYPES for x in (range(len(sys.argv)-1)): - f=open(sys.argv[x+1],"r") + f=open(sys.argv[x+1],"r") - while(True): + while(True): - line=f.readline() - if (line==""): - break + line=f.readline() + if (line==""): + break - line=line.replace("\n","").strip() - """ + line=line.replace("\n","").strip() + """ if (line.find("[types]")!=-1): read_what=READ_TYPES continue @@ -38,53 +38,53 @@ for x in (range(len(sys.argv)-1)): continue """ - if (line.find("#define")!=-1): - if (line.find("0x")==-1 and line.find("GL_VERSION")==-1): - continue - constants.append(line) - elif (line.find("typedef")!=-1): - if (line.find("(")!=-1 or line.find(")")!=-1 or line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("GL")==-1): - continue - types.append(line) - elif (line.find("APIENTRY")!=-1 and line.find("GLAPI")!=-1): + if (line.find("#define")!=-1): + if (line.find("0x")==-1 and line.find("GL_VERSION")==-1): + continue + constants.append(line) + elif (line.find("typedef")!=-1): + if (line.find("(")!=-1 or line.find(")")!=-1 or line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("GL")==-1): + continue + types.append(line) + elif (line.find("APIENTRY")!=-1 and line.find("GLAPI")!=-1): - if (line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("NV")!=-1): - continue + if (line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("NV")!=-1): + continue - line=line.replace("APIENTRY","") - line=line.replace("GLAPI","") + line=line.replace("APIENTRY","") + line=line.replace("GLAPI","") - glpos=line.find(" gl") - if (glpos==-1): + glpos=line.find(" gl") + if (glpos==-1): - glpos=line.find("\tgl") - if (glpos==-1): - continue + glpos=line.find("\tgl") + if (glpos==-1): + continue - ret=line[:glpos].strip(); + ret=line[:glpos].strip(); - line=line[glpos:].strip() - namepos=line.find("(") + line=line[glpos:].strip() + namepos=line.find("(") - if (namepos==-1): - continue + if (namepos==-1): + continue - name=line[:namepos].strip() - line=line[namepos:] + name=line[:namepos].strip() + line=line[namepos:] - argpos=line.rfind(")") - if (argpos==-1): - continue + argpos=line.rfind(")") + if (argpos==-1): + continue - args=line[1:argpos] + args=line[1:argpos] - funcdata={} - funcdata["ret"]=ret - funcdata["name"]=name - funcdata["args"]=args + funcdata={} + funcdata["ret"]=ret + funcdata["name"]=name + funcdata["args"]=args - functions.append(funcdata) - print(funcdata) + functions.append(funcdata) + print(funcdata) @@ -137,18 +137,18 @@ f.write("#else\n"); f.write("#define GLWRP_APIENTRY \n") f.write("#endif\n\n"); for x in types: - f.write(x+"\n") + f.write(x+"\n") f.write("\n\n") for x in constants: - f.write(x+"\n") + f.write(x+"\n") f.write("\n\n") for x in functions: - f.write("extern "+x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+");\n") - f.write("#define "+x["name"]+" __wrapper_"+x["name"]+"\n") + f.write("extern "+x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+");\n") + f.write("#define "+x["name"]+" __wrapper_"+x["name"]+"\n") f.write("\n\n") f.write("typedef void (*GLWrapperFuncPtr)(void);\n\n"); @@ -165,14 +165,14 @@ f.write("#include \"glwrapper.h\"\n") f.write("\n\n") for x in functions: - f.write(x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+")=NULL;\n") + f.write(x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+")=NULL;\n") f.write("\n\n") f.write("void glWrapperInit( GLWrapperFuncPtr (*wrapperFunc)(const char*) ) {\n") f.write("\n") for x in functions: - f.write("\t__wrapper_"+x["name"]+"=("+x["ret"]+" GLWRP_APIENTRY (*)("+x["args"]+"))wrapperFunc(\""+x["name"]+"\");\n") + f.write("\t__wrapper_"+x["name"]+"=("+x["ret"]+" GLWRP_APIENTRY (*)("+x["args"]+"))wrapperFunc(\""+x["name"]+"\");\n") f.write("\n\n") f.write("}\n") diff --git a/tools/scripts/makeargs.py b/tools/scripts/makeargs.py index 8c5539c5fec..7db9da82a89 100644 --- a/tools/scripts/makeargs.py +++ b/tools/scripts/makeargs.py @@ -67,20 +67,20 @@ text=""" for i in range(1,8): - tp="" - p="" - t="" - for j in range(i): - if (j>0): - tp+=", " - p+=", " - t+=", " - tp +=("m_arg"+str(j+1)+" p"+str(j+1)) - p+=("p"+str(j+1)) - t+=("m_arg"+str(j+1)) + tp="" + p="" + t="" + for j in range(i): + if (j>0): + tp+=", " + p+=", " + t+=", " + tp +=("m_arg"+str(j+1)+" p"+str(j+1)) + p+=("p"+str(j+1)) + t+=("m_arg"+str(j+1)) - t = text.replace("$argtp",tp).replace("$argp",p).replace("$argt",t).replace("$num",str(i)) - print(t) + t = text.replace("$argtp",tp).replace("$argp",p).replace("$argt",t).replace("$num",str(i)) + print(t) diff --git a/tools/scripts/memsort.py b/tools/scripts/memsort.py index d2e4fe02264..15b4e7a84b1 100644 --- a/tools/scripts/memsort.py +++ b/tools/scripts/memsort.py @@ -4,7 +4,7 @@ import sys arg="memdump.txt" if (len(sys.argv)>1): - arg=sys.argv[1] + arg=sys.argv[1] f = open(arg,"rb") @@ -18,18 +18,18 @@ cnt={} while(l!=""): - s=l.split("-") - amount = int(s[1]) - what=s[2] - if (what in sum): - sum[what]+=amount - cnt[what]+=1 - else: - sum[what]=amount - cnt[what]=1 + s=l.split("-") + amount = int(s[1]) + what=s[2] + if (what in sum): + sum[what]+=amount + cnt[what]+=1 + else: + sum[what]=amount + cnt[what]=1 - l=f.readline() + l=f.readline() for x in sum: - print(x.strip()+"("+str(cnt[x])+"):\n: "+str(sum[x])) + print(x.strip()+"("+str(cnt[x])+"):\n: "+str(sum[x])) diff --git a/tools/scripts/svgs_2_pngs.py b/tools/scripts/svgs_2_pngs.py index 879926ab427..4de69a14b41 100644 --- a/tools/scripts/svgs_2_pngs.py +++ b/tools/scripts/svgs_2_pngs.py @@ -130,8 +130,8 @@ special_icons = { } theme_icons = { - 'icon_close': dict(output_names=['close', 'close_hl']), - 'tab_menu': dict(output_names=['tab_menu_hl']) + 'icon_close': dict(output_names=['close', 'close_hl']), + 'tab_menu': dict(output_names=['tab_menu_hl']) } export_icons() diff --git a/tools/translations/extract.py b/tools/translations/extract.py index 61b07b57996..f2ee1ecb198 100755 --- a/tools/translations/extract.py +++ b/tools/translations/extract.py @@ -10,27 +10,27 @@ import sys line_nb = False for arg in sys.argv[1:]: - if (arg == "--with-line-nb"): - print("Enabling line numbers in the context locations.") - line_nb = True - else: - os.sys.exit("Non supported argument '" + arg + "'. Aborting.") + if (arg == "--with-line-nb"): + print("Enabling line numbers in the context locations.") + line_nb = True + else: + os.sys.exit("Non supported argument '" + arg + "'. Aborting.") if (not os.path.exists("tools")): - os.sys.exit("ERROR: This script should be started from the root of the git repo.") + os.sys.exit("ERROR: This script should be started from the root of the git repo.") matches = [] for root, dirnames, filenames in os.walk('.'): - for filename in fnmatch.filter(filenames, '*.cpp'): - if (filename.find("collada") != -1): - continue - matches.append(os.path.join(root, filename)) - for filename in fnmatch.filter(filenames, '*.h'): - if (filename.find("collada") != -1): - continue - matches.append(os.path.join(root, filename)) + for filename in fnmatch.filter(filenames, '*.cpp'): + if (filename.find("collada") != -1): + continue + matches.append(os.path.join(root, filename)) + for filename in fnmatch.filter(filenames, '*.h'): + if (filename.find("collada") != -1): + continue + matches.append(os.path.join(root, filename)) matches.sort() @@ -55,51 +55,51 @@ print("Updating the tools.pot template...") for fname in matches: - f = open(fname, "rb") + f = open(fname, "rb") - l = f.readline() - lc = 1 - while (l): + l = f.readline() + lc = 1 + while (l): - patterns = ['RTR(\"', 'TTR(\"'] - idx = 0 - pos = 0 - while (pos >= 0): - pos = l.find(patterns[idx], pos) - if (pos == -1): - if (idx < len(patterns) - 1): - idx += 1 - pos = 0 - continue - pos += 5 + patterns = ['RTR(\"', 'TTR(\"'] + idx = 0 + pos = 0 + while (pos >= 0): + pos = l.find(patterns[idx], pos) + if (pos == -1): + if (idx < len(patterns) - 1): + idx += 1 + pos = 0 + continue + pos += 5 - msg = "" - while (pos < len(l) and (l[pos] != '"' or l[pos - 1] == '\\')): - msg += l[pos] - pos += 1 + msg = "" + while (pos < len(l) and (l[pos] != '"' or l[pos - 1] == '\\')): + msg += l[pos] + pos += 1 - location = os.path.relpath(fname).replace('\\','/') - if (line_nb): - location += ":" + str(lc) + location = os.path.relpath(fname).replace('\\','/') + if (line_nb): + location += ":" + str(lc) - if (not msg in unique_str): - main_po += "\n#: " + location + "\n" - main_po += 'msgid "' + msg + '"\n' - main_po += 'msgstr ""\n' - unique_str.append(msg) - unique_loc[msg] = [location] - elif (not location in unique_loc[msg]): - # Add additional location to previous occurence too - msg_pos = main_po.find('\nmsgid "' + msg + '"') - if (msg_pos == -1): - print("Someone apparently thought writing Python was as easy as GDScript. Ping Akien.") - main_po = main_po[:msg_pos] + ' ' + location + main_po[msg_pos:] - unique_loc[msg].append(location) + if (not msg in unique_str): + main_po += "\n#: " + location + "\n" + main_po += 'msgid "' + msg + '"\n' + main_po += 'msgstr ""\n' + unique_str.append(msg) + unique_loc[msg] = [location] + elif (not location in unique_loc[msg]): + # Add additional location to previous occurence too + msg_pos = main_po.find('\nmsgid "' + msg + '"') + if (msg_pos == -1): + print("Someone apparently thought writing Python was as easy as GDScript. Ping Akien.") + main_po = main_po[:msg_pos] + ' ' + location + main_po[msg_pos:] + unique_loc[msg].append(location) - l = f.readline() - lc += 1 + l = f.readline() + lc += 1 - f.close() + f.close() f = open("tools.pot", "wb") @@ -107,15 +107,15 @@ f.write(main_po) f.close() if (os.name == "posix"): - print("Wrapping template at 79 characters for compatibility with Weblate.") - os.system("msgmerge -w79 tools.pot tools.pot > tools.pot.wrap") - shutil.move("tools.pot.wrap", "tools.pot") + print("Wrapping template at 79 characters for compatibility with Weblate.") + os.system("msgmerge -w79 tools.pot tools.pot > tools.pot.wrap") + shutil.move("tools.pot.wrap", "tools.pot") shutil.move("tools.pot", "tools/translations/tools.pot") # TODO: Make that in a portable way, if we care; if not, kudos to Unix users if (os.name == "posix"): - added = subprocess.check_output("git diff tools/translations/tools.pot | grep \+msgid | wc -l", shell = True) - removed = subprocess.check_output("git diff tools/translations/tools.pot | grep \\\-msgid | wc -l", shell = True) - print("\n# Template changes compared to the staged status:") - print("# Additions: %s msgids.\n# Deletions: %s msgids." % (int(added), int(removed))) + added = subprocess.check_output("git diff tools/translations/tools.pot | grep \+msgid | wc -l", shell = True) + removed = subprocess.check_output("git diff tools/translations/tools.pot | grep \\\-msgid | wc -l", shell = True) + print("\n# Template changes compared to the staged status:") + print("# Additions: %s msgids.\n# Deletions: %s msgids." % (int(added), int(removed))) From d4c17700aa2f36f69978beda04e42ff2749de270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sun, 30 Oct 2016 18:57:40 +0100 Subject: [PATCH 042/223] style: Fix PEP8 whitespace issues in Python files Done with `autopep8 --select=E2,W2`, fixes: - E201 - Remove extraneous whitespace. - E202 - Remove extraneous whitespace. - E203 - Remove extraneous whitespace. - E211 - Remove extraneous whitespace. - E221 - Fix extraneous whitespace around keywords. - E222 - Fix extraneous whitespace around keywords. - E223 - Fix extraneous whitespace around keywords. - E224 - Remove extraneous whitespace around operator. - E225 - Fix missing whitespace around operator. - E226 - Fix missing whitespace around operator. - E227 - Fix missing whitespace around operator. - E228 - Fix missing whitespace around operator. - E231 - Add missing whitespace. - E231 - Fix various deprecated code (via lib2to3). - E241 - Fix extraneous whitespace around keywords. - E242 - Remove extraneous whitespace around operator. - E251 - Remove whitespace around parameter '=' sign. - E261 - Fix spacing after comment hash. - E262 - Fix spacing after comment hash. - E265 - Format block comments. - E271 - Fix extraneous whitespace around keywords. - E272 - Fix extraneous whitespace around keywords. - E273 - Fix extraneous whitespace around keywords. - E274 - Fix extraneous whitespace around keywords. - W291 - Remove trailing whitespace. - W293 - Remove trailing whitespace. --- SConstruct | 302 +++--- bin/tests/SCsub | 8 +- core/SCsub | 50 +- core/bind/SCsub | 2 +- core/io/SCsub | 6 +- core/make_binders.py | 140 +-- core/math/SCsub | 2 +- core/os/SCsub | 2 +- doc/tools/doc_merge.py | 186 ++-- doc/tools/doc_status.py | 46 +- doc/tools/makedoku.py | 304 +++--- doc/tools/makehtml.py | 688 +++++++------- doc/tools/makemd.py | 18 +- doc/tools/makerst.py | 130 +-- drivers/SCsub | 10 +- drivers/gl_context/SCsub | 10 +- drivers/png/SCsub | 8 +- drivers/rtaudio/SCsub | 2 +- drivers/unix/SCsub | 14 +- drivers/zlib/SCsub | 2 +- main/SCsub | 6 +- methods.py | 948 +++++++++---------- modules/SCsub | 10 +- modules/enet/SCsub | 4 +- modules/etc1/SCsub | 2 +- modules/freetype/SCsub | 14 +- modules/jpg/SCsub | 2 +- modules/mpc/SCsub | 4 +- modules/ogg/SCsub | 4 +- modules/openssl/SCsub | 14 +- modules/opus/SCsub | 12 +- modules/pvr/SCsub | 2 +- modules/squish/SCsub | 4 +- modules/theora/SCsub | 14 +- modules/vorbis/SCsub | 8 +- modules/webm/SCsub | 16 +- modules/webm/libvpx/SCsub | 4 +- modules/webm/libvpx/yasm_osx_fat.py | 4 +- modules/webp/SCsub | 4 +- platform/android/SCsub | 84 +- platform/android/detect.py | 212 ++--- platform/bb10/detect.py | 24 +- platform/haiku/SCsub | 2 +- platform/haiku/detect.py | 40 +- platform/iphone/SCsub | 6 +- platform/iphone/detect.py | 50 +- platform/javascript/SCsub | 28 +- platform/javascript/detect.py | 50 +- platform/osx/SCsub | 2 +- platform/osx/detect.py | 64 +- platform/server/SCsub | 4 +- platform/server/detect.py | 48 +- platform/windows/SCsub | 10 +- platform/windows/detect.py | 178 ++-- platform/winrt/detect.py | 56 +- platform/x11/SCsub | 4 +- platform/x11/detect.py | 110 +-- scene/2d/SCsub | 2 +- scene/3d/SCsub | 4 +- scene/SCsub | 6 +- scene/animation/SCsub | 2 +- scene/audio/SCsub | 2 +- scene/gui/SCsub | 2 +- scene/io/SCsub | 2 +- scene/main/SCsub | 2 +- scene/resources/SCsub | 4 +- scene/resources/default_theme/SCsub | 2 +- scene/resources/default_theme/make_header.py | 42 +- servers/SCsub | 6 +- servers/audio/SCsub | 2 +- servers/physics/SCsub | 2 +- servers/physics/joints/SCsub | 2 +- servers/physics_2d/SCsub | 2 +- servers/spatial_sound/SCsub | 2 +- servers/spatial_sound_2d/SCsub | 2 +- servers/visual/SCsub | 2 +- tools/SCsub | 62 +- tools/collada/SCsub | 2 +- tools/doc/SCsub | 2 +- tools/editor/SCsub | 52 +- tools/editor/fileserver/SCsub | 2 +- tools/editor/icons/SCsub | 56 +- tools/editor/io_plugins/SCsub | 2 +- tools/editor/plugins/SCsub | 2 +- tools/scripts/addheader.py | 56 +- tools/scripts/file-hex-array.py | 14 +- tools/scripts/make_bmfhdr.py | 58 +- tools/scripts/make_glwrapper.py | 100 +- tools/scripts/makeargs.py | 26 +- tools/scripts/memsort.py | 30 +- tools/scripts/svgs_2_pngs.py | 38 +- tools/translations/extract.py | 6 +- version.py | 10 +- 93 files changed, 2293 insertions(+), 2293 deletions(-) diff --git a/SConstruct b/SConstruct index 32c24fed091..ee3a54b5ab5 100644 --- a/SConstruct +++ b/SConstruct @@ -1,6 +1,6 @@ #!/usr/bin/env python -EnsureSConsVersion(0,14); +EnsureSConsVersion(0, 14); import string @@ -14,85 +14,85 @@ methods.update_version() # scan possible build platforms -platform_list = [] # list of platforms -platform_opts = {} # options for each platform -platform_flags = {} # flags for each platform +platform_list = [] # list of platforms +platform_opts = {} # options for each platform +platform_flags = {} # flags for each platform -active_platforms=[] -active_platform_ids=[] -platform_exporters=[] -global_defaults=[] +active_platforms = [] +active_platform_ids = [] +platform_exporters = [] +global_defaults = [] for x in glob.glob("platform/*"): - if (not os.path.isdir(x) or not os.path.exists(x+"/detect.py")): + if (not os.path.isdir(x) or not os.path.exists(x + "/detect.py")): continue - tmppath="./"+x + tmppath = "./" + x sys.path.append(tmppath) import detect - if (os.path.exists(x+"/export/export.cpp")): + if (os.path.exists(x + "/export/export.cpp")): platform_exporters.append(x[9:]) - if (os.path.exists(x+"/globals/global_defaults.cpp")): + if (os.path.exists(x + "/globals/global_defaults.cpp")): global_defaults.append(x[9:]) if (detect.is_active()): - active_platforms.append( detect.get_name() ) + active_platforms.append(detect.get_name()) active_platform_ids.append(x); if (detect.can_build()): - x=x.replace("platform/","") # rest of world - x=x.replace("platform\\","") # win32 - platform_list+=[x] - platform_opts[x]=detect.get_opts() - platform_flags[x]=detect.get_flags() + x = x.replace("platform/", "") # rest of world + x = x.replace("platform\\", "") # win32 + platform_list += [x] + platform_opts[x] = detect.get_opts() + platform_flags[x] = detect.get_flags() sys.path.remove(tmppath) sys.modules.pop('detect') -module_list=methods.detect_modules() +module_list = methods.detect_modules() -#print "Detected Platforms: "+str(platform_list) +# print "Detected Platforms: "+str(platform_list) -methods.save_active_platforms(active_platforms,active_platform_ids) +methods.save_active_platforms(active_platforms, active_platform_ids) -custom_tools=['default'] +custom_tools = ['default'] platform_arg = ARGUMENTS.get("platform", ARGUMENTS.get("p", False)) -if (os.name=="posix"): +if (os.name == "posix"): pass -elif (os.name=="nt"): - if ( os.getenv("VCINSTALLDIR")==None or platform_arg=="android"): - custom_tools=['mingw'] +elif (os.name == "nt"): + if (os.getenv("VCINSTALLDIR") == None or platform_arg == "android"): + custom_tools = ['mingw'] -env_base=Environment(tools=custom_tools); +env_base = Environment(tools=custom_tools); if 'TERM' in os.environ: env_base['ENV']['TERM'] = os.environ['TERM'] env_base.AppendENVPath('PATH', os.getenv('PATH')) env_base.AppendENVPath('PKG_CONFIG_PATH', os.getenv('PKG_CONFIG_PATH')) -env_base.global_defaults=global_defaults -env_base.android_maven_repos=[] -env_base.android_dependencies=[] -env_base.android_java_dirs=[] -env_base.android_res_dirs=[] -env_base.android_aidl_dirs=[] -env_base.android_jni_dirs=[] -env_base.android_default_config=[] -env_base.android_manifest_chunk="" -env_base.android_permission_chunk="" -env_base.android_appattributes_chunk="" -env_base.disabled_modules=[] -env_base.use_ptrcall=False -env_base.split_drivers=False +env_base.global_defaults = global_defaults +env_base.android_maven_repos = [] +env_base.android_dependencies = [] +env_base.android_java_dirs = [] +env_base.android_res_dirs = [] +env_base.android_aidl_dirs = [] +env_base.android_jni_dirs = [] +env_base.android_default_config = [] +env_base.android_manifest_chunk = "" +env_base.android_permission_chunk = "" +env_base.android_appattributes_chunk = "" +env_base.disabled_modules = [] +env_base.use_ptrcall = False +env_base.split_drivers = False -env_base.__class__.android_add_maven_repository=methods.android_add_maven_repository -env_base.__class__.android_add_dependency=methods.android_add_dependency -env_base.__class__.android_add_java_dir=methods.android_add_java_dir -env_base.__class__.android_add_res_dir=methods.android_add_res_dir -env_base.__class__.android_add_aidl_dir=methods.android_add_aidl_dir -env_base.__class__.android_add_jni_dir=methods.android_add_jni_dir -env_base.__class__.android_add_default_config=methods.android_add_default_config +env_base.__class__.android_add_maven_repository = methods.android_add_maven_repository +env_base.__class__.android_add_dependency = methods.android_add_dependency +env_base.__class__.android_add_java_dir = methods.android_add_java_dir +env_base.__class__.android_add_res_dir = methods.android_add_res_dir +env_base.__class__.android_add_aidl_dir = methods.android_add_aidl_dir +env_base.__class__.android_add_jni_dir = methods.android_add_jni_dir +env_base.__class__.android_add_default_config = methods.android_add_default_config env_base.__class__.android_add_to_manifest = methods.android_add_to_manifest env_base.__class__.android_add_to_permissions = methods.android_add_to_permissions env_base.__class__.android_add_to_attributes = methods.android_add_to_attributes @@ -102,8 +102,8 @@ env_base.__class__.add_source_files = methods.add_source_files env_base.__class__.use_windows_spawn_fix = methods.use_windows_spawn_fix env_base.__class__.split_lib = methods.split_lib -env_base["x86_libtheora_opt_gcc"]=False -env_base["x86_libtheora_opt_vc"]=False +env_base["x86_libtheora_opt_gcc"] = False +env_base["x86_libtheora_opt_vc"] = False customs = ['custom.py'] @@ -112,43 +112,43 @@ if profile: import os.path if os.path.isfile(profile): customs.append(profile) - elif os.path.isfile(profile+".py"): - customs.append(profile+".py") + elif os.path.isfile(profile + ".py"): + customs.append(profile + ".py") -opts=Variables(customs, ARGUMENTS) +opts = Variables(customs, ARGUMENTS) opts.Add('target', 'Compile Target (debug/release_debug/release).', "debug") opts.Add('arch', 'Platform dependent architecture (arm/arm64/x86/x64/mips/etc)', "") opts.Add('bits', 'Compile Target Bits (default/32/64/fat).', "default") -opts.Add('platform','Platform: '+str(platform_list)+'.',"") -opts.Add('p','Platform (same as platform=).',"") -opts.Add('tools','Build Tools (Including Editor): (yes/no)','yes') -opts.Add('gdscript','Build GDSCript support: (yes/no)','yes') -opts.Add('libogg','Ogg library for ogg container support (system/builtin)','builtin') -opts.Add('libvorbis','Ogg Vorbis library for vorbis support (system/builtin)','builtin') -opts.Add('libtheora','Theora library for theora module (system/builtin)','builtin') -opts.Add('libvpx','VPX library for webm module (system/builtin)','builtin') -opts.Add('opus','Opus and opusfile library for Opus format support: (system/builtin)','builtin') -opts.Add('minizip','Build Minizip Archive Support: (yes/no)','yes') -opts.Add('squish','Squish library for BC Texture Compression in editor (system/builtin)','builtin') -opts.Add('freetype','Freetype library for TTF support via freetype module (system/builtin)','builtin') -opts.Add('xml','XML Save/Load support (yes/no)','yes') -opts.Add('libpng','libpng library for image loader support (system/builtin)','builtin') -opts.Add('libwebp','libwebp library for webp module (system/builtin)','builtin') -opts.Add('openssl','OpenSSL library for openssl module (system/builtin)','builtin') -opts.Add('libmpcdec','libmpcdec library for mpc module (system/builtin)','builtin') -opts.Add('enet','ENet library (system/builtin)','builtin') -opts.Add('glew','GLEW library for the gl_context (system/builtin)','builtin') -opts.Add('xaudio2','XAudio2 audio driver (yes/no)','no') +opts.Add('platform', 'Platform: ' + str(platform_list) + '.', "") +opts.Add('p', 'Platform (same as platform=).', "") +opts.Add('tools', 'Build Tools (Including Editor): (yes/no)', 'yes') +opts.Add('gdscript', 'Build GDSCript support: (yes/no)', 'yes') +opts.Add('libogg', 'Ogg library for ogg container support (system/builtin)', 'builtin') +opts.Add('libvorbis', 'Ogg Vorbis library for vorbis support (system/builtin)', 'builtin') +opts.Add('libtheora', 'Theora library for theora module (system/builtin)', 'builtin') +opts.Add('libvpx', 'VPX library for webm module (system/builtin)', 'builtin') +opts.Add('opus', 'Opus and opusfile library for Opus format support: (system/builtin)', 'builtin') +opts.Add('minizip', 'Build Minizip Archive Support: (yes/no)', 'yes') +opts.Add('squish', 'Squish library for BC Texture Compression in editor (system/builtin)', 'builtin') +opts.Add('freetype', 'Freetype library for TTF support via freetype module (system/builtin)', 'builtin') +opts.Add('xml', 'XML Save/Load support (yes/no)', 'yes') +opts.Add('libpng', 'libpng library for image loader support (system/builtin)', 'builtin') +opts.Add('libwebp', 'libwebp library for webp module (system/builtin)', 'builtin') +opts.Add('openssl', 'OpenSSL library for openssl module (system/builtin)', 'builtin') +opts.Add('libmpcdec', 'libmpcdec library for mpc module (system/builtin)', 'builtin') +opts.Add('enet', 'ENet library (system/builtin)', 'builtin') +opts.Add('glew', 'GLEW library for the gl_context (system/builtin)', 'builtin') +opts.Add('xaudio2', 'XAudio2 audio driver (yes/no)', 'no') opts.Add("CXX", "C++ Compiler") opts.Add("CC", "C Compiler") opts.Add("CCFLAGS", "Custom flags for the C/C++ compiler"); opts.Add("CFLAGS", "Custom flags for the C compiler"); opts.Add("LINKFLAGS", "Custom flags for the linker"); -opts.Add('unix_global_settings_path', 'unix-specific path to system-wide settings. Currently only used by templates.','') +opts.Add('unix_global_settings_path', 'unix-specific path to system-wide settings. Currently only used by templates.', '') opts.Add('disable_3d', 'Disable 3D nodes for smaller executable (yes/no)', "no") opts.Add('disable_advanced_gui', 'Disable advance 3D gui nodes and behaviors (yes/no)', "no") opts.Add('verbose', 'Enable verbose output for the compilation (yes/no)', 'yes') -opts.Add('deprecated','Enable deprecated features (yes/no)','yes') +opts.Add('deprecated', 'Enable deprecated features (yes/no)', 'yes') opts.Add('extra_suffix', 'Custom extra suffix added to the base filename of all generated binary files.', '') opts.Add('vsproj', 'Generate Visual Studio Project. (yes/no)', 'no') @@ -157,20 +157,20 @@ opts.Add('vsproj', 'Generate Visual Studio Project. (yes/no)', 'no') for k in platform_opts.keys(): opt_list = platform_opts[k] for o in opt_list: - opts.Add(o[0],o[1],o[2]) + opts.Add(o[0], o[1], o[2]) for x in module_list: - opts.Add('module_'+x+'_enabled', "Enable module '"+x+"' (yes/no)", "yes") + opts.Add('module_' + x + '_enabled', "Enable module '" + x + "' (yes/no)", "yes") -opts.Update(env_base) # update environment -Help(opts.GenerateHelpText(env_base)) # generate help +opts.Update(env_base) # update environment +Help(opts.GenerateHelpText(env_base)) # generate help # add default include paths -env_base.Append(CPPPATH=['#core','#core/math','#tools','#drivers','#']) +env_base.Append(CPPPATH=['#core', '#core/math', '#tools', '#drivers', '#']) # configure ENV for platform -env_base.platform_exporters=platform_exporters +env_base.platform_exporters = platform_exporters """ sys.path.append("./platform/"+env_base["platform"]) @@ -180,58 +180,58 @@ sys.path.remove("./platform/"+env_base["platform"]) sys.modules.pop('detect') """ -if (env_base['target']=='debug'): +if (env_base['target'] == 'debug'): env_base.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']); env_base.Append(CPPFLAGS=['-DSCI_NAMESPACE']) -if (env_base['deprecated']!='no'): +if (env_base['deprecated'] != 'no'): env_base.Append(CPPFLAGS=['-DENABLE_DEPRECATED']); env_base.platforms = {} -selected_platform ="" +selected_platform = "" if env_base['platform'] != "": - selected_platform=env_base['platform'] + selected_platform = env_base['platform'] elif env_base['p'] != "": - selected_platform=env_base['p'] - env_base["platform"]=selected_platform + selected_platform = env_base['p'] + env_base["platform"] = selected_platform if selected_platform in platform_list: - sys.path.append("./platform/"+selected_platform) + sys.path.append("./platform/" + selected_platform) import detect if "create" in dir(detect): env = detect.create(env_base) else: env = env_base.Clone() - if env['vsproj']=="yes": + if env['vsproj'] == "yes": env.vs_incs = [] env.vs_srcs = [] - def AddToVSProject( sources ): + def AddToVSProject(sources): for x in sources: if type(x) == type(""): fname = env.File(x).path else: fname = env.File(x)[0].path - pieces = fname.split(".") - if len(pieces)>0: + pieces = fname.split(".") + if len(pieces) > 0: basename = pieces[0] - basename = basename.replace('\\\\','/') + basename = basename.replace('\\\\', '/') env.vs_srcs = env.vs_srcs + [basename + ".cpp"] env.vs_incs = env.vs_incs + [basename + ".h"] - #print basename + # print basename env.AddToVSProject = AddToVSProject - env.extra_suffix="" + env.extra_suffix = "" - if env["extra_suffix"] != '' : - env.extra_suffix += '.'+env["extra_suffix"] + if env["extra_suffix"] != '': + env.extra_suffix += '.' + env["extra_suffix"] CCFLAGS = env.get('CCFLAGS', '') env['CCFLAGS'] = '' @@ -250,63 +250,63 @@ if selected_platform in platform_list: flag_list = platform_flags[selected_platform] for f in flag_list: - if not (f[0] in ARGUMENTS): # allow command line to override platform flags + if not (f[0] in ARGUMENTS): # allow command line to override platform flags env[f[0]] = f[1] - #must happen after the flags, so when flags are used by configure, stuff happens (ie, ssl on x11) + # must happen after the flags, so when flags are used by configure, stuff happens (ie, ssl on x11) detect.configure(env) #env['platform_libsuffix'] = env['LIBSUFFIX'] - suffix="."+selected_platform + suffix = "." + selected_platform - if (env["target"]=="release"): - if (env["tools"]=="yes"): + if (env["target"] == "release"): + if (env["tools"] == "yes"): print("Tools can only be built with targets 'debug' and 'release_debug'.") sys.exit(255) - suffix+=".opt" + suffix += ".opt" env.Append(CCFLAGS=['-DNDEBUG']); - elif (env["target"]=="release_debug"): - if (env["tools"]=="yes"): - suffix+=".opt.tools" + elif (env["target"] == "release_debug"): + if (env["tools"] == "yes"): + suffix += ".opt.tools" else: - suffix+=".opt.debug" + suffix += ".opt.debug" else: - if (env["tools"]=="yes"): - suffix+=".tools" + if (env["tools"] == "yes"): + suffix += ".tools" else: - suffix+=".debug" + suffix += ".debug" if env["arch"] != "": - suffix += "."+env["arch"] - elif (env["bits"]=="32"): - suffix+=".32" - elif (env["bits"]=="64"): - suffix+=".64" - elif (env["bits"]=="fat"): - suffix+=".fat" + suffix += "." + env["arch"] + elif (env["bits"] == "32"): + suffix += ".32" + elif (env["bits"] == "64"): + suffix += ".64" + elif (env["bits"] == "fat"): + suffix += ".fat" - suffix+=env.extra_suffix + suffix += env.extra_suffix - env["PROGSUFFIX"]=suffix+env["PROGSUFFIX"] - env["OBJSUFFIX"]=suffix+env["OBJSUFFIX"] - env["LIBSUFFIX"]=suffix+env["LIBSUFFIX"] - env["SHLIBSUFFIX"]=suffix+env["SHLIBSUFFIX"] + env["PROGSUFFIX"] = suffix + env["PROGSUFFIX"] + env["OBJSUFFIX"] = suffix + env["OBJSUFFIX"] + env["LIBSUFFIX"] = suffix + env["LIBSUFFIX"] + env["SHLIBSUFFIX"] = suffix + env["SHLIBSUFFIX"] - sys.path.remove("./platform/"+selected_platform) + sys.path.remove("./platform/" + selected_platform) sys.modules.pop('detect') - env.module_list=[] + env.module_list = [] for x in module_list: - if env['module_'+x+'_enabled'] != "yes": + if env['module_' + x + '_enabled'] != "yes": continue - tmppath="./modules/"+x + tmppath = "./modules/" + x sys.path.append(tmppath) - env.current_module=x + env.current_module = x import config if (config.can_build(selected_platform)): config.configure(env) @@ -321,27 +321,27 @@ if selected_platform in platform_list: # to test 64 bits compiltion # env.Append(CPPFLAGS=['-m64']) - if (env['tools']=='yes'): + if (env['tools'] == 'yes'): env.Append(CPPFLAGS=['-DTOOLS_ENABLED']) - if (env['disable_3d']=='yes'): + if (env['disable_3d'] == 'yes'): env.Append(CPPFLAGS=['-D_3D_DISABLED']) - if (env['gdscript']=='yes'): + if (env['gdscript'] == 'yes'): env.Append(CPPFLAGS=['-DGDSCRIPT_ENABLED']) - if (env['disable_advanced_gui']=='yes'): + if (env['disable_advanced_gui'] == 'yes'): env.Append(CPPFLAGS=['-DADVANCED_GUI_DISABLED']) if (env['minizip'] == 'yes'): env.Append(CPPFLAGS=['-DMINIZIP_ENABLED']) - if (env['xml']=='yes'): + if (env['xml'] == 'yes'): env.Append(CPPFLAGS=['-DXML_ENABLED']) - if (env['verbose']=='no'): - methods.no_verbose(sys,env) + if (env['verbose'] == 'no'): + methods.no_verbose(sys, env) Export('env') - #build subdirs, the build order is dependent on link order. + # build subdirs, the build order is dependent on link order. SConscript("core/SCsub") SConscript("servers/SCsub") @@ -353,10 +353,10 @@ if selected_platform in platform_list: SConscript("modules/SCsub") SConscript("main/SCsub") - SConscript("platform/"+selected_platform+"/SCsub"); # build selected platform + SConscript("platform/" + selected_platform + "/SCsub"); # build selected platform # Microsoft Visual Studio Project Generation - if (env['vsproj'])=="yes": + if (env['vsproj']) == "yes": AddToVSProject(env.core_sources) AddToVSProject(env.main_sources) @@ -369,7 +369,7 @@ if selected_platform in platform_list: # Even then, SCons still seems to ignore it and builds with the latest MSVC... # That said, it's not needed to be set so far but I'm leaving it here so that this comment # has a purpose. - #env['MSVS_VERSION']='9.0' + # env['MSVS_VERSION']='9.0' # Calls a CMD with /C(lose) and /V(delayed environment variable expansion) options. @@ -381,26 +381,26 @@ if selected_platform in platform_list: # This version information (Win32, x64, Debug, Release, Release_Debug seems to be # required for Visual Studio to understand that it needs to generate an NMAKE # project. Do not modify without knowing what you are doing. - debug_variants = ['debug|Win32']+['debug|x64'] - release_variants = ['release|Win32']+['release|x64'] - release_debug_variants = ['release_debug|Win32']+['release_debug|x64'] + debug_variants = ['debug|Win32'] + ['debug|x64'] + release_variants = ['release|Win32'] + ['release|x64'] + release_debug_variants = ['release_debug|Win32'] + ['release_debug|x64'] variants = debug_variants + release_variants + release_debug_variants - debug_targets = ['bin\\godot.windows.tools.32.exe']+['bin\\godot.windows.tools.64.exe'] - release_targets = ['bin\\godot.windows.opt.32.exe']+['bin\\godot.windows.opt.64.exe'] - release_debug_targets = ['bin\\godot.windows.opt.tools.32.exe']+['bin\\godot.windows.opt.tools.64.exe'] + debug_targets = ['bin\\godot.windows.tools.32.exe'] + ['bin\\godot.windows.tools.64.exe'] + release_targets = ['bin\\godot.windows.opt.32.exe'] + ['bin\\godot.windows.opt.64.exe'] + release_debug_targets = ['bin\\godot.windows.opt.tools.32.exe'] + ['bin\\godot.windows.opt.tools.64.exe'] targets = debug_targets + release_targets + release_debug_targets - msvproj = env.MSVSProject(target = ['#godot' + env['MSVSPROJECTSUFFIX'] ], - incs = env.vs_incs, - srcs = env.vs_srcs, - runfile = targets, - buildtarget = targets, + msvproj = env.MSVSProject(target=['#godot' + env['MSVSPROJECTSUFFIX']], + incs=env.vs_incs, + srcs=env.vs_srcs, + runfile=targets, + buildtarget=targets, auto_build_solution=1, - variant = variants) + variant=variants) else: print("No valid target platform selected.") print("The following were detected:") for x in platform_list: - print("\t"+x) + print("\t" + x) print("\nPlease run scons again with argument: platform=") diff --git a/bin/tests/SCsub b/bin/tests/SCsub index 26687599e1f..03495c06491 100644 --- a/bin/tests/SCsub +++ b/bin/tests/SCsub @@ -2,13 +2,13 @@ Import('env') -env.tests_sources=[] -env.add_source_files(env.tests_sources,"*.cpp") +env.tests_sources = [] +env.add_source_files(env.tests_sources, "*.cpp") Export('env') -#SConscript('math/SCsub'); +# SConscript('math/SCsub'); -lib = env.Library("tests",env.tests_sources) +lib = env.Library("tests", env.tests_sources) env.Prepend(LIBS=[lib]) diff --git a/core/SCsub b/core/SCsub index d3429c57a82..558df83d05a 100644 --- a/core/SCsub +++ b/core/SCsub @@ -2,66 +2,66 @@ Import('env') -env.core_sources=[] +env.core_sources = [] -gd_call="" -gd_inc="" +gd_call = "" +gd_inc = "" for x in env.global_defaults: - env.core_sources.append("#platform/"+x+"/globals/global_defaults.cpp") - gd_inc+='#include "platform/'+x+'/globals/global_defaults.h"\n' - gd_call+="\tregister_"+x+"_global_defaults();\n" + env.core_sources.append("#platform/" + x + "/globals/global_defaults.cpp") + gd_inc += '#include "platform/' + x + '/globals/global_defaults.h"\n' + gd_call += "\tregister_" + x + "_global_defaults();\n" -gd_cpp='#include "globals.h"\n' -gd_cpp+=gd_inc -gd_cpp+="void Globals::register_global_defaults() {\n"+gd_call+"\n}\n" +gd_cpp = '#include "globals.h"\n' +gd_cpp += gd_inc +gd_cpp += "void Globals::register_global_defaults() {\n" + gd_call + "\n}\n" -f = open("global_defaults.cpp","wb") +f = open("global_defaults.cpp", "wb") f.write(gd_cpp) f.close() import os txt = "0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0" if ("SCRIPT_AES256_ENCRYPTION_KEY" in os.environ): - e=os.environ["SCRIPT_AES256_ENCRYPTION_KEY"] + e = os.environ["SCRIPT_AES256_ENCRYPTION_KEY"] txt = "" - ec_valid=True - if (len(e)!=64): - ec_valid=False + ec_valid = True + if (len(e) != 64): + ec_valid = False else: - for i in range(len(e)>>1): - if (i>0): - txt+="," - txts="0x"+e[i*2:i*2+2] + for i in range(len(e) >> 1): + if (i > 0): + txt += "," + txts = "0x" + e[i * 2:i * 2 + 2] try: - int(txts,16) + int(txts, 16) except: - ec_valid=False - txt+=txts + ec_valid = False + txt += txts if (not ec_valid): txt = "0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0" - print("Invalid AES256 encryption key, not 64 bits hex: "+e) + print("Invalid AES256 encryption key, not 64 bits hex: " + e) f = open("script_encryption_key.cpp", "wb") f.write("#include \"globals.h\"\nuint8_t script_encryption_key[32]={" + txt + "};\n") f.close() -env.add_source_files(env.core_sources,"*.cpp") +env.add_source_files(env.core_sources, "*.cpp") Export('env') import make_binders -env.Command(['method_bind.inc','method_bind_ext.inc'], 'make_binders.py', make_binders.run) +env.Command(['method_bind.inc', 'method_bind_ext.inc'], 'make_binders.py', make_binders.run) SConscript('os/SCsub'); SConscript('math/SCsub'); SConscript('io/SCsub'); SConscript('bind/SCsub'); -lib = env.Library("core",env.core_sources) +lib = env.Library("core", env.core_sources) env.Prepend(LIBS=[lib]) diff --git a/core/bind/SCsub b/core/bind/SCsub index c2731d60e6b..4efc9027176 100644 --- a/core/bind/SCsub +++ b/core/bind/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.core_sources,"*.cpp") +env.add_source_files(env.core_sources, "*.cpp") Export('env') diff --git a/core/io/SCsub b/core/io/SCsub index 48cc9a5275a..6789aa8bc6f 100644 --- a/core/io/SCsub +++ b/core/io/SCsub @@ -2,8 +2,8 @@ Import('env') -env.add_source_files(env.core_sources,"*.cpp") -env.add_source_files(env.core_sources,"*.c") -#env.core_sources.append("io/fastlz.c") +env.add_source_files(env.core_sources, "*.cpp") +env.add_source_files(env.core_sources, "*.c") +# env.core_sources.append("io/fastlz.c") Export('env') diff --git a/core/make_binders.py b/core/make_binders.py index 72ca3286fa6..5debf1345ce 100644 --- a/core/make_binders.py +++ b/core/make_binders.py @@ -1,7 +1,7 @@ # -*- coding: ibm850 -*- -template_typed=""" +template_typed = """ #ifdef TYPED_METHOD_BIND template class MethodBind$argc$$ifret R$$ifconst C$ : public MethodBind { @@ -77,7 +77,7 @@ MethodBind* create_method_bind($ifret R$ $ifnoret void$ (T::*p_method)($arg, P@$ #endif """ -template=""" +template = """ #ifndef TYPED_METHOD_BIND $iftempl template<$ $ifret class R$ $ifretargs ,$ $arg, class P@$ $iftempl >$ class MethodBind$argc$$ifret R$$ifconst C$ : public MethodBind { @@ -166,96 +166,96 @@ MethodBind* create_method_bind($ifret R$ $ifnoret void$ (T::*p_method)($arg, P@$ """ -def make_version(template,nargs,argmax,const,ret): +def make_version(template, nargs, argmax, const, ret): - intext=template - from_pos=0 - outtext="" + intext = template + from_pos = 0 + outtext = "" while(True): - to_pos=intext.find("$",from_pos) - if (to_pos==-1): - outtext+=intext[from_pos:] + to_pos = intext.find("$", from_pos) + if (to_pos == -1): + outtext += intext[from_pos:] break else: - outtext+=intext[from_pos:to_pos] - end=intext.find("$",to_pos+1) - if (end==-1): - break # ignore - macro=intext[to_pos+1:end] - cmd="" - data="" + outtext += intext[from_pos:to_pos] + end = intext.find("$", to_pos + 1) + if (end == -1): + break # ignore + macro = intext[to_pos + 1:end] + cmd = "" + data = "" - if (macro.find(" ")!=-1): - cmd=macro[0:macro.find(" ")] - data=macro[macro.find(" ")+1:] + if (macro.find(" ") != -1): + cmd = macro[0:macro.find(" ")] + data = macro[macro.find(" ") + 1:] else: - cmd=macro + cmd = macro - if (cmd=="argc"): - outtext+=str(nargs) - if (cmd=="ifret" and ret): - outtext+=data - if (cmd=="ifargs" and nargs): - outtext+=data - if (cmd=="ifretargs" and nargs and ret): - outtext+=data - if (cmd=="ifconst" and const): - outtext+=data - elif (cmd=="ifnoconst" and not const): - outtext+=data - elif (cmd=="ifnoret" and not ret): - outtext+=data - elif (cmd=="iftempl" and (nargs>0 or ret)): - outtext+=data - elif (cmd=="arg,"): - for i in range(1,nargs+1): - if (i>1): - outtext+=", " - outtext+=data.replace("@",str(i)) - elif (cmd=="arg"): - for i in range(1,nargs+1): - outtext+=data.replace("@",str(i)) - elif (cmd=="noarg"): - for i in range(nargs+1,argmax+1): - outtext+=data.replace("@",str(i)) - elif (cmd=="noarg"): - for i in range(nargs+1,argmax+1): - outtext+=data.replace("@",str(i)) + if (cmd == "argc"): + outtext += str(nargs) + if (cmd == "ifret" and ret): + outtext += data + if (cmd == "ifargs" and nargs): + outtext += data + if (cmd == "ifretargs" and nargs and ret): + outtext += data + if (cmd == "ifconst" and const): + outtext += data + elif (cmd == "ifnoconst" and not const): + outtext += data + elif (cmd == "ifnoret" and not ret): + outtext += data + elif (cmd == "iftempl" and (nargs > 0 or ret)): + outtext += data + elif (cmd == "arg,"): + for i in range(1, nargs + 1): + if (i > 1): + outtext += ", " + outtext += data.replace("@", str(i)) + elif (cmd == "arg"): + for i in range(1, nargs + 1): + outtext += data.replace("@", str(i)) + elif (cmd == "noarg"): + for i in range(nargs + 1, argmax + 1): + outtext += data.replace("@", str(i)) + elif (cmd == "noarg"): + for i in range(nargs + 1, argmax + 1): + outtext += data.replace("@", str(i)) - from_pos=end+1 + from_pos = end + 1 return outtext def run(target, source, env): - versions=10 - versions_ext=6 - text="" - text_ext="" + versions = 10 + versions_ext = 6 + text = "" + text_ext = "" - for i in range(0,versions+1): + for i in range(0, versions + 1): - t="" - t+=make_version(template,i,versions,False,False) - t+=make_version(template_typed,i,versions,False,False) - t+=make_version(template,i,versions,False,True) - t+=make_version(template_typed,i,versions,False,True) - t+=make_version(template,i,versions,True,False) - t+=make_version(template_typed,i,versions,True,False) - t+=make_version(template,i,versions,True,True) - t+=make_version(template_typed,i,versions,True,True) - if (i>=versions_ext): - text_ext+=t + t = "" + t += make_version(template, i, versions, False, False) + t += make_version(template_typed, i, versions, False, False) + t += make_version(template, i, versions, False, True) + t += make_version(template_typed, i, versions, False, True) + t += make_version(template, i, versions, True, False) + t += make_version(template_typed, i, versions, True, False) + t += make_version(template, i, versions, True, True) + t += make_version(template_typed, i, versions, True, True) + if (i >= versions_ext): + text_ext += t else: - text+=t + text += t - f=open(target[0].path,"w") + f = open(target[0].path, "w") f.write(text) f.close() - f=open(target[1].path,"w") + f = open(target[1].path, "w") f.write(text_ext) f.close() diff --git a/core/math/SCsub b/core/math/SCsub index c2731d60e6b..4efc9027176 100644 --- a/core/math/SCsub +++ b/core/math/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.core_sources,"*.cpp") +env.add_source_files(env.core_sources, "*.cpp") Export('env') diff --git a/core/os/SCsub b/core/os/SCsub index c2731d60e6b..4efc9027176 100644 --- a/core/os/SCsub +++ b/core/os/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.core_sources,"*.cpp") +env.add_source_files(env.core_sources, "*.cpp") Export('env') diff --git a/doc/tools/doc_merge.py b/doc/tools/doc_merge.py index 27f46960beb..747a8703784 100644 --- a/doc/tools/doc_merge.py +++ b/doc/tools/doc_merge.py @@ -6,17 +6,17 @@ import xml.etree.ElementTree as ET tree = ET.parse(sys.argv[1]) -old_doc=tree.getroot() +old_doc = tree.getroot() tree = ET.parse(sys.argv[2]) -new_doc=tree.getroot() +new_doc = tree.getroot() -f = file(sys.argv[3],"wb") -tab=0 +f = file(sys.argv[3], "wb") +tab = 0 -old_classes={} +old_classes = {} -def write_string(_f, text,newline=True): +def write_string(_f, text, newline=True): for t in range(tab): _f.write("\t") _f.write(text) @@ -24,188 +24,188 @@ def write_string(_f, text,newline=True): _f.write("\n") def escape(ret): - ret=ret.replace("&","&"); - ret=ret.replace("<",">"); - ret=ret.replace(">","<"); - ret=ret.replace("'","'"); - ret=ret.replace("\"","""); + ret = ret.replace("&", "&"); + ret = ret.replace("<", ">"); + ret = ret.replace(">", "<"); + ret = ret.replace("'", "'"); + ret = ret.replace("\"", """); return ret def inc_tab(): global tab - tab+=1 + tab += 1 def dec_tab(): global tab - tab-=1 + tab -= 1 -write_string(f,'') -write_string(f,'') +write_string(f, '') +write_string(f, '') -def get_tag(node,name): - tag="" +def get_tag(node, name): + tag = "" if (name in node.attrib): - tag=' '+name+'="'+escape(node.attrib[name])+'" ' + tag = ' ' + name + '="' + escape(node.attrib[name]) + '" ' return tag -def find_method_descr(old_class,name): +def find_method_descr(old_class, name): methods = old_class.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods != None and len(list(methods)) > 0): for m in list(methods): - if (m.attrib["name"]==name): - description=m.find("description") - if (description!=None and description.text.strip()!=""): + if (m.attrib["name"] == name): + description = m.find("description") + if (description != None and description.text.strip() != ""): return description.text return None -def find_signal_descr(old_class,name): +def find_signal_descr(old_class, name): signals = old_class.find("signals") - if(signals!=None and len(list(signals))>0): + if(signals != None and len(list(signals)) > 0): for m in list(signals): - if (m.attrib["name"]==name): - description=m.find("description") - if (description!=None and description.text.strip()!=""): + if (m.attrib["name"] == name): + description = m.find("description") + if (description != None and description.text.strip() != ""): return description.text return None -def find_constant_descr(old_class,name): +def find_constant_descr(old_class, name): - if (old_class==None): + if (old_class == None): return None constants = old_class.find("constants") - if(constants!=None and len(list(constants))>0): + if(constants != None and len(list(constants)) > 0): for m in list(constants): - if (m.attrib["name"]==name): - if (m.text.strip()!=""): + if (m.attrib["name"] == name): + if (m.text.strip() != ""): return m.text return None def write_class(c): class_name = c.attrib["name"] - print("Parsing Class: "+class_name) + print("Parsing Class: " + class_name) if (class_name in old_classes): - old_class=old_classes[class_name] + old_class = old_classes[class_name] else: - old_class=None + old_class = None - category=get_tag(c,"category") - inherits=get_tag(c,"inherits") - write_string(f,'') + category = get_tag(c, "category") + inherits = get_tag(c, "inherits") + write_string(f, '') inc_tab() - write_string(f,"") + write_string(f, "") - if (old_class!=None): - old_brief_descr=old_class.find("brief_description") - if (old_brief_descr!=None): - write_string(f,escape(old_brief_descr.text.strip())) + if (old_class != None): + old_brief_descr = old_class.find("brief_description") + if (old_brief_descr != None): + write_string(f, escape(old_brief_descr.text.strip())) - write_string(f,"") + write_string(f, "") - write_string(f,"") - if (old_class!=None): - old_descr=old_class.find("description") - if (old_descr!=None): - write_string(f,escape(old_descr.text.strip())) + write_string(f, "") + if (old_class != None): + old_descr = old_class.find("description") + if (old_descr != None): + write_string(f, escape(old_descr.text.strip())) - write_string(f,"") + write_string(f, "") methods = c.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods != None and len(list(methods)) > 0): - write_string(f,"") + write_string(f, "") inc_tab() for m in list(methods): - qualifiers=get_tag(m,"qualifiers") + qualifiers = get_tag(m, "qualifiers") - write_string(f,'') + write_string(f, '') inc_tab() for a in list(m): - if (a.tag=="return"): - typ=get_tag(a,"type") - write_string(f,''); - write_string(f,''); - elif (a.tag=="argument"): + if (a.tag == "return"): + typ = get_tag(a, "type") + write_string(f, ''); + write_string(f, ''); + elif (a.tag == "argument"): - default=get_tag(a,"default") + default = get_tag(a, "default") - write_string(f,''); - write_string(f,''); + write_string(f, ''); + write_string(f, ''); - write_string(f,''); - if (old_class!=None): - old_method_descr=find_method_descr(old_class,m.attrib["name"]) + write_string(f, ''); + if (old_class != None): + old_method_descr = find_method_descr(old_class, m.attrib["name"]) if (old_method_descr): - write_string(f,escape(escape(old_method_descr.strip()))) + write_string(f, escape(escape(old_method_descr.strip()))) - write_string(f,''); + write_string(f, ''); dec_tab() - write_string(f,"") + write_string(f, "") dec_tab() - write_string(f,"") + write_string(f, "") signals = c.find("signals") - if(signals!=None and len(list(signals))>0): + if(signals != None and len(list(signals)) > 0): - write_string(f,"") + write_string(f, "") inc_tab() for m in list(signals): - write_string(f,'') + write_string(f, '') inc_tab() for a in list(m): - if (a.tag=="argument"): + if (a.tag == "argument"): - write_string(f,''); - write_string(f,''); + write_string(f, ''); + write_string(f, ''); - write_string(f,''); - if (old_class!=None): - old_signal_descr=find_signal_descr(old_class,m.attrib["name"]) + write_string(f, ''); + if (old_class != None): + old_signal_descr = find_signal_descr(old_class, m.attrib["name"]) if (old_signal_descr): - write_string(f,escape(old_signal_descr.strip())) - write_string(f,''); + write_string(f, escape(old_signal_descr.strip())) + write_string(f, ''); dec_tab() - write_string(f,"") + write_string(f, "") dec_tab() - write_string(f,"") + write_string(f, "") constants = c.find("constants") - if(constants!=None and len(list(constants))>0): + if(constants != None and len(list(constants)) > 0): - write_string(f,"") + write_string(f, "") inc_tab() for m in list(constants): - write_string(f,'') - old_constant_descr=find_constant_descr(old_class,m.attrib["name"]) + write_string(f, '') + old_constant_descr = find_constant_descr(old_class, m.attrib["name"]) if (old_constant_descr): - write_string(f,escape(old_constant_descr.strip())) - write_string(f,"") + write_string(f, escape(old_constant_descr.strip())) + write_string(f, "") dec_tab() - write_string(f,"") + write_string(f, "") dec_tab() - write_string(f,"") + write_string(f, "") for c in list(old_doc): - old_classes[c.attrib["name"]]=c + old_classes[c.attrib["name"]] = c for c in list(new_doc): write_class(c) -write_string(f,'\n') +write_string(f, '\n') diff --git a/doc/tools/doc_status.py b/doc/tools/doc_status.py index 00907bb01da..4e108f352ea 100755 --- a/doc/tools/doc_status.py +++ b/doc/tools/doc_status.py @@ -11,7 +11,7 @@ import xml.etree.ElementTree as ET ################################################################################ flags = { - 'c': platform.platform() != 'Windows', # Disable by default on windows, since we use ANSI escape codes + 'c': platform.platform() != 'Windows', # Disable by default on windows, since we use ANSI escape codes 'b': False, 'g': False, 's': False, @@ -62,15 +62,15 @@ long_flags = { table_columns = ['name', 'brief_description', 'description', 'methods', 'constants', 'members', 'signals'] table_column_names = ['Name', 'Brief Desc.', 'Desc.', 'Methods', 'Constants', 'Members', 'Signals'] colors = { - 'name': [36], # cyan - 'part_big_problem': [4, 31], # underline, red - 'part_problem': [31], # red - 'part_mostly_good': [33], # yellow - 'part_good': [32], # green - 'url': [4, 34], # underline, blue - 'section': [1, 4], # bold, underline - 'state_off': [36], # cyan - 'state_on': [1, 35], # bold, magenta/plum + 'name': [36], # cyan + 'part_big_problem': [4, 31], # underline, red + 'part_problem': [31], # red + 'part_mostly_good': [33], # yellow + 'part_good': [32], # green + 'url': [4, 34], # underline, blue + 'section': [1, 4], # bold, underline + 'state_off': [36], # cyan + 'state_on': [1, 35], # bold, magenta/plum } overall_progress_description_weigth = 10 @@ -105,7 +105,7 @@ def nonescape_len(s): ################################################################################ class ClassStatusProgress: - def __init__(self, described = 0, total = 0): + def __init__(self, described=0, total=0): self.described = described self.total = total @@ -127,12 +127,12 @@ class ClassStatusProgress: return self.to_colored_string() def to_colored_string(self, format='{has}/{total}', pad_format='{pad_described}{s}{pad_total}'): - ratio = self.described/self.total if self.total != 0 else 1 - percent = round(100*ratio) - s = format.format(has = str(self.described), total = str(self.total), percent = str(percent)) + ratio = self.described / self.total if self.total != 0 else 1 + percent = round(100 * ratio) + s = format.format(has=str(self.described), total=str(self.total), percent=str(percent)) if self.described >= self.total: s = color('part_good', s) - elif self.described >= self.total/4*3: + elif self.described >= self.total / 4 * 3: s = color('part_mostly_good', s) elif self.described > 0: s = color('part_problem', s) @@ -142,7 +142,7 @@ class ClassStatusProgress: pad_described = ''.ljust(pad_size - len(str(self.described))) pad_percent = ''.ljust(3 - len(str(percent))) pad_total = ''.ljust(pad_size - len(str(self.total))) - return pad_format.format(pad_described = pad_described, pad_total = pad_total, pad_percent = pad_percent, s = s) + return pad_format.format(pad_described=pad_described, pad_total=pad_total, pad_percent=pad_percent, s=s) class ClassStatus: @@ -231,7 +231,7 @@ class ClassStatus: status.progresses[tag.tag].increment(len(sub_tag.text.strip()) > 0) elif tag.tag in ['theme_items']: - pass #Ignore those tags, since they seem to lack description at all + pass # Ignore those tags, since they seem to lack description at all else: print(tag.tag, tag.attrib) @@ -296,10 +296,10 @@ if len(input_file_list) < 1 or flags['h']: if long_flags[synonym] == flag: synonyms.append(color('name', '--' + synonym)) - print(('{synonyms} (Currently '+color('state_'+('on' if flags[flag] else 'off'), '{value}')+')\n\t{description}').format( - synonyms = ', '.join(synonyms), - value = ('on' if flags[flag] else 'off'), - description = flag_descriptions[flag] + print(('{synonyms} (Currently ' + color('state_' + ('on' if flags[flag] else 'off'), '{value}') + ')\n\t{description}').format( + synonyms=', '.join(synonyms), + value=('on' if flags[flag] else 'off'), + description=flag_descriptions[flag] )) sys.exit(0) @@ -413,9 +413,9 @@ for row_i, row in enumerate(table): for cell_i, cell in enumerate(row): padding_needed = table_column_sizes[cell_i] - nonescape_len(cell) + 2 if cell_i == 0: - row_string += table_row_chars[2] + cell + table_row_chars[2]*(padding_needed-1) + row_string += table_row_chars[2] + cell + table_row_chars[2] * (padding_needed - 1) else: - row_string += table_row_chars[2]*math.floor(padding_needed/2) + cell + table_row_chars[2]*math.ceil((padding_needed/2)) + row_string += table_row_chars[2] * math.floor(padding_needed / 2) + cell + table_row_chars[2] * math.ceil((padding_needed / 2)) row_string += table_column_chars print(row_string) diff --git a/doc/tools/makedoku.py b/doc/tools/makedoku.py index f00a4614d5c..32a52a584af 100644 --- a/doc/tools/makedoku.py +++ b/doc/tools/makedoku.py @@ -15,68 +15,68 @@ if len(input_list) < 1: sys.exit(0) -def validate_tag(elem,tag): +def validate_tag(elem, tag): if (elem.tag != tag): - print("Tag mismatch, expected '"+tag+"', got "+elem.tag); + print("Tag mismatch, expected '" + tag + "', got " + elem.tag); sys.exit(255) -class_names=[] -classes={} +class_names = [] +classes = {} -def make_class_list(class_list,columns): +def make_class_list(class_list, columns): - f=open("class_list.txt","wb") - prev=0 + f = open("class_list.txt", "wb") + prev = 0 col_max = len(class_list) / columns + 1 print("col max is ", col_max) col_count = 0 row_count = 0 last_initial = "" - fit_columns=[] + fit_columns = [] - for n in range(0,columns): - fit_columns+=[[]] + for n in range(0, columns): + fit_columns += [[]] - indexers=[] - last_initial="" + indexers = [] + last_initial = "" - idx=0 + idx = 0 for n in class_list: - col = idx/col_max - if (col>=columns): - col=columns-1 - fit_columns[col]+=[n] - idx+=1 - if (n[:1]!=last_initial): - indexers+=[n] - last_initial=n[:1] + col = idx / col_max + if (col >= columns): + col = columns - 1 + fit_columns[col] += [n] + idx += 1 + if (n[:1] != last_initial): + indexers += [n] + last_initial = n[:1] - row_max=0 + row_max = 0 - for n in range(0,columns): - if (len(fit_columns[n])>row_max): - row_max=len(fit_columns[n]) + for n in range(0, columns): + if (len(fit_columns[n]) > row_max): + row_max = len(fit_columns[n]) - for r in range(0,row_max): - s="|" - for c in range(0,columns): - if (r>=len(fit_columns[c])): + for r in range(0, row_max): + s = "|" + for c in range(0, columns): + if (r >= len(fit_columns[c])): continue classname = fit_columns[c][r] - initial=classname[0] + initial = classname[0] if (classname in indexers): - s+="**"+initial+"**|" + s += "**" + initial + "**|" else: - s+=" |" + s += " |" - s+="[["+classname.lower()+"|"+classname+"]]|" + s += "[[" + classname.lower() + "|" + classname + "]]|" - s+="\n" + s += "\n" f.write(s) @@ -86,221 +86,221 @@ def dokuize_text(txt): def dokuize_text(text): - pos=0 + pos = 0 while(True): - pos = text.find("[",pos) - if (pos==-1): + pos = text.find("[", pos) + if (pos == -1): break - endq_pos=text.find("]",pos+1) - if (endq_pos==-1): + endq_pos = text.find("]", pos + 1) + if (endq_pos == -1): break - pre_text=text[:pos] - post_text=text[endq_pos+1:] - tag_text=text[pos+1:endq_pos] + pre_text = text[:pos] + post_text = text[endq_pos + 1:] + tag_text = text[pos + 1:endq_pos] if (tag_text in class_names): - tag_text="[["+tag_text.lower()+"|"+tag_text+"]]" - else: #command - cmd=tag_text - space_pos=tag_text.find(" ") - if (cmd.find("html")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] - tag_text="<"+param+">" - elif(cmd.find("method")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] + tag_text = "[[" + tag_text.lower() + "|" + tag_text + "]]" + else: # command + cmd = tag_text + space_pos = tag_text.find(" ") + if (cmd.find("html") == 0): + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] + tag_text = "<" + param + ">" + elif(cmd.find("method") == 0): + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] - if (param.find(".")!=-1): - class_param,method_param=param.split(".") - tag_text="[["+class_param.lower()+"#"+method_param+"|"+class_param+'.'+method_param+"]]" + if (param.find(".") != -1): + class_param, method_param = param.split(".") + tag_text = "[[" + class_param.lower() + "#" + method_param + "|" + class_param + '.' + method_param + "]]" else: - tag_text="[[#"+param+"|"+param+"]]" - elif (cmd.find("image=")==0): - tag_text="{{"+cmd[6:]+"}}" - elif (cmd.find("url=")==0): - tag_text="[["+cmd[4:]+"|" - elif (cmd=="/url"): - tag_text="]]>" - elif (cmd=="center"): - tag_text="" - elif (cmd=="/center"): - tag_text="" - elif (cmd=="br"): - tag_text="\\\\\n" - elif (cmd=="i" or cmd=="/i"): - tag_text="//" - elif (cmd=="b" or cmd=="/b"): - tag_text="**" - elif (cmd=="u" or cmd=="/u"): - tag_text="__" + tag_text = "[[#" + param + "|" + param + "]]" + elif (cmd.find("image=") == 0): + tag_text = "{{" + cmd[6:] + "}}" + elif (cmd.find("url=") == 0): + tag_text = "[[" + cmd[4:] + "|" + elif (cmd == "/url"): + tag_text = "]]>" + elif (cmd == "center"): + tag_text = "" + elif (cmd == "/center"): + tag_text = "" + elif (cmd == "br"): + tag_text = "\\\\\n" + elif (cmd == "i" or cmd == "/i"): + tag_text = "//" + elif (cmd == "b" or cmd == "/b"): + tag_text = "**" + elif (cmd == "u" or cmd == "/u"): + tag_text = "__" else: - tag_text="["+tag_text+"]" + tag_text = "[" + tag_text + "]" - text=pre_text+tag_text+post_text - pos=len(pre_text)+len(tag_text) + text = pre_text + tag_text + post_text + pos = len(pre_text) + len(tag_text) #tnode = ET.SubElement(parent,"div") - #tnode.text=text + # tnode.text=text return text def make_type(t): global class_names if (t in class_names): - return "[["+t.lower()+"|"+t+"]]" + return "[[" + t.lower() + "|" + t + "]]" return t -def make_method(f,name,m,declare,event=False): +def make_method(f, name, m, declare, event=False): - s=" * " - ret_type="void" - args=list(m) - mdata={} - mdata["argidx"]=[] + s = " * " + ret_type = "void" + args = list(m) + mdata = {} + mdata["argidx"] = [] for a in args: - if (a.tag=="return"): - idx=-1 - elif (a.tag=="argument"): - idx=int(a.attrib["index"]) + if (a.tag == "return"): + idx = -1 + elif (a.tag == "argument"): + idx = int(a.attrib["index"]) else: continue mdata["argidx"].append(idx) - mdata[idx]=a + mdata[idx] = a if (not event): if (-1 in mdata["argidx"]): - s+=make_type(mdata[-1].attrib["type"]) + s += make_type(mdata[-1].attrib["type"]) else: - s+="void" - s+=" " + s += "void" + s += " " if (declare): - #span.attrib["class"]="funcdecl" - #a=ET.SubElement(span,"a") - #a.attrib["name"]=name+"_"+m.attrib["name"] - #a.text=name+"::"+m.attrib["name"] - s+="**"+m.attrib["name"]+"**" + # span.attrib["class"]="funcdecl" + # a=ET.SubElement(span,"a") + # a.attrib["name"]=name+"_"+m.attrib["name"] + # a.text=name+"::"+m.attrib["name"] + s += "**" + m.attrib["name"] + "**" else: - s+="[[#"+m.attrib["name"]+"|"+m.attrib["name"]+"]]" + s += "[[#" + m.attrib["name"] + "|" + m.attrib["name"] + "]]" - s+="**(**" - argfound=False + s += "**(**" + argfound = False for a in mdata["argidx"]: - arg=mdata[a] - if (a<0): + arg = mdata[a] + if (a < 0): continue - if (a>0): - s+=", " + if (a > 0): + s += ", " else: - s+=" " + s += " " - s+=make_type(arg.attrib["type"]) + s += make_type(arg.attrib["type"]) if ("name" in arg.attrib): - s+=" "+arg.attrib["name"] + s += " " + arg.attrib["name"] else: - s+=" arg"+str(a) + s += " arg" + str(a) if ("default" in arg.attrib): - s+="="+arg.attrib["default"] + s += "=" + arg.attrib["default"] - argfound=True + argfound = True if (argfound): - s+=" " - s+="**)**" + s += " " + s += "**)**" if ("qualifiers" in m.attrib): - s+=" "+m.attrib["qualifiers"] + s += " " + m.attrib["qualifiers"] - f.write(s+"\n") + f.write(s + "\n") def make_doku_class(node): name = node.attrib["name"] - f=open(name.lower()+".txt","wb") + f = open(name.lower() + ".txt", "wb") - f.write("====== "+name+" ======\n") + f.write("====== " + name + " ======\n") if ("inherits" in node.attrib): - inh=node.attrib["inherits"].strip() - f.write("**Inherits:** [["+inh.lower()+"|"+inh+"]]\\\\\n") + inh = node.attrib["inherits"].strip() + f.write("**Inherits:** [[" + inh.lower() + "|" + inh + "]]\\\\\n") if ("category" in node.attrib): - f.write("**Category:** "+node.attrib["category"].strip()+"\\\\\n") + f.write("**Category:** " + node.attrib["category"].strip() + "\\\\\n") briefd = node.find("brief_description") - if (briefd!=None): + if (briefd != None): f.write("===== Brief Description ======\n") - f.write( dokuize_text(briefd.text.strip())+"\n" ) + f.write(dokuize_text(briefd.text.strip()) + "\n") methods = node.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods != None and len(list(methods)) > 0): f.write("===== Member Functions ======\n") for m in list(methods): - make_method(f,node.attrib["name"],m,False) + make_method(f, node.attrib["name"], m, False) events = node.find("signals") - if(events!=None and len(list(events))>0): + if(events != None and len(list(events)) > 0): f.write("===== Signals ======\n") for m in list(events): - make_method(f,node.attrib["name"],m,True,True) + make_method(f, node.attrib["name"], m, True, True) members = node.find("members") - if(members!=None and len(list(members))>0): + if(members != None and len(list(members)) > 0): f.write("===== Member Variables ======\n") for c in list(members): s = " * " - s+=make_type(c.attrib["type"])+" " - s+="**"+c.attrib["name"]+"**" - if (c.text.strip()!=""): - s+=" - "+c.text.strip() - f.write(s+"\n") + s += make_type(c.attrib["type"]) + " " + s += "**" + c.attrib["name"] + "**" + if (c.text.strip() != ""): + s += " - " + c.text.strip() + f.write(s + "\n") constants = node.find("constants") - if(constants!=None and len(list(constants))>0): + if(constants != None and len(list(constants)) > 0): f.write("===== Numeric Constants ======\n") for c in list(constants): s = " * " - s+="**"+c.attrib["name"]+"**" + s += "**" + c.attrib["name"] + "**" if ("value" in c.attrib): - s+=" = **"+c.attrib["value"]+"**" - if (c.text.strip()!=""): - s+=" - "+c.text.strip() - f.write(s+"\n") + s += " = **" + c.attrib["value"] + "**" + if (c.text.strip() != ""): + s += " - " + c.text.strip() + f.write(s + "\n") - descr=node.find("description") - if (descr!=None and descr.text.strip()!=""): + descr = node.find("description") + if (descr != None and descr.text.strip() != ""): f.write("===== Description ======\n") - f.write(dokuize_text(descr.text.strip())+"\n") + f.write(dokuize_text(descr.text.strip()) + "\n") methods = node.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods != None and len(list(methods)) > 0): f.write("===== Member Function Description ======\n") for m in list(methods): - d=m.find("description") - if (d==None or d.text.strip()==""): + d = m.find("description") + if (d == None or d.text.strip() == ""): continue - f.write("== "+m.attrib["name"]+" ==\n") - make_method(f,node.attrib["name"],m,False) + f.write("== " + m.attrib["name"] + " ==\n") + make_method(f, node.attrib["name"], m, False) f.write("\\\\\n") f.write(dokuize_text(d.text.strip())) f.write("\n") @@ -488,27 +488,27 @@ def make_doku_class(node): """ for file in input_list: tree = ET.parse(file) - doc=tree.getroot() + doc = tree.getroot() if ("version" not in doc.attrib): print("Version missing from 'doc'") sys.exit(255) - version=doc.attrib["version"] + version = doc.attrib["version"] for c in list(doc): if (c.attrib["name"] in class_names): continue class_names.append(c.attrib["name"]) - classes[c.attrib["name"]]=c + classes[c.attrib["name"]] = c class_names.sort() -make_class_list(class_names,4) +make_class_list(class_names, 4) for cn in class_names: - c=classes[cn] + c = classes[cn] make_doku_class(c) diff --git a/doc/tools/makehtml.py b/doc/tools/makehtml.py index 12f92883a4f..95a35960b74 100644 --- a/doc/tools/makehtml.py +++ b/doc/tools/makehtml.py @@ -10,7 +10,7 @@ html_escape_table = { "'": "'" } -html_unescape_table = {v:k for k, v in html_escape_table.items()} +html_unescape_table = {v: k for k, v in html_escape_table.items()} def html_escape(text): return escape(text, html_escape_table) @@ -20,7 +20,7 @@ def html_unescape(text): input_list = [] -single_page=True +single_page = True for arg in sys.argv[1:]: if arg[:1] == "-": @@ -36,106 +36,106 @@ if len(input_list) < 1: sys.exit(0) -def validate_tag(elem,tag): +def validate_tag(elem, tag): if (elem.tag != tag): - print("Tag mismatch, expected '"+tag+"', got "+elem.tag); + print("Tag mismatch, expected '" + tag + "', got " + elem.tag); sys.exit(255) def make_html_bottom(body): - #make_html_top(body,True) - ET.SubElement(body,"hr") - copyright = ET.SubElement(body,"span") + # make_html_top(body,True) + ET.SubElement(body, "hr") + copyright = ET.SubElement(body, "span") copyright.text = "Copyright 2008-2010 Codenix SRL" -def make_html_top(body,bottom=False): +def make_html_top(body, bottom=False): if (bottom): - ET.SubElement(body,"hr") + ET.SubElement(body, "hr") - table = ET.SubElement(body,"table") - table.attrib["class"]="top_table" - tr = ET.SubElement(table,"tr") - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" + table = ET.SubElement(body, "table") + table.attrib["class"] = "top_table" + tr = ET.SubElement(table, "tr") + td = ET.SubElement(tr, "td") + td.attrib["class"] = "top_table" - img = ET.SubElement(td,"image") - img.attrib["src"]="images/logo.png" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="index.html" - a.text="Index" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="alphabetical.html" - a.text="Classes" - td = ET.SubElement(tr,"td") - td.attrib["class"]="top_table" - a = ET.SubElement(td,"a") - a.attrib["href"]="category.html" - a.text="Categories" - td = ET.SubElement(tr,"td") - a = ET.SubElement(td,"a") - a.attrib["href"]="inheritance.html" - a.text="Inheritance" + img = ET.SubElement(td, "image") + img.attrib["src"] = "images/logo.png" + td = ET.SubElement(tr, "td") + td.attrib["class"] = "top_table" + a = ET.SubElement(td, "a") + a.attrib["href"] = "index.html" + a.text = "Index" + td = ET.SubElement(tr, "td") + td.attrib["class"] = "top_table" + a = ET.SubElement(td, "a") + a.attrib["href"] = "alphabetical.html" + a.text = "Classes" + td = ET.SubElement(tr, "td") + td.attrib["class"] = "top_table" + a = ET.SubElement(td, "a") + a.attrib["href"] = "category.html" + a.text = "Categories" + td = ET.SubElement(tr, "td") + a = ET.SubElement(td, "a") + a.attrib["href"] = "inheritance.html" + a.text = "Inheritance" if (not bottom): - ET.SubElement(body,"hr") + ET.SubElement(body, "hr") -def make_html_class_list(class_list,columns): +def make_html_class_list(class_list, columns): - div=ET.Element("div") - div.attrib["class"]="ClassList"; + div = ET.Element("div") + div.attrib["class"] = "ClassList"; - h1=ET.SubElement(div,"h2") - h1.text="Alphabetical Class List" + h1 = ET.SubElement(div, "h2") + h1.text = "Alphabetical Class List" - table=ET.SubElement(div,"table") - table.attrib["class"]="class_table" - table.attrib["width"]="100%" - prev=0 + table = ET.SubElement(div, "table") + table.attrib["class"] = "class_table" + table.attrib["width"] = "100%" + prev = 0 col_max = len(class_list) / columns + 1 print("col max is ", col_max) col_count = 0 row_count = 0 last_initial = "" - fit_columns=[] + fit_columns = [] - for n in range(0,columns): - fit_columns+=[[]] + for n in range(0, columns): + fit_columns += [[]] - indexers=[] - last_initial="" + indexers = [] + last_initial = "" - idx=0 + idx = 0 for n in class_list: - col = int(idx/col_max) - if (col>=columns): - col=columns-1 - fit_columns[col]+=[n] - idx+=1 - if (n[:1]!=last_initial): - indexers+=[n] - last_initial=n[:1] + col = int(idx / col_max) + if (col >= columns): + col = columns - 1 + fit_columns[col] += [n] + idx += 1 + if (n[:1] != last_initial): + indexers += [n] + last_initial = n[:1] - row_max=0 + row_max = 0 - for n in range(0,columns): - if (len(fit_columns[n])>row_max): - row_max=len(fit_columns[n]) + for n in range(0, columns): + if (len(fit_columns[n]) > row_max): + row_max = len(fit_columns[n]) - for r in range(0,row_max): - tr = ET.SubElement(table,"tr") - for c in range(0,columns): - tdi = ET.SubElement(tr,"td") - tdi.attrib["align"]="right" - td = ET.SubElement(tr,"td") - if (r>=len(fit_columns[c])): + for r in range(0, row_max): + tr = ET.SubElement(table, "tr") + for c in range(0, columns): + tdi = ET.SubElement(tr, "td") + tdi.attrib["align"] = "right" + td = ET.SubElement(tr, "td") + if (r >= len(fit_columns[c])): continue classname = fit_columns[c][r] @@ -147,17 +147,17 @@ def make_html_class_list(class_list,columns): span.text = classname[:1].upper() if (single_page): - link="#"+classname + link = "#" + classname else: - link=classname+".html" + link = classname + ".html" - a=ET.SubElement(td,"a") - a.attrib["href"]=link - a.text=classname + a = ET.SubElement(td, "a") + a.attrib["href"] = link + a.text = classname if (not single_page): - cat_class_list=ET.Element("html") + cat_class_list = ET.Element("html") csscc = ET.SubElement(cat_class_list, "link") csscc.attrib["href"] = "main.css" csscc.attrib["rel"] = "stylesheet" @@ -165,62 +165,62 @@ def make_html_class_list(class_list,columns): bodycc = ET.SubElement(cat_class_list, "body") make_html_top(bodycc) - cat_class_parent=bodycc + cat_class_parent = bodycc else: - cat_class_parent=div + cat_class_parent = div - h1=ET.SubElement(cat_class_parent,"h2") - h1.text="Class List By Category" + h1 = ET.SubElement(cat_class_parent, "h2") + h1.text = "Class List By Category" - class_cat_table={} - class_cat_list=[] + class_cat_table = {} + class_cat_list = [] for c in class_list: clss = classes[c] if ("category" in clss.attrib): - class_cat=clss.attrib["category"] + class_cat = clss.attrib["category"] else: - class_cat="Core" - if (class_cat.find("/")!=-1): - class_cat=class_cat[class_cat.rfind("/")+1:] + class_cat = "Core" + if (class_cat.find("/") != -1): + class_cat = class_cat[class_cat.rfind("/") + 1:] if (not class_cat in class_cat_list): class_cat_list.append(class_cat) - class_cat_table[class_cat]=[] + class_cat_table[class_cat] = [] class_cat_table[class_cat].append(c) class_cat_list.sort() - ct = ET.SubElement(cat_class_parent,"table") + ct = ET.SubElement(cat_class_parent, "table") for cl in class_cat_list: l = class_cat_table[cl] l.sort() - tr = ET.SubElement(ct,"tr") - tr.attrib["class"]="category_title" - td = ET.SubElement(ct,"td") - td.attrib["class"]="category_title" + tr = ET.SubElement(ct, "tr") + tr.attrib["class"] = "category_title" + td = ET.SubElement(ct, "td") + td.attrib["class"] = "category_title" - a = ET.SubElement(td,"a") - a.attrib["class"]="category_title" - a.text=cl - a.attrib["name"]="CATEGORY_"+cl + a = ET.SubElement(td, "a") + a.attrib["class"] = "category_title" + a.text = cl + a.attrib["name"] = "CATEGORY_" + cl - td = ET.SubElement(ct,"td") - td.attrib["class"]="category_title" + td = ET.SubElement(ct, "td") + td.attrib["class"] = "category_title" for clt in l: - tr = ET.SubElement(ct,"tr") - td = ET.SubElement(ct,"td") - make_type(clt,td) - clss=classes[clt] + tr = ET.SubElement(ct, "tr") + td = ET.SubElement(ct, "td") + make_type(clt, td) + clss = classes[clt] bd = clss.find("brief_description") - bdtext="" - if (bd!=None): - bdtext=bd.text - td = ET.SubElement(ct,"td") - td.text=bdtext + bdtext = "" + if (bd != None): + bdtext = bd.text + td = ET.SubElement(ct, "td") + td.text = bdtext if (not single_page): make_html_bottom(bodycc) @@ -229,31 +229,31 @@ def make_html_class_list(class_list,columns): if (not single_page): - inh_class_list=ET.Element("html") + inh_class_list = ET.Element("html") cssic = ET.SubElement(inh_class_list, "link") cssic.attrib["href"] = "main.css" cssic.attrib["rel"] = "stylesheet" cssic.attrib["type"] = "text/css" bodyic = ET.SubElement(inh_class_list, "body") make_html_top(bodyic) - inh_class_parent=bodyic + inh_class_parent = bodyic else: - inh_class_parent=div + inh_class_parent = div - h1=ET.SubElement(inh_class_parent,"h2") - h1.text="Class List By Inheritance" + h1 = ET.SubElement(inh_class_parent, "h2") + h1.text = "Class List By Inheritance" - itemlist = ET.SubElement(inh_class_parent,"list") + itemlist = ET.SubElement(inh_class_parent, "list") - class_inh_table={} + class_inh_table = {} def add_class(clss): if (clss.attrib["name"] in class_inh_table): - return #already added - parent_list=None + return # already added + parent_list = None if ("inherits" in clss.attrib): inhc = clss.attrib["inherits"] @@ -262,20 +262,20 @@ def make_html_class_list(class_list,columns): parent_list = class_inh_table[inhc].find("div") if (parent_list == None): - parent_div = ET.SubElement(class_inh_table[inhc],"div") - parent_list = ET.SubElement(parent_div,"list") - parent_div.attrib["class"]="inh_class_list" + parent_div = ET.SubElement(class_inh_table[inhc], "div") + parent_list = ET.SubElement(parent_div, "list") + parent_div.attrib["class"] = "inh_class_list" else: parent_list = parent_list.find("list") else: - parent_list=itemlist + parent_list = itemlist - item = ET.SubElement(parent_list,"li") + item = ET.SubElement(parent_list, "li") # item.attrib["class"]="inh_class_list" - class_inh_table[clss.attrib["name"]]=item - make_type(clss.attrib["name"],item) + class_inh_table[clss.attrib["name"]] = item + make_type(clss.attrib["name"], item) for c in class_list: @@ -290,98 +290,98 @@ def make_html_class_list(class_list,columns): - #h1=ET.SubElement(div,"h2") + # h1=ET.SubElement(div,"h2") #h1.text="Class List By Inheritance" return div -def make_type(p_type,p_parent): - if (p_type=="RefPtr"): - p_type="Resource" +def make_type(p_type, p_parent): + if (p_type == "RefPtr"): + p_type = "Resource" if (p_type in class_names): - a=ET.SubElement(p_parent,"a") - a.attrib["class"]="datatype_existing" - a.text=p_type+" " + a = ET.SubElement(p_parent, "a") + a.attrib["class"] = "datatype_existing" + a.text = p_type + " " if (single_page): - a.attrib["href"]="#"+p_type + a.attrib["href"] = "#" + p_type else: - a.attrib["href"]=p_type+".html" + a.attrib["href"] = p_type + ".html" else: - span=ET.SubElement(p_parent,"span") - span.attrib["class"]="datatype" - span.text=p_type+" " + span = ET.SubElement(p_parent, "span") + span.attrib["class"] = "datatype" + span.text = p_type + " " -def make_text_def(class_name,parent,text): +def make_text_def(class_name, parent, text): text = html_escape(text) - pos=0 + pos = 0 while(True): - pos = text.find("[",pos) - if (pos==-1): + pos = text.find("[", pos) + if (pos == -1): break - endq_pos=text.find("]",pos+1) - if (endq_pos==-1): + endq_pos = text.find("]", pos + 1) + if (endq_pos == -1): break - pre_text=text[:pos] - post_text=text[endq_pos+1:] - tag_text=text[pos+1:endq_pos] + pre_text = text[:pos] + post_text = text[endq_pos + 1:] + tag_text = text[pos + 1:endq_pos] if (tag_text in class_names): if (single_page): - tag_text=''+tag_text+'' + tag_text = '' + tag_text + '' else: - tag_text=''+tag_text+'' - else: #command - cmd=tag_text - space_pos=tag_text.find(" ") - if (cmd.find("html")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] - tag_text="<"+param+">" - elif(cmd.find("method")==0): - cmd=tag_text[:space_pos] - param=tag_text[space_pos+1:] + tag_text = '' + tag_text + '' + else: # command + cmd = tag_text + space_pos = tag_text.find(" ") + if (cmd.find("html") == 0): + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] + tag_text = "<" + param + ">" + elif(cmd.find("method") == 0): + cmd = tag_text[:space_pos] + param = tag_text[space_pos + 1:] - if (not single_page and param.find(".")!=-1): - class_param,method_param=param.split(".") - tag_text=tag_text=''+class_param+'.'+method_param+'()' + if (not single_page and param.find(".") != -1): + class_param, method_param = param.split(".") + tag_text = tag_text = '' + class_param + '.' + method_param + '()' else: - tag_text=tag_text=''+class_name+'.'+param+'()' - elif (cmd.find("image=")==0): - print("found image: "+cmd) - tag_text="" - elif (cmd.find("url=")==0): - tag_text="" - elif (cmd=="/url"): - tag_text="" - elif (cmd=="center"): - tag_text="
" - elif (cmd=="/center"): - tag_text="
" - elif (cmd=="br"): - tag_text="
" - elif (cmd=="i" or cmd=="/i" or cmd=="b" or cmd=="/b" or cmd=="u" or cmd=="/u"): - tag_text="<"+tag_text+">" #html direct mapping + tag_text = tag_text = '' + class_name + '.' + param + '()' + elif (cmd.find("image=") == 0): + print("found image: " + cmd) + tag_text = "" + elif (cmd.find("url=") == 0): + tag_text = "" + elif (cmd == "/url"): + tag_text = "" + elif (cmd == "center"): + tag_text = "
" + elif (cmd == "/center"): + tag_text = "
" + elif (cmd == "br"): + tag_text = "
" + elif (cmd == "i" or cmd == "/i" or cmd == "b" or cmd == "/b" or cmd == "u" or cmd == "/u"): + tag_text = "<" + tag_text + ">" # html direct mapping else: - tag_text="["+tag_text+"]" + tag_text = "[" + tag_text + "]" - text=pre_text+tag_text+post_text - pos=len(pre_text)+len(tag_text) + text = pre_text + tag_text + post_text + pos = len(pre_text) + len(tag_text) #tnode = ET.SubElement(parent,"div") - #tnode.text=text - text="
"+text+"
" + # tnode.text=text + text = "
" + text + "
" try: - tnode=ET.XML(text) + tnode = ET.XML(text) parent.append(tnode) except: - print("Error parsing description text: '"+text+"'") + print("Error parsing description text: '" + text + "'") sys.exit(255) @@ -390,292 +390,292 @@ def make_text_def(class_name,parent,text): -def make_method_def(name,m,declare,event=False): +def make_method_def(name, m, declare, event=False): - mdata={} + mdata = {} if (not declare): - div=ET.Element("tr") - div.attrib["class"]="method" - ret_parent=ET.SubElement(div,"td") - ret_parent.attrib["align"]="right" - func_parent=ET.SubElement(div,"td") + div = ET.Element("tr") + div.attrib["class"] = "method" + ret_parent = ET.SubElement(div, "td") + ret_parent.attrib["align"] = "right" + func_parent = ET.SubElement(div, "td") else: - div=ET.Element("div") - div.attrib["class"]="method" - ret_parent=div - func_parent=div + div = ET.Element("div") + div.attrib["class"] = "method" + ret_parent = div + func_parent = div - mdata["argidx"]=[] - mdata["name"]=m.attrib["name"] - qualifiers="" + mdata["argidx"] = [] + mdata["name"] = m.attrib["name"] + qualifiers = "" if ("qualifiers" in m.attrib): - qualifiers=m.attrib["qualifiers"] + qualifiers = m.attrib["qualifiers"] - args=list(m) + args = list(m) for a in args: - if (a.tag=="return"): - idx=-1 - elif (a.tag=="argument"): - idx=int(a.attrib["index"]) + if (a.tag == "return"): + idx = -1 + elif (a.tag == "argument"): + idx = int(a.attrib["index"]) else: continue mdata["argidx"].append(idx) - mdata[idx]=a + mdata[idx] = a if (not event): if (-1 in mdata["argidx"]): - make_type(mdata[-1].attrib["type"],ret_parent) + make_type(mdata[-1].attrib["type"], ret_parent) mdata["argidx"].remove(-1) else: - make_type("void",ret_parent) + make_type("void", ret_parent) - span=ET.SubElement(func_parent,"span") + span = ET.SubElement(func_parent, "span") if (declare): - span.attrib["class"]="funcdecl" - a=ET.SubElement(span,"a") - a.attrib["name"]=name+"_"+m.attrib["name"] - a.text=name+"::"+m.attrib["name"] + span.attrib["class"] = "funcdecl" + a = ET.SubElement(span, "a") + a.attrib["name"] = name + "_" + m.attrib["name"] + a.text = name + "::" + m.attrib["name"] else: - span.attrib["class"]="identifier funcdef" - a=ET.SubElement(span,"a") - a.attrib["href"]="#"+name+"_"+m.attrib["name"] - a.text=m.attrib["name"] + span.attrib["class"] = "identifier funcdef" + a = ET.SubElement(span, "a") + a.attrib["href"] = "#" + name + "_" + m.attrib["name"] + a.text = m.attrib["name"] - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="symbol" - span.text=" (" + span = ET.SubElement(func_parent, "span") + span.attrib["class"] = "symbol" + span.text = " (" for a in mdata["argidx"]: - arg=mdata[a] - if (a>0): - span=ET.SubElement(func_parent,"span") - span.text=", " + arg = mdata[a] + if (a > 0): + span = ET.SubElement(func_parent, "span") + span.text = ", " else: - span=ET.SubElement(func_parent,"span") - span.text=" " + span = ET.SubElement(func_parent, "span") + span.text = " " - make_type(arg.attrib["type"],func_parent) + make_type(arg.attrib["type"], func_parent) - span=ET.SubElement(func_parent,"span") - span.text=arg.attrib["name"] + span = ET.SubElement(func_parent, "span") + span.text = arg.attrib["name"] if ("default" in arg.attrib): - span.text=span.text+"="+arg.attrib["default"] + span.text = span.text + "=" + arg.attrib["default"] - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="symbol" + span = ET.SubElement(func_parent, "span") + span.attrib["class"] = "symbol" if (len(mdata["argidx"])): - span.text=" )" + span.text = " )" else: - span.text=")" + span.text = ")" if (qualifiers): - span=ET.SubElement(func_parent,"span") - span.attrib["class"]="qualifier" - span.text=" "+qualifiers + span = ET.SubElement(func_parent, "span") + span.attrib["class"] = "qualifier" + span.text = " " + qualifiers return div def make_html_class(node): - div=ET.Element("div") - div.attrib["class"]="class"; + div = ET.Element("div") + div.attrib["class"] = "class"; - a=ET.SubElement(div,"a") - a.attrib["name"]=node.attrib["name"] + a = ET.SubElement(div, "a") + a.attrib["name"] = node.attrib["name"] - h3=ET.SubElement(a,"h3") - h3.attrib["class"]="title class_title" - h3.text=node.attrib["name"] + h3 = ET.SubElement(a, "h3") + h3.attrib["class"] = "title class_title" + h3.text = node.attrib["name"] briefd = node.find("brief_description") - if (briefd!=None): - div2=ET.SubElement(div,"div") - div2.attrib["class"]="description class_description" - div2.text=briefd.text + if (briefd != None): + div2 = ET.SubElement(div, "div") + div2.attrib["class"] = "description class_description" + div2.text = briefd.text if ("inherits" in node.attrib): - ET.SubElement(div,"br") + ET.SubElement(div, "br") - div2=ET.SubElement(div,"div") - div2.attrib["class"]="inheritance"; + div2 = ET.SubElement(div, "div") + div2.attrib["class"] = "inheritance"; - span=ET.SubElement(div2,"span") - span.text="Inherits: " + span = ET.SubElement(div2, "span") + span.text = "Inherits: " - make_type(node.attrib["inherits"],div2) + make_type(node.attrib["inherits"], div2) if ("category" in node.attrib): - ET.SubElement(div,"br") + ET.SubElement(div, "br") - div3=ET.SubElement(div,"div") - div3.attrib["class"]="category"; + div3 = ET.SubElement(div, "div") + div3.attrib["class"] = "category"; - span=ET.SubElement(div3,"span") - span.attrib["class"]="category" - span.text="Category: " + span = ET.SubElement(div3, "span") + span.attrib["class"] = "category" + span.text = "Category: " - a = ET.SubElement(div3,"a") - a.attrib["class"]="category_ref" - a.text=node.attrib["category"] - catname=a.text - if (catname.rfind("/")!=-1): - catname=catname[catname.rfind("/"):] - catname="CATEGORY_"+catname + a = ET.SubElement(div3, "a") + a.attrib["class"] = "category_ref" + a.text = node.attrib["category"] + catname = a.text + if (catname.rfind("/") != -1): + catname = catname[catname.rfind("/"):] + catname = "CATEGORY_" + catname if (single_page): - a.attrib["href"]="#"+catname + a.attrib["href"] = "#" + catname else: - a.attrib["href"]="category.html#"+catname + a.attrib["href"] = "category.html#" + catname methods = node.find("methods") - if(methods!=None and len(list(methods))>0): + if(methods != None and len(list(methods)) > 0): - h4=ET.SubElement(div,"h4") - h4.text="Public Methods:" + h4 = ET.SubElement(div, "h4") + h4.text = "Public Methods:" - method_table=ET.SubElement(div,"table") - method_table.attrib["class"]="method_list"; + method_table = ET.SubElement(div, "table") + method_table.attrib["class"] = "method_list"; for m in list(methods): #li = ET.SubElement(div2, "li") - method_table.append( make_method_def(node.attrib["name"],m,False) ) + method_table.append(make_method_def(node.attrib["name"], m, False)) events = node.find("signals") - if(events!=None and len(list(events))>0): - h4=ET.SubElement(div,"h4") - h4.text="Events:" + if(events != None and len(list(events)) > 0): + h4 = ET.SubElement(div, "h4") + h4.text = "Events:" - event_table=ET.SubElement(div,"table") - event_table.attrib["class"]="method_list"; + event_table = ET.SubElement(div, "table") + event_table.attrib["class"] = "method_list"; for m in list(events): #li = ET.SubElement(div2, "li") - event_table.append( make_method_def(node.attrib["name"],m,False,True) ) + event_table.append(make_method_def(node.attrib["name"], m, False, True)) members = node.find("members") - if(members!=None and len(list(members))>0): + if(members != None and len(list(members)) > 0): - h4=ET.SubElement(div,"h4") - h4.text="Public Variables:" - div2=ET.SubElement(div,"div") - div2.attrib["class"]="member_list"; + h4 = ET.SubElement(div, "h4") + h4.text = "Public Variables:" + div2 = ET.SubElement(div, "div") + div2.attrib["class"] = "member_list"; for c in list(members): li = ET.SubElement(div2, "li") - div3=ET.SubElement(li,"div") - div3.attrib["class"]="member"; - make_type(c.attrib["type"],div3) - span=ET.SubElement(div3,"span") - span.attrib["class"]="identifier member_name" - span.text=" "+c.attrib["name"]+" " - span=ET.SubElement(div3,"span") - span.attrib["class"]="member_description" - span.text=c.text + div3 = ET.SubElement(li, "div") + div3.attrib["class"] = "member"; + make_type(c.attrib["type"], div3) + span = ET.SubElement(div3, "span") + span.attrib["class"] = "identifier member_name" + span.text = " " + c.attrib["name"] + " " + span = ET.SubElement(div3, "span") + span.attrib["class"] = "member_description" + span.text = c.text constants = node.find("constants") - if(constants!=None and len(list(constants))>0): + if(constants != None and len(list(constants)) > 0): - h4=ET.SubElement(div,"h4") - h4.text="Constants:" - div2=ET.SubElement(div,"div") - div2.attrib["class"]="constant_list"; + h4 = ET.SubElement(div, "h4") + h4.text = "Constants:" + div2 = ET.SubElement(div, "div") + div2.attrib["class"] = "constant_list"; for c in list(constants): li = ET.SubElement(div2, "li") - div3=ET.SubElement(li,"div") - div3.attrib["class"]="constant"; + div3 = ET.SubElement(li, "div") + div3.attrib["class"] = "constant"; - span=ET.SubElement(div3,"span") - span.attrib["class"]="identifier constant_name" - span.text=c.attrib["name"]+" " + span = ET.SubElement(div3, "span") + span.attrib["class"] = "identifier constant_name" + span.text = c.attrib["name"] + " " if ("value" in c.attrib): - span=ET.SubElement(div3,"span") - span.attrib["class"]="symbol" - span.text="= " - span=ET.SubElement(div3,"span") - span.attrib["class"]="constant_value" - span.text=c.attrib["value"]+" " - span=ET.SubElement(div3,"span") - span.attrib["class"]="constant_description" - span.text=c.text + span = ET.SubElement(div3, "span") + span.attrib["class"] = "symbol" + span.text = "= " + span = ET.SubElement(div3, "span") + span.attrib["class"] = "constant_value" + span.text = c.attrib["value"] + " " + span = ET.SubElement(div3, "span") + span.attrib["class"] = "constant_description" + span.text = c.text # ET.SubElement(div,"br") - descr=node.find("description") - if (descr!=None and descr.text.strip()!=""): - h4=ET.SubElement(div,"h4") - h4.text="Description:" + descr = node.find("description") + if (descr != None and descr.text.strip() != ""): + h4 = ET.SubElement(div, "h4") + h4.text = "Description:" - make_text_def(node.attrib["name"],div,descr.text) + make_text_def(node.attrib["name"], div, descr.text) # div2=ET.SubElement(div,"div") # div2.attrib["class"]="description"; # div2.text=descr.text - if(methods!=None or events!=None): + if(methods != None or events != None): - h4=ET.SubElement(div,"h4") - h4.text="Method Documentation:" + h4 = ET.SubElement(div, "h4") + h4.text = "Method Documentation:" iter_list = [] - if (methods!=None): - iter_list+=list(methods) - if (events!=None): - iter_list+=list(events) + if (methods != None): + iter_list += list(methods) + if (events != None): + iter_list += list(events) for m in iter_list: - descr=m.find("description") + descr = m.find("description") - if (descr==None or descr.text.strip()==""): + if (descr == None or descr.text.strip() == ""): continue; - div2=ET.SubElement(div,"div") - div2.attrib["class"]="method_doc"; + div2 = ET.SubElement(div, "div") + div2.attrib["class"] = "method_doc"; - div2.append( make_method_def(node.attrib["name"],m,True) ) + div2.append(make_method_def(node.attrib["name"], m, True)) #anchor = ET.SubElement(div2, "a") - #anchor.attrib["name"] = - make_text_def(node.attrib["name"],div2,descr.text) - #div3=ET.SubElement(div2,"div") - #div3.attrib["class"]="description"; - #div3.text=descr.text + # anchor.attrib["name"] = + make_text_def(node.attrib["name"], div2, descr.text) + # div3=ET.SubElement(div2,"div") + # div3.attrib["class"]="description"; + # div3.text=descr.text return div -class_names=[] -classes={} +class_names = [] +classes = {} for file in input_list: tree = ET.parse(file) - doc=tree.getroot() + doc = tree.getroot() if ("version" not in doc.attrib): print("Version missing from 'doc'") sys.exit(255) - version=doc.attrib["version"] + version = doc.attrib["version"] for c in list(doc): if (c.attrib["name"] in class_names): continue class_names.append(c.attrib["name"]) - classes[c.attrib["name"]]=c + classes[c.attrib["name"]] = c html = ET.Element("html") css = ET.SubElement(html, "link") @@ -691,24 +691,24 @@ if (not single_page): class_names.sort() -body.append( make_html_class_list(class_names,5) ) +body.append(make_html_class_list(class_names, 5)) for cn in class_names: - c=classes[cn] + c = classes[cn] if (single_page): - body.append( make_html_class(c)) + body.append(make_html_class(c)) else: html2 = ET.Element("html") css = ET.SubElement(html2, "link") css.attrib["href"] = "main.css" css.attrib["rel"] = "stylesheet" css.attrib["type"] = "text/css" - body2 = ET.SubElement(html2, "body" ) + body2 = ET.SubElement(html2, "body") make_html_top(body2) - body2.append( make_html_class(c) ); + body2.append(make_html_class(c)); make_html_bottom(body2) et_out = ET.ElementTree(html2) - et_out.write(c.attrib["name"]+".html") + et_out.write(c.attrib["name"] + ".html") et_out = ET.ElementTree(html) diff --git a/doc/tools/makemd.py b/doc/tools/makemd.py index 2c9d6970397..0693291f35f 100644 --- a/doc/tools/makemd.py +++ b/doc/tools/makemd.py @@ -82,7 +82,7 @@ def make_class_list(class_list, columns): else: s += ' | ' - s += '[' + classname + '](class_'+ classname.lower()+') | ' + s += '[' + classname + '](class_' + classname.lower() + ') | ' s += '\n' f.write(s) @@ -126,14 +126,14 @@ def dokuize_text(text): if param.find('.') != -1: (class_param, method_param) = param.split('.') - tag_text = '['+class_param+'.'+method_param.replace("_","_")+'](' + class_param.lower() + '#' \ + tag_text = '[' + class_param + '.' + method_param.replace("_", "_") + '](' + class_param.lower() + '#' \ + method_param + ')' else: - tag_text = '[' + param.replace("_","_") + '](#' + param + ')' + tag_text = '[' + param.replace("_", "_") + '](#' + param + ')' elif cmd.find('image=') == 0: tag_text = '![](' + cmd[6:] + ')' elif cmd.find('url=') == 0: - tag_text = '[' + cmd[4:] + ']('+cmd[4:] + tag_text = '[' + cmd[4:] + '](' + cmd[4:] elif cmd == '/url': tag_text = ')' elif cmd == 'center': @@ -205,9 +205,9 @@ def make_method( # a.attrib["name"]=name+"_"+m.attrib["name"] # a.text=name+"::"+m.attrib["name"] - s += ' **'+m.attrib['name'].replace("_","_")+'** ' + s += ' **' + m.attrib['name'].replace("_", "_") + '** ' else: - s += ' **['+ m.attrib['name'].replace("_","_")+'](#' + m.attrib['name'] + ')** ' + s += ' **[' + m.attrib['name'].replace("_", "_") + '](#' + m.attrib['name'] + ')** ' s += ' **(**' argfound = False @@ -245,13 +245,13 @@ def make_doku_class(node): name = node.attrib['name'] - f = open("class_"+name.lower() + '.md', 'wb') + f = open("class_" + name.lower() + '.md', 'wb') f.write('# ' + name + ' \n') if 'inherits' in node.attrib: inh = node.attrib['inherits'].strip() - f.write('####**Inherits:** '+make_type(inh)+'\n') + f.write('####**Inherits:** ' + make_type(inh) + '\n') if 'category' in node.attrib: f.write('####**Category:** ' + node.attrib['category'].strip() + '\n') @@ -313,7 +313,7 @@ def make_doku_class(node): d = m.find('description') if d == None or d.text.strip() == '': continue - f.write('\n#### ' + m.attrib['name'] + '\n') + f.write('\n#### ' + m.attrib['name'] + '\n') make_method(f, node.attrib['name'], m, True) f.write('\n') f.write(dokuize_text(d.text.strip())) diff --git a/doc/tools/makerst.py b/doc/tools/makerst.py index 583f9afeaad..2f3b9c269d1 100644 --- a/doc/tools/makerst.py +++ b/doc/tools/makerst.py @@ -24,11 +24,11 @@ def validate_tag(elem, tag): class_names = [] classes = {} -def ul_string(str,ul): - str+="\n" - for i in range(len(str)-1): - str+=ul - str+="\n" +def ul_string(str, ul): + str += "\n" + for i in range(len(str) - 1): + str += ul + str += "\n" return str def make_class_list(class_list, columns): @@ -89,7 +89,7 @@ def make_class_list(class_list, columns): else: s += ' | ' - s += '[' + classname + '](class_'+ classname.lower()+') | ' + s += '[' + classname + '](class_' + classname.lower() + ') | ' s += '\n' f.write(s) @@ -99,7 +99,7 @@ def make_class_list(class_list, columns): f.write("\n") -def rstize_text(text,cclass): +def rstize_text(text, cclass): # Linebreak + tabs in the XML should become two line breaks unless in a "codeblock" pos = 0 @@ -109,9 +109,9 @@ def rstize_text(text,cclass): break pre_text = text[:pos] - while text[pos+1] == '\t': + while text[pos + 1] == '\t': pos += 1 - post_text = text[pos+1:] + post_text = text[pos + 1:] # Handle codeblocks if post_text.startswith("[codeblock]"): @@ -130,14 +130,14 @@ def rstize_text(text,cclass): break to_skip = 0 - while code_pos+to_skip+1 < len(code_text) and code_text[code_pos+to_skip+1] == '\t': + while code_pos + to_skip + 1 < len(code_text) and code_text[code_pos + to_skip + 1] == '\t': to_skip += 1 - if len(code_text[code_pos+to_skip+1:])==0: + if len(code_text[code_pos + to_skip + 1:]) == 0: code_text = code_text[:code_pos] + "\n" code_pos += 1 else: - code_text = code_text[:code_pos] + "\n " + code_text[code_pos+to_skip+1:] + code_text = code_text[:code_pos] + "\n " + code_text[code_pos + to_skip + 1:] code_pos += 5 - to_skip text = pre_text + "\n[codeblock]" + code_text + post_text @@ -163,7 +163,7 @@ def rstize_text(text,cclass): pos = text.find('_', pos) if pos == -1: break - if not text[pos + 1].isalnum(): # don't escape within a snake_case word + if not text[pos + 1].isalnum(): # don't escape within a snake_case word text = text[:pos] + "\_" + text[pos + 1:] pos += 2 else: @@ -186,7 +186,7 @@ def rstize_text(text,cclass): if tag_text in class_names: tag_text = make_type(tag_text) - else: # command + else: # command cmd = tag_text space_pos = tag_text.find(' ') if cmd.find('html') == 0: @@ -199,13 +199,13 @@ def rstize_text(text,cclass): if param.find('.') != -1: (class_param, method_param) = param.split('.') - tag_text = ':ref:`'+class_param+'.'+method_param+'`' + tag_text = ':ref:`' + class_param + '.' + method_param + '`' else: - tag_text = ':ref:`' + param + '`' + tag_text = ':ref:`' + param + '`' elif cmd.find('image=') == 0: - tag_text = "" #'![](' + cmd[6:] + ')' + tag_text = "" # '![](' + cmd[6:] + ')' elif cmd.find('url=') == 0: - tag_text = ':ref:`' + cmd[4:] + '<'+cmd[4:]+">`" + tag_text = ':ref:`' + cmd[4:] + '<' + cmd[4:] + ">`" elif cmd == '/url': tag_text = ')' elif cmd == 'center': @@ -234,7 +234,7 @@ def rstize_text(text,cclass): elif cmd == 'code' or cmd == '/code': tag_text = '``' else: - tag_text = ':ref:`' + tag_text + '`' + tag_text = ':ref:`' + tag_text + '`' text = pre_text + tag_text + post_text pos = len(pre_text) + len(tag_text) @@ -248,7 +248,7 @@ def rstize_text(text,cclass): def make_type(t): global class_names if t in class_names: - return ':ref:`'+t+'`' + return ':ref:`' + t + '`' return t @@ -262,7 +262,7 @@ def make_method( pp=None ): - if (declare or pp==None): + if (declare or pp == None): t = '- ' else: t = "" @@ -289,16 +289,16 @@ def make_method( t += 'void' t += ' ' - if declare or pp==None: + if declare or pp == None: # span.attrib["class"]="funcdecl" # a=ET.SubElement(span,"a") # a.attrib["name"]=name+"_"+m.attrib["name"] # a.text=name+"::"+m.attrib["name"] - s = ' **'+m.attrib['name']+'** ' + s = ' **' + m.attrib['name'] + '** ' else: - s = ':ref:`'+ m.attrib['name']+'` ' + s = ':ref:`' + m.attrib['name'] + '` ' s += ' **(**' argfound = False @@ -331,16 +331,16 @@ def make_method( # f.write(s) if (not declare): - if (pp!=None): - pp.append( (t,s) ) + if (pp != None): + pp.append((t, s)) else: - f.write("- "+t+" "+s+"\n") + f.write("- " + t + " " + s + "\n") else: - f.write(t+s+"\n") + f.write(t + s + "\n") def make_heading(title, underline): - return title + '\n' + underline*len(title) + "\n\n" + return title + '\n' + underline * len(title) + "\n\n" @@ -348,47 +348,47 @@ def make_rst_class(node): name = node.attrib['name'] - f = codecs.open("class_"+name.lower() + '.rst', 'wb', 'utf-8') + f = codecs.open("class_" + name.lower() + '.rst', 'wb', 'utf-8') # Warn contributors not to edit this file directly f.write(".. Generated automatically by doc/tools/makerst.py in Godot's source tree.\n") f.write(".. DO NOT EDIT THIS FILE, but the doc/base/classes.xml source instead.\n\n") - f.write(".. _class_"+name+":\n\n") + f.write(".. _class_" + name + ":\n\n") f.write(make_heading(name, '=')) if 'inherits' in node.attrib: inh = node.attrib['inherits'].strip() # whle inh in classes[cn] f.write('**Inherits:** ') - first=True + first = True while(inh in classes): if (not first): f.write(" **<** ") else: - first=False + first = False f.write(make_type(inh)) inode = classes[inh] if ('inherits' in inode.attrib): - inh=inode.attrib['inherits'].strip() + inh = inode.attrib['inherits'].strip() else: - inh=None + inh = None f.write("\n\n") - inherited=[] + inherited = [] for cn in classes: - c=classes[cn] + c = classes[cn] if 'inherits' in c.attrib: - if (c.attrib['inherits'].strip()==name): + if (c.attrib['inherits'].strip() == name): inherited.append(c.attrib['name']) if (len(inherited)): f.write('**Inherited By:** ') for i in range(len(inherited)): - if (i>0): + if (i > 0): f.write(", ") f.write(make_type(inherited[i])) f.write("\n\n") @@ -398,41 +398,41 @@ def make_rst_class(node): f.write(make_heading('Brief Description', '-')) briefd = node.find('brief_description') if briefd != None: - f.write(rstize_text(briefd.text.strip(),name) + "\n\n") + f.write(rstize_text(briefd.text.strip(), name) + "\n\n") methods = node.find('methods') if methods != None and len(list(methods)) > 0: f.write(make_heading('Member Functions', '-')) - ml=[] + ml = [] for m in list(methods): - make_method(f, node.attrib['name'], m, False,name,False,ml) + make_method(f, node.attrib['name'], m, False, name, False, ml) longest_t = 0 longest_s = 0 for s in ml: sl = len(s[0]) - if (sl>longest_s): - longest_s=sl + if (sl > longest_s): + longest_s = sl tl = len(s[1]) - if (tl>longest_t): - longest_t=tl + if (tl > longest_t): + longest_t = tl - sep="+" - for i in range(longest_s+2): - sep+="-" - sep+="+" - for i in range(longest_t+2): - sep+="-" - sep+="+\n" + sep = "+" + for i in range(longest_s + 2): + sep += "-" + sep += "+" + for i in range(longest_t + 2): + sep += "-" + sep += "+\n" f.write(sep) for s in ml: rt = s[0] - while( len(rt) < longest_s ): - rt+=" " + while(len(rt) < longest_s): + rt += " " st = s[1] - while( len(st) < longest_t ): - st+=" " - f.write("| "+rt+" | "+st+" |\n") + while(len(st) < longest_t): + st += " " + f.write("| " + rt + " | " + st + " |\n") f.write(sep) f.write('\n') @@ -441,7 +441,7 @@ def make_rst_class(node): if events != None and len(list(events)) > 0: f.write(make_heading('Signals', '-')) for m in list(events): - make_method(f, node.attrib['name'], m, True,name, True) + make_method(f, node.attrib['name'], m, True, name, True) f.write('\n') members = node.find('members') @@ -466,28 +466,28 @@ def make_rst_class(node): if 'value' in c.attrib: s += ' = **' + c.attrib['value'] + '**' if c.text.strip() != '': - s += ' --- ' + rstize_text(c.text.strip(),name) + s += ' --- ' + rstize_text(c.text.strip(), name) f.write(s + '\n') f.write('\n') descr = node.find('description') if descr != None and descr.text.strip() != '': f.write(make_heading('Description', '-')) - f.write(rstize_text(descr.text.strip(),name) + "\n\n") + f.write(rstize_text(descr.text.strip(), name) + "\n\n") methods = node.find('methods') if methods != None and len(list(methods)) > 0: f.write(make_heading('Member Function Description', '-')) for m in list(methods): - f.write(".. _class_"+name+"_"+m.attrib['name']+":\n\n") + f.write(".. _class_" + name + "_" + m.attrib['name'] + ":\n\n") # f.write(ul_string(m.attrib['name'],"^")) #f.write('\n' + m.attrib['name'] + '\n------\n') - make_method(f, node.attrib['name'], m, True,name) + make_method(f, node.attrib['name'], m, True, name) f.write('\n') d = m.find('description') if d == None or d.text.strip() == '': continue - f.write(rstize_text(d.text.strip(),name)) + f.write(rstize_text(d.text.strip(), name)) f.write("\n\n") f.write('\n') @@ -510,7 +510,7 @@ for file in input_list: class_names.sort() -#Don't make class list for Sphinx, :toctree: handles it +# Don't make class list for Sphinx, :toctree: handles it #make_class_list(class_names, 2) for cn in class_names: diff --git a/drivers/SCsub b/drivers/SCsub index 27871bb3370..4ff770c7d09 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -2,7 +2,7 @@ Import('env') -env.drivers_sources=[] +env.drivers_sources = [] if ("builtin_zlib" in env and env["builtin_zlib"] == "yes"): SConscript("zlib/SCsub"); @@ -28,15 +28,15 @@ SConscript("png/SCsub"); # Tools override # FIXME: Should likely be integrated in the tools/ codebase -if (env["tools"]=="yes"): +if (env["tools"] == "yes"): SConscript("convex_decomp/SCsub"); -if env['vsproj']=="yes": +if env['vsproj'] == "yes": env.AddToVSProject(env.drivers_sources) if env.split_drivers: env.split_lib("drivers") else: - env.add_source_files(env.drivers_sources,"*.cpp") - lib = env.Library("drivers",env.drivers_sources) + env.add_source_files(env.drivers_sources, "*.cpp") + lib = env.Library("drivers", env.drivers_sources) env.Prepend(LIBS=[lib]) diff --git a/drivers/gl_context/SCsub b/drivers/gl_context/SCsub index acdc3d52e85..efc925dae49 100644 --- a/drivers/gl_context/SCsub +++ b/drivers/gl_context/SCsub @@ -2,9 +2,9 @@ Import('env') -if (env["platform"] in ["haiku","osx","windows","x11"]): +if (env["platform"] in ["haiku", "osx", "windows", "x11"]): # Thirdparty source files - if (env["glew"] != "system"): # builtin + if (env["glew"] != "system"): # builtin thirdparty_dir = "#thirdparty/glew/" thirdparty_sources = [ "glew.c", @@ -12,10 +12,10 @@ if (env["platform"] in ["haiku","osx","windows","x11"]): thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env.add_source_files(env.drivers_sources, thirdparty_sources) - env.Append(CPPFLAGS = ['-DGLEW_STATIC']) - env.Append(CPPPATH = [thirdparty_dir]) + env.Append(CPPFLAGS=['-DGLEW_STATIC']) + env.Append(CPPPATH=[thirdparty_dir]) - env.Append(CPPFLAGS = ['-DGLEW_ENABLED']) + env.Append(CPPFLAGS=['-DGLEW_ENABLED']) # Godot source files env.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/png/SCsub b/drivers/png/SCsub index 6eabd8901dc..d3ece651961 100644 --- a/drivers/png/SCsub +++ b/drivers/png/SCsub @@ -27,12 +27,12 @@ if (env["libpng"] == "builtin"): thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_png.add_source_files(env.drivers_sources, thirdparty_sources) - env_png.Append(CPPPATH = [thirdparty_dir]) + env_png.Append(CPPPATH=[thirdparty_dir]) # Currently .ASM filter_neon.S does not compile on NT. import os - if ("neon_enabled" in env and env["neon_enabled"]) and os.name!="nt": - env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=2"]) + if ("neon_enabled" in env and env["neon_enabled"]) and os.name != "nt": + env_png.Append(CPPFLAGS=["-DPNG_ARM_NEON_OPT=2"]) env_neon = env_png.Clone(); if "S_compiler" in env: env_neon['CC'] = env['S_compiler'] @@ -42,7 +42,7 @@ if (env["libpng"] == "builtin"): neon_sources.append(env_neon.Object(thirdparty_dir + "/arm/filter_neon.S")) env.drivers_sources += neon_sources else: - env_png.Append(CPPFLAGS = ["-DPNG_ARM_NEON_OPT=0"]) + env_png.Append(CPPFLAGS=["-DPNG_ARM_NEON_OPT=0"]) # Godot source files env_png.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/rtaudio/SCsub b/drivers/rtaudio/SCsub index 704c4b6740b..2b0a602965a 100644 --- a/drivers/rtaudio/SCsub +++ b/drivers/rtaudio/SCsub @@ -12,7 +12,7 @@ thirdparty_sources = [ thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env.add_source_files(env.drivers_sources, thirdparty_sources) -env.Append(CPPPATH = [thirdparty_dir]) +env.Append(CPPPATH=[thirdparty_dir]) # Driver source files env.add_source_files(env.drivers_sources, "*.cpp") diff --git a/drivers/unix/SCsub b/drivers/unix/SCsub index fe427e747fa..3766e782e41 100644 --- a/drivers/unix/SCsub +++ b/drivers/unix/SCsub @@ -2,13 +2,13 @@ Import('env') -g_set_p='#ifdef UNIX_ENABLED\n' -g_set_p+='#include "os_unix.h"\n' -g_set_p+='String OS_Unix::get_global_settings_path() const {\n' -g_set_p+='\treturn "' + env["unix_global_settings_path"]+'";\n' -g_set_p+='}\n' -g_set_p+='#endif' -f = open("os_unix_global_settings_path.cpp","wb") +g_set_p = '#ifdef UNIX_ENABLED\n' +g_set_p += '#include "os_unix.h"\n' +g_set_p += 'String OS_Unix::get_global_settings_path() const {\n' +g_set_p += '\treturn "' + env["unix_global_settings_path"] + '";\n' +g_set_p += '}\n' +g_set_p += '#endif' +f = open("os_unix_global_settings_path.cpp", "wb") f.write(g_set_p) f.close() diff --git a/drivers/zlib/SCsub b/drivers/zlib/SCsub index 928128e40ca..407deb5f6e1 100644 --- a/drivers/zlib/SCsub +++ b/drivers/zlib/SCsub @@ -23,4 +23,4 @@ thirdparty_sources = [ thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env.add_source_files(env.drivers_sources, thirdparty_sources) -env.Append(CPPPATH = [thirdparty_dir]) +env.Append(CPPPATH=[thirdparty_dir]) diff --git a/main/SCsub b/main/SCsub index cd9002de0a2..a83563f44da 100644 --- a/main/SCsub +++ b/main/SCsub @@ -2,11 +2,11 @@ Import('env') -env.main_sources=[] -env.add_source_files(env.main_sources,"*.cpp") +env.main_sources = [] +env.add_source_files(env.main_sources, "*.cpp") Export('env') -lib = env.Library("main",env.main_sources) +lib = env.Library("main", env.main_sources) env.Prepend(LIBS=[lib]) diff --git a/methods.py b/methods.py index 0ede914aec2..38dee11a353 100755 --- a/methods.py +++ b/methods.py @@ -1,231 +1,231 @@ import os -def add_source_files(self, sources, filetype, lib_env = None, shared = False): +def add_source_files(self, sources, filetype, lib_env=None, shared=False): import glob; import string; - #if not lib_objects: + # if not lib_objects: if not lib_env: lib_env = self if type(filetype) == type(""): dir = self.Dir('.').abspath - list = glob.glob(dir + "/"+filetype) + list = glob.glob(dir + "/" + filetype) for f in list: - sources.append( self.Object(f) ) + sources.append(self.Object(f)) else: for f in filetype: sources.append(self.Object(f)) -def build_shader_header( target, source, env ): +def build_shader_header(target, source, env): for x in source: print x name = str(x) - name = name[ name.rfind("/")+1: ] - name = name[ name.rfind("\\")+1: ] - name = name.replace(".","_") + name = name[name.rfind("/") + 1:] + name = name[name.rfind("\\") + 1:] + name = name.replace(".", "_") - fs = open(str(x),"r") - fd = open(str(x)+".h","w") + fs = open(str(x), "r") + fd = open(str(x) + ".h", "w") fd.write("/* this file has been generated by SCons, do not edit! */\n") - fd.write("static const char *"+name+"=\n") - line=fs.readline() + fd.write("static const char *" + name + "=\n") + line = fs.readline() while(line): - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - fd.write("\""+line+"\\n\"\n") - line=fs.readline() + line = line.replace("\r", "") + line = line.replace("\n", "") + line = line.replace("\\", "\\\\") + line = line.replace("\"", "\\\"") + fd.write("\"" + line + "\\n\"\n") + line = fs.readline() fd.write(";\n") return 0 -def build_glsl_header( filename ): +def build_glsl_header(filename): - fs = open(filename,"r") - line=fs.readline() + fs = open(filename, "r") + line = fs.readline() - vertex_lines=[] - fragment_lines=[] - uniforms=[] - attributes=[] - fbos=[] - conditionals=[] - texunits=[] - texunit_names=[] - ubos=[] - ubo_names=[] + vertex_lines = [] + fragment_lines = [] + uniforms = [] + attributes = [] + fbos = [] + conditionals = [] + texunits = [] + texunit_names = [] + ubos = [] + ubo_names = [] - reading="" - line_offset=0 - vertex_offset=0 - fragment_offset=0 + reading = "" + line_offset = 0 + vertex_offset = 0 + fragment_offset = 0 while(line): - if (line.find("[vertex]")!=-1): - reading="vertex" - line=fs.readline() - line_offset+=1 - vertex_offset=line_offset + if (line.find("[vertex]") != -1): + reading = "vertex" + line = fs.readline() + line_offset += 1 + vertex_offset = line_offset continue - if (line.find("[fragment]")!=-1): - reading="fragment" - line=fs.readline() - line_offset+=1 - fragment_offset=line_offset + if (line.find("[fragment]") != -1): + reading = "fragment" + line = fs.readline() + line_offset += 1 + fragment_offset = line_offset continue - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() + if (line.find("#ifdef ") != -1): + ifdefline = line.replace("#ifdef ", "").strip() if (not ifdefline in conditionals): - conditionals+=[ifdefline] + conditionals += [ifdefline] - if (line.find("#elif defined(")!=-1): - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() + if (line.find("#elif defined(") != -1): + ifdefline = line.replace("#elif defined(", "").strip() + ifdefline = ifdefline.replace(")", "").strip() if (not ifdefline in conditionals): - conditionals+=[ifdefline] + conditionals += [ifdefline] import re if re.search(r"^\s*uniform", line): - if (line.lower().find("texunit:")!=-1): - #texture unit - texunit = str(int( line[line.find(":")+1:].strip() )) - uline=line[:line.lower().find("//")] - uline = uline.replace("uniform",""); - uline = uline.replace(";",""); + if (line.lower().find("texunit:") != -1): + # texture unit + texunit = str(int(line[line.find(":") + 1:].strip())) + uline = line[:line.lower().find("//")] + uline = uline.replace("uniform", ""); + uline = uline.replace(";", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in texunit_names): - texunits+=[(x,texunit)] - texunit_names+=[x] + texunits += [(x, texunit)] + texunit_names += [x] - elif (line.lower().find("ubo:")!=-1): - #ubo - uboidx = str(int( line[line.find(":")+1:].strip() )) - uline=line[:line.lower().find("//")] - uline = uline[uline.find("uniform")+len("uniform"):]; - uline = uline.replace(";",""); - uline = uline.replace("{",""); + elif (line.lower().find("ubo:") != -1): + # ubo + uboidx = str(int(line[line.find(":") + 1:].strip())) + uline = line[:line.lower().find("//")] + uline = uline[uline.find("uniform") + len("uniform"):]; + uline = uline.replace(";", ""); + uline = uline.replace("{", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in ubo_names): - ubos+=[(x,uboidx)] - ubo_names+=[x] + ubos += [(x, uboidx)] + ubo_names += [x] else: - uline = line.replace("uniform",""); - uline = uline.replace(";",""); + uline = line.replace("uniform", ""); + uline = uline.replace(";", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in uniforms): - uniforms+=[x] + uniforms += [x] - if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): - uline = line.replace("in ",""); - uline = uline.replace("attribute ",""); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if ((line.strip().find("in ") == 0 or line.strip().find("attribute ") == 0) and line.find("attrib:") != -1): + uline = line.replace("in ", ""); + uline = uline.replace("attribute ", ""); + uline = uline.replace(";", ""); + uline = uline[uline.find(" "):].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("attrib:")!=-1): - name=name.strip() - bind=bind.replace("attrib:","").strip() - attributes+=[(name,bind)] + if (uline.find("//") != -1): + name, bind = uline.split("//") + if (bind.find("attrib:") != -1): + name = name.strip() + bind = bind.replace("attrib:", "").strip() + attributes += [(name, bind)] - if (line.strip().find("out ")==0): - uline = line.replace("out","").strip(); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if (line.strip().find("out ") == 0): + uline = line.replace("out", "").strip(); + uline = uline.replace(";", ""); + uline = uline[uline.find(" "):].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("drawbuffer:")!=-1): - name=name.strip() - bind=bind.replace("drawbuffer:","").strip() - fbos+=[(name,bind)] + if (uline.find("//") != -1): + name, bind = uline.split("//") + if (bind.find("drawbuffer:") != -1): + name = name.strip() + bind = bind.replace("drawbuffer:", "").strip() + fbos += [(name, bind)] - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - #line=line+"\\n\\" no need to anymore + line = line.replace("\r", "") + line = line.replace("\n", "") + line = line.replace("\\", "\\\\") + line = line.replace("\"", "\\\"") + # line=line+"\\n\\" no need to anymore - if (reading=="vertex"): - vertex_lines+=[line] - if (reading=="fragment"): - fragment_lines+=[line] + if (reading == "vertex"): + vertex_lines += [line] + if (reading == "fragment"): + fragment_lines += [line] - line=fs.readline() - line_offset+=1 + line = fs.readline() + line_offset += 1 fs.close(); - out_file = filename+".h" - fd = open(out_file,"w") + out_file = filename + ".h" + fd = open(out_file, "w") fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + out_file_base = out_file_base[out_file_base.rfind("/") + 1:] + out_file_base = out_file_base[out_file_base.rfind("\\") + 1:] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+"\n") - fd.write("#define "+out_file_ifdef+"\n") + out_file_ifdef = out_file_base.replace(".", "_").upper() + fd.write("#ifndef " + out_file_ifdef + "\n") + fd.write("#define " + out_file_ifdef + "\n") - out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"ShaderGL"; + out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "ShaderGL"; fd.write("\n\n"); fd.write("#include \"drivers/opengl/shader_gl.h\"\n\n\n"); - fd.write("class "+out_file_class+" : public ShaderGL {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + fd.write("class " + out_file_class + " : public ShaderGL {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); fd.write("public:\n\n"); if (len(conditionals)): fd.write("\tenum Conditionals {\n"); for x in conditionals: - fd.write("\t\t"+x+",\n"); + fd.write("\t\t" + x + ",\n"); fd.write("\t};\n\n"); if (len(uniforms)): fd.write("\tenum Uniforms {\n"); for x in uniforms: - fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); @@ -337,7 +337,7 @@ def build_glsl_header( filename ): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(conditionals)): for x in conditionals: - fd.write("\t\t\t\"#define "+x+"\\n\",\n"); + fd.write("\t\t\t\"#define " + x + "\\n\",\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -347,7 +347,7 @@ def build_glsl_header( filename ): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(uniforms)): for x in uniforms: - fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t\t\"" + x + "\",\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") @@ -356,7 +356,7 @@ def build_glsl_header( filename ): fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") for x in attributes: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") @@ -366,7 +366,7 @@ def build_glsl_header( filename ): if (len(fbos)): fd.write("\t\tstatic FBOPair _fbo_pairs[]={\n") for x in fbos: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic FBOPair *_fbo_pairs=NULL;\n") @@ -374,7 +374,7 @@ def build_glsl_header( filename ): if (len(ubos)): fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") for x in ubos: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic UBOPair *_ubo_pairs=NULL;\n") @@ -382,27 +382,27 @@ def build_glsl_header( filename ): if (len(texunits)): fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") for x in texunits: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") fd.write("\t\tstatic const char* _vertex_code=\"\\\n") for x in vertex_lines: - fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\t" + x + "\n"); fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: - fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\t" + x + "\n"); fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start=" + str(fragment_offset) + ";\n") - fd.write("\t\tsetup(_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_attribute_pairs,"+str(len(attributes))+",_fbo_pairs,"+str(len(fbos))+",_ubo_pairs,"+str(len(ubos))+",_texunit_pairs,"+str(len(texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(_conditional_strings," + str(len(conditionals)) + ",_uniform_strings," + str(len(uniforms)) + ",_attribute_pairs," + str(len(attributes)) + ",_fbo_pairs," + str(len(fbos)) + ",_ubo_pairs," + str(len(ubos)) + ",_texunit_pairs," + str(len(texunits)) + ",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") fd.write("};\n\n"); @@ -410,7 +410,7 @@ def build_glsl_header( filename ): fd.close(); -def build_glsl_headers( target, source, env ): +def build_glsl_headers(target, source, env): for x in source: @@ -421,110 +421,110 @@ def build_glsl_headers( target, source, env ): -def build_hlsl_dx9_header( filename ): +def build_hlsl_dx9_header(filename): - fs = open(filename,"r") - line=fs.readline() + fs = open(filename, "r") + line = fs.readline() - vertex_lines=[] - fragment_lines=[] - uniforms=[] - fragment_uniforms=[] - attributes=[] - fbos=[] - conditionals=[] + vertex_lines = [] + fragment_lines = [] + uniforms = [] + fragment_uniforms = [] + attributes = [] + fbos = [] + conditionals = [] - reading="" - line_offset=0 - vertex_offset=0 - fragment_offset=0 + reading = "" + line_offset = 0 + vertex_offset = 0 + fragment_offset = 0 while(line): - if (line.find("[vertex]")!=-1): - reading="vertex" - line=fs.readline() - line_offset+=1 - vertex_offset=line_offset + if (line.find("[vertex]") != -1): + reading = "vertex" + line = fs.readline() + line_offset += 1 + vertex_offset = line_offset continue - if (line.find("[fragment]")!=-1): - reading="fragment" - line=fs.readline() - line_offset+=1 - fragment_offset=line_offset + if (line.find("[fragment]") != -1): + reading = "fragment" + line = fs.readline() + line_offset += 1 + fragment_offset = line_offset continue - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() + if (line.find("#ifdef ") != -1): + ifdefline = line.replace("#ifdef ", "").strip() if (not ifdefline in conditionals): - conditionals+=[ifdefline] + conditionals += [ifdefline] - if (line.find("#elif defined(")!=-1): - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() + if (line.find("#elif defined(") != -1): + ifdefline = line.replace("#elif defined(", "").strip() + ifdefline = ifdefline.replace(")", "").strip() if (not ifdefline in conditionals): - conditionals+=[ifdefline] - if (line.find("uniform")!=-1): - uline = line.replace("uniform",""); - uline = uline.replace(";",""); + conditionals += [ifdefline] + if (line.find("uniform") != -1): + uline = line.replace("uniform", ""); + uline = uline.replace(";", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in uniforms): - uniforms+=[x] - fragment_uniforms+=[reading=="fragment"] - line=line.replace("\r","") - line=line.replace("\n","") - line=line.replace("\\","\\\\") - line=line.replace("\"","\\\"") - line=line+"\\n\\" + uniforms += [x] + fragment_uniforms += [reading == "fragment"] + line = line.replace("\r", "") + line = line.replace("\n", "") + line = line.replace("\\", "\\\\") + line = line.replace("\"", "\\\"") + line = line + "\\n\\" - if (reading=="vertex"): - vertex_lines+=[line] - if (reading=="fragment"): - fragment_lines+=[line] + if (reading == "vertex"): + vertex_lines += [line] + if (reading == "fragment"): + fragment_lines += [line] - line=fs.readline() - line_offset+=1 + line = fs.readline() + line_offset += 1 fs.close(); - out_file = filename+".h" - fd = open(out_file,"w") + out_file = filename + ".h" + fd = open(out_file, "w") fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + out_file_base = out_file_base[out_file_base.rfind("/") + 1:] + out_file_base = out_file_base[out_file_base.rfind("\\") + 1:] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+"\n") - fd.write("#define "+out_file_ifdef+"\n") + out_file_ifdef = out_file_base.replace(".", "_").upper() + fd.write("#ifndef " + out_file_ifdef + "\n") + fd.write("#define " + out_file_ifdef + "\n") - out_file_class = out_file_base.replace(".hlsl.h","").title().replace("_","").replace(".","")+"ShaderDX9"; + out_file_class = out_file_base.replace(".hlsl.h", "").title().replace("_", "").replace(".", "") + "ShaderDX9"; fd.write("\n\n"); fd.write("#include \"drivers/directx9/shader_dx9.h\"\n\n\n"); - fd.write("class "+out_file_class+" : public ShaderDX9 {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + fd.write("class " + out_file_class + " : public ShaderDX9 {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); fd.write("public:\n\n"); if (len(conditionals)): fd.write("\tenum Conditionals {\n"); for x in conditionals: - fd.write("\t\t"+x+",\n"); + fd.write("\t\t" + x + ",\n"); fd.write("\t};\n\n"); if (len(uniforms)): fd.write("\tenum Uniforms {\n"); for x in uniforms: - fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); @@ -604,7 +604,7 @@ def build_hlsl_dx9_header( filename ): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(conditionals)): for x in conditionals: - fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t\t\"" + x + "\",\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -614,7 +614,7 @@ def build_hlsl_dx9_header( filename ): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(uniforms)): for x in uniforms: - fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t\t\"" + x + "\",\n"); fd.write("\t\t};\n\n"); fd.write("\t\tstatic const bool _fragment_uniforms[]={\n") @@ -633,20 +633,20 @@ def build_hlsl_dx9_header( filename ): fd.write("\t\tstatic const char* _vertex_code=\"\\\n") for x in vertex_lines: - fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\t" + x + "\n"); fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: - fd.write("\t\t\t"+x+"\n"); + fd.write("\t\t\t" + x + "\n"); fd.write("\t\t\";\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start=" + str(fragment_offset) + ";\n") - fd.write("\t\tsetup(p_device,p_version,_conditional_strings,"+str(len(conditionals))+",_uniform_strings,"+str(len(uniforms))+",_fragment_uniforms,_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(p_device,p_version,_conditional_strings," + str(len(conditionals)) + ",_uniform_strings," + str(len(uniforms)) + ",_fragment_uniforms,_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") fd.write("};\n\n"); @@ -654,7 +654,7 @@ def build_hlsl_dx9_header( filename ): fd.close(); -def build_hlsl_dx9_headers( target, source, env ): +def build_hlsl_dx9_headers(target, source, env): for x in source: @@ -666,154 +666,154 @@ def build_hlsl_dx9_headers( target, source, env ): class LegacyGLHeaderStruct: def __init__(self): - self.vertex_lines=[] - self.fragment_lines=[] - self.uniforms=[] - self.attributes=[] - self.fbos=[] - self.conditionals=[] - self.enums={} - self.texunits=[] - self.texunit_names=[] - self.ubos=[] - self.ubo_names=[] + self.vertex_lines = [] + self.fragment_lines = [] + self.uniforms = [] + self.attributes = [] + self.fbos = [] + self.conditionals = [] + self.enums = {} + self.texunits = [] + self.texunit_names = [] + self.ubos = [] + self.ubo_names = [] - self.vertex_included_files=[] - self.fragment_included_files=[] + self.vertex_included_files = [] + self.fragment_included_files = [] - self.reading="" - self.line_offset=0 - self.vertex_offset=0 - self.fragment_offset=0 + self.reading = "" + self.line_offset = 0 + self.vertex_offset = 0 + self.fragment_offset = 0 -def include_file_in_legacygl_header( filename, header_data, depth ): - fs = open(filename,"r") - line=fs.readline() +def include_file_in_legacygl_header(filename, header_data, depth): + fs = open(filename, "r") + line = fs.readline() while(line): - if (line.find("[vertex]")!=-1): - header_data.reading="vertex" - line=fs.readline() - header_data.line_offset+=1 - header_data.vertex_offset=header_data.line_offset + if (line.find("[vertex]") != -1): + header_data.reading = "vertex" + line = fs.readline() + header_data.line_offset += 1 + header_data.vertex_offset = header_data.line_offset continue - if (line.find("[fragment]")!=-1): - header_data.reading="fragment" - line=fs.readline() - header_data.line_offset+=1 - header_data.fragment_offset=header_data.line_offset + if (line.find("[fragment]") != -1): + header_data.reading = "fragment" + line = fs.readline() + header_data.line_offset += 1 + header_data.fragment_offset = header_data.line_offset continue - while(line.find("#include ")!=-1): - includeline = line.replace("#include ","").strip()[1:-1] + while(line.find("#include ") != -1): + includeline = line.replace("#include ", "").strip()[1:-1] import os.path included_file = os.path.relpath(os.path.dirname(filename) + "/" + includeline) - if (not included_file in header_data.vertex_included_files and header_data.reading=="vertex"): - header_data.vertex_included_files+=[included_file] - if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): + if (not included_file in header_data.vertex_included_files and header_data.reading == "vertex"): + header_data.vertex_included_files += [included_file] + if(include_file_in_legacygl_header(included_file, header_data, depth + 1) == None): print "Error in file '" + filename + "': #include " + includeline + "could not be found!" - elif (not included_file in header_data.fragment_included_files and header_data.reading=="fragment"): - header_data.fragment_included_files+=[included_file] - if(include_file_in_legacygl_header( included_file, header_data, depth + 1 ) == None): + elif (not included_file in header_data.fragment_included_files and header_data.reading == "fragment"): + header_data.fragment_included_files += [included_file] + if(include_file_in_legacygl_header(included_file, header_data, depth + 1) == None): print "Error in file '" + filename + "': #include " + includeline + "could not be found!" - line=fs.readline() + line = fs.readline() - if (line.find("#ifdef ")!=-1 or line.find("#elif defined(")!=-1): - if (line.find("#ifdef ")!=-1): - ifdefline = line.replace("#ifdef ","").strip() + if (line.find("#ifdef ") != -1 or line.find("#elif defined(") != -1): + if (line.find("#ifdef ") != -1): + ifdefline = line.replace("#ifdef ", "").strip() else: - ifdefline = line.replace("#elif defined(","").strip() - ifdefline = ifdefline.replace(")","").strip() + ifdefline = line.replace("#elif defined(", "").strip() + ifdefline = ifdefline.replace(")", "").strip() - if (line.find("_EN_")!=-1): + if (line.find("_EN_") != -1): enumbase = ifdefline[:ifdefline.find("_EN_")]; - ifdefline = ifdefline.replace("_EN_","_") - line = line.replace("_EN_","_") + ifdefline = ifdefline.replace("_EN_", "_") + line = line.replace("_EN_", "_") # print(enumbase+":"+ifdefline); if (enumbase not in header_data.enums): - header_data.enums[enumbase]=[] + header_data.enums[enumbase] = [] if (ifdefline not in header_data.enums[enumbase]): header_data.enums[enumbase].append(ifdefline); elif (not ifdefline in header_data.conditionals): - header_data.conditionals+=[ifdefline] + header_data.conditionals += [ifdefline] - if (line.find("uniform")!=-1 and line.lower().find("texunit:")!=-1): - #texture unit - texunitstr = line[line.find(":")+1:].strip() - if (texunitstr=="auto"): - texunit="-1" + if (line.find("uniform") != -1 and line.lower().find("texunit:") != -1): + # texture unit + texunitstr = line[line.find(":") + 1:].strip() + if (texunitstr == "auto"): + texunit = "-1" else: - texunit = str(int(texunitstr )) - uline=line[:line.lower().find("//")] - uline = uline.replace("uniform",""); - uline = uline.replace("highp",""); - uline = uline.replace(";",""); + texunit = str(int(texunitstr)) + uline = line[:line.lower().find("//")] + uline = uline.replace("uniform", ""); + uline = uline.replace("highp", ""); + uline = uline.replace(";", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in header_data.texunit_names): - header_data.texunits+=[(x,texunit)] - header_data.texunit_names+=[x] + header_data.texunits += [(x, texunit)] + header_data.texunit_names += [x] - elif (line.find("uniform")!=-1): - uline = line.replace("uniform",""); - uline = uline.replace(";",""); + elif (line.find("uniform") != -1): + uline = line.replace("uniform", ""); + uline = uline.replace(";", ""); lines = uline.split(",") for x in lines: x = x.strip() - x = x[ x.rfind(" ")+1: ] - if (x.find("[")!=-1): - #unfiorm array - x = x[ :x.find("[") ] + x = x[x.rfind(" ") + 1:] + if (x.find("[") != -1): + # unfiorm array + x = x[:x.find("[")] if (not x in header_data.uniforms): - header_data.uniforms+=[x] + header_data.uniforms += [x] - if ((line.strip().find("in ")==0 or line.strip().find("attribute ")==0) and line.find("attrib:")!=-1): - uline = line.replace("in ",""); - uline = uline.replace("attribute ",""); - uline = uline.replace("highp ",""); - uline = uline.replace(";",""); - uline = uline[ uline.find(" "): ].strip() + if ((line.strip().find("in ") == 0 or line.strip().find("attribute ") == 0) and line.find("attrib:") != -1): + uline = line.replace("in ", ""); + uline = uline.replace("attribute ", ""); + uline = uline.replace("highp ", ""); + uline = uline.replace(";", ""); + uline = uline[uline.find(" "):].strip() - if (uline.find("//")!=-1): - name,bind = uline.split("//") - if (bind.find("attrib:")!=-1): - name=name.strip() - bind=bind.replace("attrib:","").strip() - header_data.attributes+=[(name,bind)] + if (uline.find("//") != -1): + name, bind = uline.split("//") + if (bind.find("attrib:") != -1): + name = name.strip() + bind = bind.replace("attrib:", "").strip() + header_data.attributes += [(name, bind)] - line=line.replace("\r","") - line=line.replace("\n","") - #line=line.replace("\\","\\\\") - #line=line.replace("\"","\\\"") - #line=line+"\\n\\" + line = line.replace("\r", "") + line = line.replace("\n", "") + # line=line.replace("\\","\\\\") + # line=line.replace("\"","\\\"") + # line=line+"\\n\\" - if (header_data.reading=="vertex"): - header_data.vertex_lines+=[line] - if (header_data.reading=="fragment"): - header_data.fragment_lines+=[line] + if (header_data.reading == "vertex"): + header_data.vertex_lines += [line] + if (header_data.reading == "fragment"): + header_data.fragment_lines += [line] - line=fs.readline() - header_data.line_offset+=1 + line = fs.readline() + header_data.line_offset += 1 fs.close(); @@ -821,31 +821,31 @@ def include_file_in_legacygl_header( filename, header_data, depth ): -def build_legacygl_header( filename, include, class_suffix, output_attribs ): +def build_legacygl_header(filename, include, class_suffix, output_attribs): header_data = LegacyGLHeaderStruct() - include_file_in_legacygl_header( filename, header_data, 0 ) + include_file_in_legacygl_header(filename, header_data, 0) - out_file = filename+".h" - fd = open(out_file,"w") + out_file = filename + ".h" + fd = open(out_file, "w") - enum_constants=[] + enum_constants = [] fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); out_file_base = out_file - out_file_base = out_file_base[ out_file_base.rfind("/")+1: ] - out_file_base = out_file_base[ out_file_base.rfind("\\")+1: ] + out_file_base = out_file_base[out_file_base.rfind("/") + 1:] + out_file_base = out_file_base[out_file_base.rfind("\\") + 1:] # print("out file "+out_file+" base " +out_file_base) - out_file_ifdef = out_file_base.replace(".","_").upper() - fd.write("#ifndef "+out_file_ifdef+class_suffix+"_120\n") - fd.write("#define "+out_file_ifdef+class_suffix+"_120\n") + out_file_ifdef = out_file_base.replace(".", "_").upper() + fd.write("#ifndef " + out_file_ifdef + class_suffix + "_120\n") + fd.write("#define " + out_file_ifdef + class_suffix + "_120\n") - out_file_class = out_file_base.replace(".glsl.h","").title().replace("_","").replace(".","")+"Shader"+class_suffix; + out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "Shader" + class_suffix; fd.write("\n\n"); fd.write("#include \"" + include + "\"\n\n\n"); - fd.write("class "+out_file_class+" : public Shader"+class_suffix+" {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \""+out_file_class+"\"; }\n"); + fd.write("class " + out_file_class + " : public Shader" + class_suffix + " {\n\n"); + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); fd.write("public:\n\n"); @@ -853,14 +853,14 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): if (len(header_data.conditionals)): fd.write("\tenum Conditionals {\n"); for x in header_data.conditionals: - fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); if (len(header_data.uniforms)): fd.write("\tenum Uniforms {\n"); for x in header_data.uniforms: - fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); @@ -969,47 +969,47 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\tvirtual void init() {\n\n"); - enum_value_count=0; + enum_value_count = 0; if (len(header_data.enums)): fd.write("\t\t//Written using math, given nonstandarity of 64 bits integer constants..\n"); fd.write("\t\tstatic const Enum _enums[]={\n") - bitofs=len(header_data.conditionals) - enum_vals=[] + bitofs = len(header_data.conditionals) + enum_vals = [] for xv in header_data.enums: - x=header_data.enums[xv] - bits=1 + x = header_data.enums[xv] + bits = 1 amt = len(x); # print(x) while(2**bits < amt): - bits+=1 + bits += 1 # print("amount: "+str(amt)+" bits "+str(bits)); - strs="{" + strs = "{" for i in range(amt): - strs+="\"#define "+x[i]+"\\n\"," + strs += "\"#define " + x[i] + "\\n\"," - v={} - v["set_mask"]="uint64_t("+str(i)+")<<"+str(bitofs) - v["clear_mask"]="((uint64_t(1)<<40)-1) ^ (((uint64_t(1)<<"+str(bits)+") - 1)<<"+str(bitofs)+")" + v = {} + v["set_mask"] = "uint64_t(" + str(i) + ")<<" + str(bitofs) + v["clear_mask"] = "((uint64_t(1)<<40)-1) ^ (((uint64_t(1)<<" + str(bits) + ") - 1)<<" + str(bitofs) + ")" enum_vals.append(v) enum_constants.append(x[i]) - strs+="NULL}" + strs += "NULL}" - fd.write("\t\t\t{(uint64_t(1<<"+str(bits)+")-1)<<"+str(bitofs)+","+str(bitofs)+","+strs+"},\n"); - bitofs+=bits + fd.write("\t\t\t{(uint64_t(1<<" + str(bits) + ")-1)<<" + str(bitofs) + "," + str(bitofs) + "," + strs + "},\n"); + bitofs += bits fd.write("\t\t};\n\n"); fd.write("\t\tstatic const EnumValue _enum_values[]={\n") - enum_value_count=len(enum_vals); + enum_value_count = len(enum_vals); for x in enum_vals: - fd.write("\t\t\t{"+x["set_mask"]+","+x["clear_mask"]+"},\n"); + fd.write("\t\t\t{" + x["set_mask"] + "," + x["clear_mask"] + "},\n"); fd.write("\t\t};\n\n"); else: @@ -1021,7 +1021,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(header_data.conditionals)): for x in header_data.conditionals: - fd.write("\t\t\t\"#define "+x+"\\n\",\n"); + fd.write("\t\t\t\"#define " + x + "\\n\",\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -1031,7 +1031,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(header_data.uniforms)): for x in header_data.uniforms: - fd.write("\t\t\t\""+x+"\",\n"); + fd.write("\t\t\t\"" + x + "\",\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") @@ -1041,7 +1041,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") for x in header_data.attributes: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") @@ -1050,7 +1050,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): if (len(header_data.texunits)): fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") for x in header_data.texunits: - fd.write("\t\t\t{\""+x[0]+"\","+x[1]+"},\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); fd.write("\t\t};\n\n"); else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") @@ -1058,28 +1058,28 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\t\tstatic const char _vertex_code[]={\n") for x in header_data.vertex_lines: for i in range(len(x)): - fd.write(str(ord(x[i]))+","); + fd.write(str(ord(x[i])) + ","); - fd.write(str(ord('\n'))+","); + fd.write(str(ord('\n')) + ","); fd.write("\t\t0};\n\n"); - fd.write("\t\tstatic const int _vertex_code_start="+str(header_data.vertex_offset)+";\n") + fd.write("\t\tstatic const int _vertex_code_start=" + str(header_data.vertex_offset) + ";\n") fd.write("\t\tstatic const char _fragment_code[]={\n") for x in header_data.fragment_lines: for i in range(len(x)): - fd.write(str(ord(x[i]))+","); + fd.write(str(ord(x[i])) + ","); - fd.write(str(ord('\n'))+","); + fd.write(str(ord('\n')) + ","); fd.write("\t\t0};\n\n"); - fd.write("\t\tstatic const int _fragment_code_start="+str(header_data.fragment_offset)+";\n") + fd.write("\t\tstatic const int _fragment_code_start=" + str(header_data.fragment_offset) + ";\n") if output_attribs: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_attribute_pairs,"+str(len(header_data.attributes))+", _texunit_pairs,"+str(len(header_data.texunits))+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(_conditional_strings," + str(len(header_data.conditionals)) + ",_uniform_strings," + str(len(header_data.uniforms)) + ",_attribute_pairs," + str(len(header_data.attributes)) + ", _texunit_pairs," + str(len(header_data.texunits)) + ",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") else: - fd.write("\t\tsetup(_conditional_strings,"+str(len(header_data.conditionals))+",_uniform_strings,"+str(len(header_data.uniforms))+",_texunit_pairs,"+str(len(header_data.texunits))+",_enums,"+str(len(header_data.enums))+",_enum_values,"+str(enum_value_count)+",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") + fd.write("\t\tsetup(_conditional_strings," + str(len(header_data.conditionals)) + ",_uniform_strings," + str(len(header_data.uniforms)) + ",_texunit_pairs," + str(len(header_data.texunits)) + ",_enums," + str(len(header_data.enums)) + ",_enum_values," + str(enum_value_count) + ",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") @@ -1088,7 +1088,7 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.write("\tenum EnumConditionals {\n") for x in enum_constants: - fd.write("\t\t"+x.upper()+",\n"); + fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); fd.write("\tvoid set_enum_conditional(EnumConditionals p_cond) { _set_enum_conditional(p_cond); }\n") @@ -1098,47 +1098,47 @@ def build_legacygl_header( filename, include, class_suffix, output_attribs ): fd.close(); -def build_legacygl_headers( target, source, env ): +def build_legacygl_headers(target, source, env): for x in source: - build_legacygl_header(str(x), include = "drivers/legacygl/shader_lgl.h", class_suffix = "LGL", output_attribs = False); + build_legacygl_header(str(x), include="drivers/legacygl/shader_lgl.h", class_suffix="LGL", output_attribs=False); return 0 -def build_gles2_headers( target, source, env ): +def build_gles2_headers(target, source, env): for x in source: - build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix = "GLES2", output_attribs = True) + build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix="GLES2", output_attribs=True) def update_version(): rev = "custom_build" - if (os.getenv("BUILD_REVISION")!=None): - rev=os.getenv("BUILD_REVISION") - print("Using custom revision: "+rev) + if (os.getenv("BUILD_REVISION") != None): + rev = os.getenv("BUILD_REVISION") + print("Using custom revision: " + rev) import version - f=open("core/version.h","wb") - f.write("#define VERSION_SHORT_NAME "+str(version.short_name)+"\n") - f.write("#define VERSION_NAME "+str(version.name)+"\n") - f.write("#define VERSION_MAJOR "+str(version.major)+"\n") - f.write("#define VERSION_MINOR "+str(version.minor)+"\n") + f = open("core/version.h", "wb") + f.write("#define VERSION_SHORT_NAME " + str(version.short_name) + "\n") + f.write("#define VERSION_NAME " + str(version.name) + "\n") + f.write("#define VERSION_MAJOR " + str(version.major) + "\n") + f.write("#define VERSION_MINOR " + str(version.minor) + "\n") if (hasattr(version, 'patch')): - f.write("#define VERSION_PATCH "+str(version.patch)+"\n") - f.write("#define VERSION_REVISION "+str(rev)+"\n") - f.write("#define VERSION_STATUS "+str(version.status)+"\n") + f.write("#define VERSION_PATCH " + str(version.patch) + "\n") + f.write("#define VERSION_REVISION " + str(rev) + "\n") + f.write("#define VERSION_STATUS " + str(version.status) + "\n") import datetime - f.write("#define VERSION_YEAR "+str(datetime.datetime.now().year)+"\n") + f.write("#define VERSION_YEAR " + str(datetime.datetime.now().year) + "\n") def parse_cg_file(fname, uniforms, sizes, conditionals): import re fs = open(fname, "r") - line=fs.readline() + line = fs.readline() while line: @@ -1172,15 +1172,15 @@ def build_cg_shader(sname): vp_uniform_sizes = [] vp_conditionals = [] - parse_cg_file("vp_"+sname+".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals); + parse_cg_file("vp_" + sname + ".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals); fp_uniforms = [] fp_uniform_sizes = [] fp_conditionals = [] - parse_cg_file("fp_"+sname+".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals); + parse_cg_file("fp_" + sname + ".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals); - fd = open("shader_"+sname+".cg.h", "w"); + fd = open("shader_" + sname + ".cg.h", "w"); fd.write('\n#include "shader_cell.h"\n'); fd.write("\nclass Shader_" + sname + " : public ShaderCell {\n"); @@ -1208,53 +1208,53 @@ def build_cg_shader(sname): import glob def detect_modules(): - module_list=[] - includes_cpp="" - register_cpp="" - unregister_cpp="" + module_list = [] + includes_cpp = "" + register_cpp = "" + unregister_cpp = "" files = glob.glob("modules/*") - files.sort() #so register_module_types does not change that often, and also plugins are registered in alphabetic order + files.sort() # so register_module_types does not change that often, and also plugins are registered in alphabetic order for x in files: if (not os.path.isdir(x)): continue - x=x.replace("modules/","") # rest of world - x=x.replace("modules\\","") # win32 + x = x.replace("modules/", "") # rest of world + x = x.replace("modules\\", "") # win32 module_list.append(x) try: - with open("modules/"+x+"/register_types.h"): - includes_cpp+='#include "modules/'+x+'/register_types.h"\n' - register_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' - register_cpp+='\tregister_'+x+'_types();\n' - register_cpp+='#endif\n' - unregister_cpp+='#ifdef MODULE_'+x.upper()+'_ENABLED\n' - unregister_cpp+='\tunregister_'+x+'_types();\n' - unregister_cpp+='#endif\n' + with open("modules/" + x + "/register_types.h"): + includes_cpp += '#include "modules/' + x + '/register_types.h"\n' + register_cpp += '#ifdef MODULE_' + x.upper() + '_ENABLED\n' + register_cpp += '\tregister_' + x + '_types();\n' + register_cpp += '#endif\n' + unregister_cpp += '#ifdef MODULE_' + x.upper() + '_ENABLED\n' + unregister_cpp += '\tunregister_' + x + '_types();\n' + unregister_cpp += '#endif\n' except IOError: pass - modules_cpp=""" + modules_cpp = """ // modules.cpp - THIS FILE IS GENERATED, DO NOT EDIT!!!!!!! #include "register_module_types.h" -"""+includes_cpp+""" +""" + includes_cpp + """ void register_module_types() { -"""+register_cpp+""" +""" + register_cpp + """ } void unregister_module_types() { -"""+unregister_cpp+""" +""" + unregister_cpp + """ } """ - f=open("modules/register_module_types.cpp","wb") + f = open("modules/register_module_types.cpp", "wb") f.write(modules_cpp) return module_list @@ -1271,7 +1271,7 @@ def win32_spawn(sh, escape, cmd, args, env): if type(env[e]) != type(""): env[e] = str(env[e]) proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + stderr=subprocess.PIPE, startupinfo=startupinfo, shell=False, env=env) data, err = proc.communicate() rv = proc.wait() if rv: @@ -1309,48 +1309,48 @@ def win32_spawn(sh, escape, cmd, args, spawnenv): return exit_code """ -def android_add_maven_repository(self,url): +def android_add_maven_repository(self, url): self.android_maven_repos.append(url) -def android_add_dependency(self,depline): +def android_add_dependency(self, depline): self.android_dependencies.append(depline) -def android_add_java_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath +def android_add_java_dir(self, subpath): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_java_dirs.append(base_path) -def android_add_res_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath +def android_add_res_dir(self, subpath): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_res_dirs.append(base_path) -def android_add_aidl_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath +def android_add_aidl_dir(self, subpath): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_aidl_dirs.append(base_path) -def android_add_jni_dir(self,subpath): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+subpath +def android_add_jni_dir(self, subpath): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_jni_dirs.append(base_path) -def android_add_default_config(self,config): +def android_add_default_config(self, config): self.android_default_config.append(config) -def android_add_to_manifest(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_manifest_chunk+=f.read() -def android_add_to_permissions(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_permission_chunk+=f.read() -def android_add_to_attributes(self,file): - base_path = self.Dir(".").abspath+"/modules/"+self.current_module+"/"+file - f = open(base_path,"rb") - self.android_appattributes_chunk+=f.read() +def android_add_to_manifest(self, file): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file + f = open(base_path, "rb") + self.android_manifest_chunk += f.read() +def android_add_to_permissions(self, file): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file + f = open(base_path, "rb") + self.android_permission_chunk += f.read() +def android_add_to_attributes(self, file): + base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file + f = open(base_path, "rb") + self.android_appattributes_chunk += f.read() def disable_module(self): self.disabled_modules.append(self.current_module) def use_windows_spawn_fix(self, platform=None): - if (os.name!="nt"): - return #not needed, only for windows + if (os.name != "nt"): + return # not needed, only for windows # On Windows, due to the limited command line length, when creating a static library # from a very high number of objects SCons will invoke "ar" once per object file; @@ -1364,7 +1364,7 @@ def use_windows_spawn_fix(self, platform=None): import subprocess - def mySubProcess(cmdline,env): + def mySubProcess(cmdline, env): prefix = "" if(platform == 'javascript'): prefix = "python.exe " @@ -1372,7 +1372,7 @@ def use_windows_spawn_fix(self, platform=None): startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW proc = subprocess.Popen(prefix + cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + stderr=subprocess.PIPE, startupinfo=startupinfo, shell=False, env=env) data, err = proc.communicate() rv = proc.wait() if rv: @@ -1386,16 +1386,16 @@ def use_windows_spawn_fix(self, platform=None): newargs = ' '.join(args[1:]) cmdline = cmd + " " + newargs - rv=0 + rv = 0 env = {str(key): str(value) for key, value in env.iteritems()} - if len(cmdline) > 32000 and cmd.endswith("ar") : + if len(cmdline) > 32000 and cmd.endswith("ar"): cmdline = cmd + " " + args[1] + " " + args[2] + " " - for i in range(3,len(args)) : - rv = mySubProcess( cmdline + args[i], env ) - if rv : + for i in range(3, len(args)): + rv = mySubProcess(cmdline + args[i], env) + if rv: break else: - rv = mySubProcess( cmdline, env ) + rv = mySubProcess(cmdline, env) return rv @@ -1435,7 +1435,7 @@ def split_lib(self, libname): if len(lib_list) > 0: import os, sys if os.name == 'posix' and sys.platform == 'msys': - env.Replace(ARFLAGS = ['rcsT']) + env.Replace(ARFLAGS=['rcsT']) lib = env.Library(libname + "_collated", lib_list) lib_list = [lib] @@ -1443,65 +1443,65 @@ def split_lib(self, libname): env.add_source_files(lib_base, "*.cpp") lib_list.insert(0, env.Library(libname, lib_base)) - env.Prepend(LIBS = lib_list) + env.Prepend(LIBS=lib_list) -def save_active_platforms(apnames,ap): +def save_active_platforms(apnames, ap): for x in ap: - pth = x+"/logo.png" + pth = x + "/logo.png" # print("open path: "+pth) - pngf=open(pth,"rb"); - b=pngf.read(1); - str=" /* AUTOGENERATED FILE, DO NOT EDIT */ \n" - str+=" static const unsigned char _"+x[9:]+"_logo[]={" - while(len(b)==1): - str+=hex(ord(b)) - b=pngf.read(1); - if (len(b)==1): - str+="," + pngf = open(pth, "rb"); + b = pngf.read(1); + str = " /* AUTOGENERATED FILE, DO NOT EDIT */ \n" + str += " static const unsigned char _" + x[9:] + "_logo[]={" + while(len(b) == 1): + str += hex(ord(b)) + b = pngf.read(1); + if (len(b) == 1): + str += "," - str+="};\n" + str += "};\n" - wf = x+"/logo.h" - logow = open(wf,"wb") + wf = x + "/logo.h" + logow = open(wf, "wb") logow.write(str) -def no_verbose(sys,env): +def no_verbose(sys, env): - #If the output is not a terminal, do nothing + # If the output is not a terminal, do nothing if not sys.stdout.isatty(): return colors = {} - colors['cyan'] = '\033[96m' + colors['cyan'] = '\033[96m' colors['purple'] = '\033[95m' - colors['blue'] = '\033[94m' - colors['green'] = '\033[92m' + colors['blue'] = '\033[94m' + colors['green'] = '\033[92m' colors['yellow'] = '\033[93m' - colors['red'] = '\033[91m' - colors['end'] = '\033[0m' + colors['red'] = '\033[91m' + colors['end'] = '\033[0m' - compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) - java_compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) + compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) + java_compile_source_message = '%sCompiling %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) compile_shared_source_message = '%sCompiling shared %s==> %s$SOURCE%s' % (colors['blue'], colors['purple'], colors['yellow'], colors['end']) - link_program_message = '%sLinking Program %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - link_library_message = '%sLinking Static Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - ranlib_library_message = '%sRanlib Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - link_shared_library_message = '%sLinking Shared Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - java_library_message = '%sCreating Java Archive %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + link_program_message = '%sLinking Program %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + link_library_message = '%sLinking Static Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + ranlib_library_message = '%sRanlib Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + link_shared_library_message = '%sLinking Shared Library %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) + java_library_message = '%sCreating Java Archive %s==> %s$TARGET%s' % (colors['red'], colors['purple'], colors['yellow'], colors['end']) - env.Append( CXXCOMSTR=[compile_source_message] ) - env.Append( CCCOMSTR=[compile_source_message] ) - env.Append( SHCCCOMSTR=[compile_shared_source_message] ) - env.Append( SHCXXCOMSTR=[compile_shared_source_message] ) - env.Append( ARCOMSTR=[link_library_message] ) - env.Append( RANLIBCOMSTR=[ranlib_library_message] ) - env.Append( SHLINKCOMSTR=[link_shared_library_message] ) - env.Append( LINKCOMSTR=[link_program_message] ) - env.Append( JARCOMSTR=[java_library_message] ) - env.Append( JAVACCOMSTR=[java_compile_source_message] ) + env.Append(CXXCOMSTR=[compile_source_message]) + env.Append(CCCOMSTR=[compile_source_message]) + env.Append(SHCCCOMSTR=[compile_shared_source_message]) + env.Append(SHCXXCOMSTR=[compile_shared_source_message]) + env.Append(ARCOMSTR=[link_library_message]) + env.Append(RANLIBCOMSTR=[ranlib_library_message]) + env.Append(SHLINKCOMSTR=[link_shared_library_message]) + env.Append(LINKCOMSTR=[link_program_message]) + env.Append(JARCOMSTR=[java_library_message]) + env.Append(JAVACCOMSTR=[java_compile_source_message]) def detect_visual_c_compiler_version(tools_env): # tools_env is the variable scons uses to call tools that execute tasks, SCons's env['ENV'] that executes tasks... @@ -1531,12 +1531,12 @@ def detect_visual_c_compiler_version(tools_env): # find() works with -1 so big ifs bellow are needed... the simplest solution, in fact # First test if amd64 and amd64_x86 compilers are present in the path - vc_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64;") + vc_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"] + "BIN\\amd64;") if(vc_amd64_compiler_detection_index > -1): vc_chosen_compiler_index = vc_amd64_compiler_detection_index vc_chosen_compiler_str = "amd64" - vc_amd64_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN\\amd64_x86;") + vc_amd64_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"] + "BIN\\amd64_x86;") if(vc_amd64_x86_compiler_detection_index > -1 and (vc_chosen_compiler_index == -1 or vc_chosen_compiler_index > vc_amd64_x86_compiler_detection_index)): @@ -1545,14 +1545,14 @@ def detect_visual_c_compiler_version(tools_env): # Now check the 32 bit compilers - vc_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"]+"BIN;") + vc_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"] + "BIN;") if(vc_x86_compiler_detection_index > -1 and (vc_chosen_compiler_index == -1 or vc_chosen_compiler_index > vc_x86_compiler_detection_index)): vc_chosen_compiler_index = vc_x86_compiler_detection_index vc_chosen_compiler_str = "x86" - vc_x86_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env['VCINSTALLDIR']+"BIN\\x86_amd64;") + vc_x86_amd64_compiler_detection_index = tools_env["PATH"].find(tools_env['VCINSTALLDIR'] + "BIN\\x86_amd64;") if(vc_x86_amd64_compiler_detection_index > -1 and (vc_chosen_compiler_index == -1 or vc_chosen_compiler_index > vc_x86_amd64_compiler_detection_index)): @@ -1560,11 +1560,11 @@ def detect_visual_c_compiler_version(tools_env): vc_chosen_compiler_str = "x86_amd64" # debug help - #print vc_amd64_compiler_detection_index - #print vc_amd64_x86_compiler_detection_index - #print vc_x86_compiler_detection_index - #print vc_x86_amd64_compiler_detection_index - #print "chosen "+str(vc_chosen_compiler_index)+ " | "+str(vc_chosen_compiler_str) + # print vc_amd64_compiler_detection_index + # print vc_amd64_x86_compiler_detection_index + # print vc_x86_compiler_detection_index + # print vc_x86_amd64_compiler_detection_index + # print "chosen "+str(vc_chosen_compiler_index)+ " | "+str(vc_chosen_compiler_str) return vc_chosen_compiler_str diff --git a/modules/SCsub b/modules/SCsub index 76ba9116121..4b9c08cf789 100644 --- a/modules/SCsub +++ b/modules/SCsub @@ -6,18 +6,18 @@ env_modules = env.Clone() Export('env_modules') -env.modules_sources=[ +env.modules_sources = [ "register_module_types.cpp", ] -#env.add_source_files(env.modules_sources,"*.cpp") +# env.add_source_files(env.modules_sources,"*.cpp") Export('env') for x in env.module_list: if (x in env.disabled_modules): continue - env_modules.Append(CPPFLAGS=["-DMODULE_"+x.upper()+"_ENABLED"]) - SConscript(x+"/SCsub") + env_modules.Append(CPPFLAGS=["-DMODULE_" + x.upper() + "_ENABLED"]) + SConscript(x + "/SCsub") -lib = env_modules.Library("modules",env.modules_sources) +lib = env_modules.Library("modules", env.modules_sources) env.Prepend(LIBS=[lib]) diff --git a/modules/enet/SCsub b/modules/enet/SCsub index 14aa59fdb8c..81881076039 100644 --- a/modules/enet/SCsub +++ b/modules/enet/SCsub @@ -7,7 +7,7 @@ Import('env_modules') env_enet = env_modules.Clone() -if (env["enet"] != "system"): # builtin +if (env["enet"] != "system"): # builtin thirdparty_dir = "#thirdparty/enet/" thirdparty_sources = [ "callbacks.c", @@ -23,6 +23,6 @@ if (env["enet"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_enet.add_source_files(env.modules_sources, thirdparty_sources) - env_enet.Append(CPPPATH = [thirdparty_dir]) + env_enet.Append(CPPPATH=[thirdparty_dir]) env_enet.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/etc1/SCsub b/modules/etc1/SCsub index 3d19e7c9a76..0c5dc66d2ed 100644 --- a/modules/etc1/SCsub +++ b/modules/etc1/SCsub @@ -14,7 +14,7 @@ thirdparty_sources = [ thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_etc1.add_source_files(env.modules_sources, thirdparty_sources) -env_etc1.Append(CPPPATH = [thirdparty_dir]) +env_etc1.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_etc1.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/freetype/SCsub b/modules/freetype/SCsub index 0e21deac6a5..c68c40265c5 100644 --- a/modules/freetype/SCsub +++ b/modules/freetype/SCsub @@ -5,7 +5,7 @@ Import('env') # Not building in a separate env as core needs it # Thirdparty source files -if (env["freetype"] != "system"): # builtin +if (env["freetype"] != "system"): # builtin thirdparty_dir = "#thirdparty/freetype/" thirdparty_sources = [ "src/autofit/autofit.c", @@ -56,13 +56,13 @@ if (env["freetype"] != "system"): # builtin # Include header for WinRT to fix build issues if "platform" in env and env["platform"] == "winrt": - env.Append(CCFLAGS = ['/FI', '"modules/freetype/winrtdef.h"']) + env.Append(CCFLAGS=['/FI', '"modules/freetype/winrtdef.h"']) - env.Append(CPPPATH = [thirdparty_dir, thirdparty_dir + "/include"]) + env.Append(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"]) # also requires libpng headers - if (env["libpng"] != "system"): # builtin - env.Append(CPPPATH = ["#thirdparty/libpng"]) + if (env["libpng"] != "system"): # builtin + env.Append(CPPPATH=["#thirdparty/libpng"]) """ FIXME: Remove this commented code if Windows can handle the monolithic lib # fix for Windows' shell miserably failing on long lines, split in two libraries @@ -81,10 +81,10 @@ if (env["freetype"] != "system"): # builtin """ lib = env.Library("freetype_builtin", thirdparty_sources) - env.Append(LIBS = [lib]) + env.Append(LIBS=[lib]) # Godot source files env.add_source_files(env.modules_sources, "*.cpp") -env.Append(CCFLAGS = ['-DFREETYPE_ENABLED']) +env.Append(CCFLAGS=['-DFREETYPE_ENABLED']) Export('env') diff --git a/modules/jpg/SCsub b/modules/jpg/SCsub index 1ecc9c0fd66..e72dc6a1cab 100644 --- a/modules/jpg/SCsub +++ b/modules/jpg/SCsub @@ -14,7 +14,7 @@ thirdparty_sources = [ thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_jpg.add_source_files(env.modules_sources, thirdparty_sources) -env_jpg.Append(CPPPATH = [thirdparty_dir]) +env_jpg.Append(CPPPATH=[thirdparty_dir]) # Godot's own source files env_jpg.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/mpc/SCsub b/modules/mpc/SCsub index deadf8687d8..80d2ab4abea 100644 --- a/modules/mpc/SCsub +++ b/modules/mpc/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_mpc = env_modules.Clone() # Thirdparty source files -if (env["libmpcdec"] != "system"): # builtin +if (env["libmpcdec"] != "system"): # builtin thirdparty_dir = "#thirdparty/libmpcdec/" thirdparty_sources = [ "huffman.c", @@ -22,7 +22,7 @@ if (env["libmpcdec"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_mpc.add_source_files(env.modules_sources, thirdparty_sources) - env_mpc.Append(CPPPATH = [thirdparty_dir]) + env_mpc.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_mpc.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/ogg/SCsub b/modules/ogg/SCsub index 3946e548427..719ee4662b6 100644 --- a/modules/ogg/SCsub +++ b/modules/ogg/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_ogg = env_modules.Clone() # Thirdparty source files -if (env["libogg"] != "system"): # builtin +if (env["libogg"] != "system"): # builtin thirdparty_dir = "#thirdparty/libogg/" thirdparty_sources = [ "bitwise.c", @@ -15,7 +15,7 @@ if (env["libogg"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_ogg.add_source_files(env.modules_sources, thirdparty_sources) - env_ogg.Append(CPPPATH = [thirdparty_dir]) + env_ogg.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_ogg.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/openssl/SCsub b/modules/openssl/SCsub index 325816fb19c..75ea9b6bdee 100644 --- a/modules/openssl/SCsub +++ b/modules/openssl/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_openssl = env_modules.Clone() # Thirdparty source files -if (env["openssl"] != "system"): # builtin +if (env["openssl"] != "system"): # builtin thirdparty_dir = "#thirdparty/openssl/" thirdparty_sources = [ @@ -664,15 +664,15 @@ if (env["openssl"] != "system"): # builtin "crypto/modes", "openssl", ] - env_openssl.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) + env_openssl.Append(CPPPATH=[thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) - env_openssl.Append(CPPFLAGS = ["-DOPENSSL_NO_ASM", "-DOPENSSL_THREADS", "-DL_ENDIAN"]) + env_openssl.Append(CPPFLAGS=["-DOPENSSL_NO_ASM", "-DOPENSSL_THREADS", "-DL_ENDIAN"]) # Workaround for compilation error with GCC/Clang when -Werror is too greedy (GH-4517) import os import methods - if not (os.name=="nt" and os.getenv("VCINSTALLDIR")): # not Windows and not MSVC - env_openssl.Append(CFLAGS = ["-Wno-error=implicit-function-declaration"]) + if not (os.name == "nt" and os.getenv("VCINSTALLDIR")): # not Windows and not MSVC + env_openssl.Append(CFLAGS=["-Wno-error=implicit-function-declaration"]) # Module sources @@ -681,7 +681,7 @@ env_openssl.add_source_files(env.modules_sources, "*.c") # platform/winrt need to know openssl is available, pass to main env if "platform" in env and env["platform"] == "winrt": - env.Append(CPPPATH = [thirdparty_dir]) - env.Append(CPPFLAGS = ['-DOPENSSL_ENABLED']); + env.Append(CPPPATH=[thirdparty_dir]) + env.Append(CPPFLAGS=['-DOPENSSL_ENABLED']); Export('env') diff --git a/modules/opus/SCsub b/modules/opus/SCsub index 2cfd439ebef..e116fc46071 100644 --- a/modules/opus/SCsub +++ b/modules/opus/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_opus = env_modules.Clone() # Thirdparty source files -if (env["opus"] != "system"): # builtin +if (env["opus"] != "system"): # builtin thirdparty_dir = "#thirdparty/opus/" thirdparty_sources = [ @@ -129,8 +129,8 @@ if (env["opus"] != "system"): # builtin opus_sources_silk = [] - if("opus_fixed_point" in env and env.opus_fixed_point=="yes"): - env_opus.Append(CFLAGS = ["-DFIXED_POINT"]) + if("opus_fixed_point" in env and env.opus_fixed_point == "yes"): + env_opus.Append(CFLAGS=["-DFIXED_POINT"]) opus_sources_silk = [ "silk/fixed/schur64_FIX.c", "silk/fixed/residual_energy16_FIX.c", @@ -205,11 +205,11 @@ if (env["opus"] != "system"): # builtin "silk/fixed", "silk/float", ] - env_opus.Append(CPPPATH = [thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) + env_opus.Append(CPPPATH=[thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) # also requires libogg - if (env["libogg"] != "system"): # builtin - env_opus.Append(CPPPATH = ["#thirdparty/libogg"]) + if (env["libogg"] != "system"): # builtin + env_opus.Append(CPPPATH=["#thirdparty/libogg"]) # Module files env_opus.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/pvr/SCsub b/modules/pvr/SCsub index 04a69cfa23d..ddca7a794e1 100644 --- a/modules/pvr/SCsub +++ b/modules/pvr/SCsub @@ -18,7 +18,7 @@ thirdparty_sources = [ thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_pvr.add_source_files(env.modules_sources, thirdparty_sources) -env_pvr.Append(CPPPATH = [thirdparty_dir]) +env_pvr.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_pvr.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/squish/SCsub b/modules/squish/SCsub index 116fc0bb7f4..462b6a05cf3 100644 --- a/modules/squish/SCsub +++ b/modules/squish/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_squish = env_modules.Clone() # Thirdparty source files -if (env["squish"] != "system"): # builtin +if (env["squish"] != "system"): # builtin thirdparty_dir = "#thirdparty/squish/" thirdparty_sources = [ "alpha.cpp", @@ -23,7 +23,7 @@ if (env["squish"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_squish.add_source_files(env.modules_sources, thirdparty_sources) - env_squish.Append(CPPPATH = [thirdparty_dir]) + env_squish.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_squish.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/theora/SCsub b/modules/theora/SCsub index cbd19ee6d68..ff34b858795 100644 --- a/modules/theora/SCsub +++ b/modules/theora/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_theora = env_modules.Clone() # Thirdparty source files -if (env["libtheora"] != "system"): # builtin +if (env["libtheora"] != "system"): # builtin thirdparty_dir = "#thirdparty/libtheora/" thirdparty_sources = [ #"analyze.c", @@ -66,18 +66,18 @@ if (env["libtheora"] != "system"): # builtin thirdparty_sources += thirdparty_sources_x86_vc if (env["x86_libtheora_opt_gcc"] or env["x86_libtheora_opt_vc"]): - env_theora.Append(CCFLAGS = ["-DOC_X86_ASM"]) + env_theora.Append(CCFLAGS=["-DOC_X86_ASM"]) thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_theora.add_source_files(env.modules_sources, thirdparty_sources) - env_theora.Append(CPPPATH = [thirdparty_dir]) + env_theora.Append(CPPPATH=[thirdparty_dir]) # also requires libogg and libvorbis - if (env["libogg"] != "system"): # builtin - env_theora.Append(CPPPATH = ["#thirdparty/libogg"]) - if (env["libvorbis"] != "system"): # builtin - env_theora.Append(CPPPATH = ["#thirdparty/libvorbis"]) + if (env["libogg"] != "system"): # builtin + env_theora.Append(CPPPATH=["#thirdparty/libogg"]) + if (env["libvorbis"] != "system"): # builtin + env_theora.Append(CPPPATH=["#thirdparty/libvorbis"]) # Godot source files env_theora.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/vorbis/SCsub b/modules/vorbis/SCsub index 4189e119cfd..3bd092105e4 100644 --- a/modules/vorbis/SCsub +++ b/modules/vorbis/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_vorbis = env_modules.Clone() # Thirdparty source files -if (env["libvorbis"] != "system"): # builtin +if (env["libvorbis"] != "system"): # builtin thirdparty_dir = "#thirdparty/libvorbis/" thirdparty_sources = [ #"analysis.c", @@ -39,11 +39,11 @@ if (env["libvorbis"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_vorbis.add_source_files(env.modules_sources, thirdparty_sources) - env_vorbis.Append(CPPPATH = [thirdparty_dir]) + env_vorbis.Append(CPPPATH=[thirdparty_dir]) # also requires libogg - if (env["libogg"] != "system"): # builtin - env_vorbis.Append(CPPPATH = ["#thirdparty/libogg"]) + if (env["libogg"] != "system"): # builtin + env_vorbis.Append(CPPPATH=["#thirdparty/libogg"]) # Godot source files env_vorbis.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/webm/SCsub b/modules/webm/SCsub index bb87e73980e..c4a707b8e0b 100644 --- a/modules/webm/SCsub +++ b/modules/webm/SCsub @@ -16,17 +16,17 @@ thirdparty_libsimplewebm_sources = [ thirdparty_libsimplewebm_sources = [thirdparty_libsimplewebm_dir + file for file in thirdparty_libsimplewebm_sources] env_webm.add_source_files(env.modules_sources, thirdparty_libsimplewebm_sources) -env_webm.Append(CPPPATH = [thirdparty_libsimplewebm_dir, thirdparty_libsimplewebm_dir + "libwebm/"]) +env_webm.Append(CPPPATH=[thirdparty_libsimplewebm_dir, thirdparty_libsimplewebm_dir + "libwebm/"]) # also requires libogg, libvorbis and libopus -if (env["libogg"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty/libogg"]) -if (env["libvorbis"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty/libvorbis"]) -if (env["opus"] != "system"): # builtin - env_webm.Append(CPPPATH = ["#thirdparty"]) +if (env["libogg"] != "system"): # builtin + env_webm.Append(CPPPATH=["#thirdparty/libogg"]) +if (env["libvorbis"] != "system"): # builtin + env_webm.Append(CPPPATH=["#thirdparty/libvorbis"]) +if (env["opus"] != "system"): # builtin + env_webm.Append(CPPPATH=["#thirdparty"]) -if (env["libvpx"] != "system"): # builtin +if (env["libvpx"] != "system"): # builtin Export('env_webm') SConscript("libvpx/SCsub") diff --git a/modules/webm/libvpx/SCsub b/modules/webm/libvpx/SCsub index 12e37886f4d..a7eca29c051 100644 --- a/modules/webm/libvpx/SCsub +++ b/modules/webm/libvpx/SCsub @@ -248,10 +248,10 @@ libvpx_sources_arm_neon_gas_apple = [libvpx_dir + file for file in libvpx_source Import('env') Import('env_webm') -env_webm.Append(CPPPATH = [libvpx_dir]) +env_webm.Append(CPPPATH=[libvpx_dir]) env_libvpx = env.Clone() -env_libvpx.Append(CPPPATH = [libvpx_dir]) +env_libvpx.Append(CPPPATH=[libvpx_dir]) cpu_bits = env["bits"] osx_fat = (env["platform"] == 'osx' and cpu_bits == 'fat') diff --git a/modules/webm/libvpx/yasm_osx_fat.py b/modules/webm/libvpx/yasm_osx_fat.py index c39b95c7803..0065e2766c3 100644 --- a/modules/webm/libvpx/yasm_osx_fat.py +++ b/modules/webm/libvpx/yasm_osx_fat.py @@ -3,9 +3,9 @@ import sys import os -includes = sys.argv[1] +includes = sys.argv[1] output_file = sys.argv[2] -input_file = sys.argv[3] +input_file = sys.argv[3] can_remove = {} diff --git a/modules/webp/SCsub b/modules/webp/SCsub index 57dde509e3b..ab60bfd6b27 100644 --- a/modules/webp/SCsub +++ b/modules/webp/SCsub @@ -6,7 +6,7 @@ Import('env_modules') env_webp = env_modules.Clone() # Thirdparty source files -if (env["libwebp"] != "system"): # builtin +if (env["libwebp"] != "system"): # builtin thirdparty_dir = "#thirdparty/libwebp/" thirdparty_sources = [ "enc/webpenc.c", @@ -115,7 +115,7 @@ if (env["libwebp"] != "system"): # builtin thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_webp.add_source_files(env.modules_sources, thirdparty_sources) - env_webp.Append(CPPPATH = [thirdparty_dir]) + env_webp.Append(CPPPATH=[thirdparty_dir]) # Godot source files env_webp.add_source_files(env.modules_sources, "*.cpp") diff --git a/platform/android/SCsub b/platform/android/SCsub index 9db6636f8b3..146f7c25a31 100644 --- a/platform/android/SCsub +++ b/platform/android/SCsub @@ -22,7 +22,7 @@ android_files = [ 'java_class_wrapper.cpp' ] -#env.Depends('#core/math/vector3.h', 'vector3_psp.h') +# env.Depends('#core/math/vector3.h', 'vector3_psp.h') #obj = env.SharedObject('godot_android.cpp') @@ -30,86 +30,86 @@ env_android = env.Clone() if env['target'] == "profile": env_android.Append(CPPFLAGS=['-DPROFILER_ENABLED']) -android_objects=[] +android_objects = [] for x in android_files: - android_objects.append( env_android.SharedObject( x ) ) + android_objects.append(env_android.SharedObject(x)) prog = None -abspath=env.Dir(".").abspath +abspath = env.Dir(".").abspath -gradle_basein = open(abspath+"/build.gradle.template","rb") -gradle_baseout = open(abspath+"/java/build.gradle","wb") +gradle_basein = open(abspath + "/build.gradle.template", "rb") +gradle_baseout = open(abspath + "/java/build.gradle", "wb") gradle_text = gradle_basein.read() -gradle_maven_repos_text="" +gradle_maven_repos_text = "" if len(env.android_maven_repos) > 0: - gradle_maven_repos_text+="maven {\n" + gradle_maven_repos_text += "maven {\n" for x in env.android_maven_repos: - gradle_maven_repos_text+="\t\t"+x+"\n" - gradle_maven_repos_text+="\t}\n" + gradle_maven_repos_text += "\t\t" + x + "\n" + gradle_maven_repos_text += "\t}\n" -gradle_maven_dependencies_text="" +gradle_maven_dependencies_text = "" for x in env.android_dependencies: - gradle_maven_dependencies_text+=x+"\n" + gradle_maven_dependencies_text += x + "\n" -gradle_java_dirs_text="" +gradle_java_dirs_text = "" for x in env.android_java_dirs: - gradle_java_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_java_dirs_text += ",'" + x.replace("\\", "/") + "'" -gradle_res_dirs_text="" +gradle_res_dirs_text = "" for x in env.android_res_dirs: - gradle_res_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_res_dirs_text += ",'" + x.replace("\\", "/") + "'" -gradle_aidl_dirs_text="" +gradle_aidl_dirs_text = "" for x in env.android_aidl_dirs: - gradle_aidl_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_aidl_dirs_text += ",'" + x.replace("\\", "/") + "'" -gradle_jni_dirs_text="" +gradle_jni_dirs_text = "" for x in env.android_jni_dirs: - gradle_jni_dirs_text+=",'"+x.replace("\\","/")+"'" + gradle_jni_dirs_text += ",'" + x.replace("\\", "/") + "'" -gradle_asset_dirs_text="" +gradle_asset_dirs_text = "" -gradle_default_config_text="" +gradle_default_config_text = "" for x in env.android_default_config: - gradle_default_config_text+=x+"\n\t\t" + gradle_default_config_text += x + "\n\t\t" -gradle_text = gradle_text.replace("$$GRADLE_REPOSITORY_URLS$$",gradle_maven_repos_text) -gradle_text = gradle_text.replace("$$GRADLE_DEPENDENCIES$$",gradle_maven_dependencies_text) -gradle_text = gradle_text.replace("$$GRADLE_JAVA_DIRS$$",gradle_java_dirs_text) -gradle_text = gradle_text.replace("$$GRADLE_RES_DIRS$$",gradle_res_dirs_text) -gradle_text = gradle_text.replace("$$GRADLE_ASSET_DIRS$$",gradle_asset_dirs_text) -gradle_text = gradle_text.replace("$$GRADLE_AIDL_DIRS$$",gradle_aidl_dirs_text) -gradle_text = gradle_text.replace("$$GRADLE_JNI_DIRS$$",gradle_jni_dirs_text) -gradle_text = gradle_text.replace("$$GRADLE_DEFAULT_CONFIG$$",gradle_default_config_text) +gradle_text = gradle_text.replace("$$GRADLE_REPOSITORY_URLS$$", gradle_maven_repos_text) +gradle_text = gradle_text.replace("$$GRADLE_DEPENDENCIES$$", gradle_maven_dependencies_text) +gradle_text = gradle_text.replace("$$GRADLE_JAVA_DIRS$$", gradle_java_dirs_text) +gradle_text = gradle_text.replace("$$GRADLE_RES_DIRS$$", gradle_res_dirs_text) +gradle_text = gradle_text.replace("$$GRADLE_ASSET_DIRS$$", gradle_asset_dirs_text) +gradle_text = gradle_text.replace("$$GRADLE_AIDL_DIRS$$", gradle_aidl_dirs_text) +gradle_text = gradle_text.replace("$$GRADLE_JNI_DIRS$$", gradle_jni_dirs_text) +gradle_text = gradle_text.replace("$$GRADLE_DEFAULT_CONFIG$$", gradle_default_config_text) -gradle_baseout.write( gradle_text ) +gradle_baseout.write(gradle_text) gradle_baseout.close() -pp_basein = open(abspath+"/AndroidManifest.xml.template","rb") -pp_baseout = open(abspath+"/java/AndroidManifest.xml","wb") +pp_basein = open(abspath + "/AndroidManifest.xml.template", "rb") +pp_baseout = open(abspath + "/java/AndroidManifest.xml", "wb") manifest = pp_basein.read() -manifest = manifest.replace("$$ADD_APPLICATION_CHUNKS$$",env.android_manifest_chunk) -manifest = manifest.replace("$$ADD_PERMISSION_CHUNKS$$",env.android_permission_chunk) -manifest = manifest.replace("$$ADD_APPATTRIBUTE_CHUNKS$$",env.android_appattributes_chunk) -pp_baseout.write( manifest ) +manifest = manifest.replace("$$ADD_APPLICATION_CHUNKS$$", env.android_manifest_chunk) +manifest = manifest.replace("$$ADD_PERMISSION_CHUNKS$$", env.android_permission_chunk) +manifest = manifest.replace("$$ADD_APPATTRIBUTE_CHUNKS$$", env.android_appattributes_chunk) +pp_baseout.write(manifest) -env_android.SharedLibrary("#bin/libgodot",[android_objects],SHLIBSUFFIX=env["SHLIBSUFFIX"]) +env_android.SharedLibrary("#bin/libgodot", [android_objects], SHLIBSUFFIX=env["SHLIBSUFFIX"]) lib_arch_dir = '' @@ -125,8 +125,8 @@ else: if lib_arch_dir != '': if env['target'] == 'release': lib_type_dir = 'release' - else: # release_debug, debug + else: # release_debug, debug lib_type_dir = 'debug' - out_dir = '#platform/android/java/libs/'+lib_type_dir+'/'+lib_arch_dir - env_android.Command(out_dir+'/libgodot_android.so', '#bin/libgodot'+env['SHLIBSUFFIX'], Move("$TARGET", "$SOURCE")) + out_dir = '#platform/android/java/libs/' + lib_type_dir + '/' + lib_arch_dir + env_android.Command(out_dir + '/libgodot_android.so', '#bin/libgodot' + env['SHLIBSUFFIX'], Move("$TARGET", "$SOURCE")) diff --git a/platform/android/detect.py b/platform/android/detect.py index 9a144528621..f1331d24d5f 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -21,12 +21,12 @@ def get_opts(): return [ ('ANDROID_NDK_ROOT', 'the path to Android NDK', os.environ.get("ANDROID_NDK_ROOT", 0)), - ('NDK_TARGET', 'toolchain to use for the NDK',os.environ.get("NDK_TARGET", "arm-linux-androideabi-4.9")), - ('NDK_TARGET_X86', 'toolchain to use for the NDK x86',os.environ.get("NDK_TARGET_X86", "x86-4.9")), - ('ndk_platform', 'compile for platform: (android- , example: android-14)',"android-14"), - ('android_arch', 'select compiler architecture: (armv7/armv6/x86)',"armv7"), - ('android_neon','enable neon (armv7 only)',"yes"), - ('android_stl','enable STL support in android port (for modules)',"no") + ('NDK_TARGET', 'toolchain to use for the NDK', os.environ.get("NDK_TARGET", "arm-linux-androideabi-4.9")), + ('NDK_TARGET_X86', 'toolchain to use for the NDK x86', os.environ.get("NDK_TARGET_X86", "x86-4.9")), + ('ndk_platform', 'compile for platform: (android- , example: android-14)', "android-14"), + ('android_arch', 'select compiler architecture: (armv7/armv6/x86)', "armv7"), + ('android_neon', 'enable neon (armv7 only)', "yes"), + ('android_stl', 'enable STL support in android port (for modules)', "no") ] @@ -34,7 +34,7 @@ def get_flags(): return [ ('tools', 'no'), - ('openssl', 'builtin'), #use builtin openssl + ('openssl', 'builtin'), # use builtin openssl ] @@ -52,16 +52,16 @@ def configure(env): # Workaround for MinGW. See: # http://www.scons.org/wiki/LongCmdLinesOnWin32 import os - if (os.name=="nt"): + if (os.name == "nt"): import subprocess - def mySubProcess(cmdline,env): - #print "SPAWNED : " + cmdline + def mySubProcess(cmdline, env): + # print "SPAWNED : " + cmdline startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW proc = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, startupinfo=startupinfo, shell = False, env = env) + stderr=subprocess.PIPE, startupinfo=startupinfo, shell=False, env=env) data, err = proc.communicate() rv = proc.wait() if rv: @@ -75,176 +75,176 @@ def configure(env): newargs = ' '.join(args[1:]) cmdline = cmd + " " + newargs - rv=0 - if len(cmdline) > 32000 and cmd.endswith("ar") : + rv = 0 + if len(cmdline) > 32000 and cmd.endswith("ar"): cmdline = cmd + " " + args[1] + " " + args[2] + " " - for i in range(3,len(args)) : - rv = mySubProcess( cmdline + args[i], env ) - if rv : + for i in range(3, len(args)): + rv = mySubProcess(cmdline + args[i], env) + if rv: break else: - rv = mySubProcess( cmdline, env ) + rv = mySubProcess(cmdline, env) return rv env['SPAWN'] = mySpawn - ndk_platform=env['ndk_platform'] + ndk_platform = env['ndk_platform'] - if env['android_arch'] not in ['armv7','armv6','x86']: - env['android_arch']='armv7' + if env['android_arch'] not in ['armv7', 'armv6', 'x86']: + env['android_arch'] = 'armv7' - if env['android_arch']=='x86': - env['NDK_TARGET']=env['NDK_TARGET_X86'] - env["x86_libtheora_opt_gcc"]=True + if env['android_arch'] == 'x86': + env['NDK_TARGET'] = env['NDK_TARGET_X86'] + env["x86_libtheora_opt_gcc"] = True if env['PLATFORM'] == 'win32': env.Tool('gcc') env['SHLIBSUFFIX'] = '.so' - neon_text="" - if env["android_arch"]=="armv7" and env['android_neon']=='yes': - neon_text=" (with neon)" - print("Godot Android!!!!! ("+env['android_arch']+")"+neon_text) + neon_text = "" + if env["android_arch"] == "armv7" and env['android_neon'] == 'yes': + neon_text = " (with neon)" + print("Godot Android!!!!! (" + env['android_arch'] + ")" + neon_text) env.Append(CPPPATH=['#platform/android']) - if env['android_arch']=='x86': - env.extra_suffix=".x86"+env.extra_suffix - elif env['android_arch']=='armv6': - env.extra_suffix=".armv6"+env.extra_suffix - elif env["android_arch"]=="armv7": - if env['android_neon']=='yes': - env.extra_suffix=".armv7.neon"+env.extra_suffix + if env['android_arch'] == 'x86': + env.extra_suffix = ".x86" + env.extra_suffix + elif env['android_arch'] == 'armv6': + env.extra_suffix = ".armv6" + env.extra_suffix + elif env["android_arch"] == "armv7": + if env['android_neon'] == 'yes': + env.extra_suffix = ".armv7.neon" + env.extra_suffix else: - env.extra_suffix=".armv7"+env.extra_suffix + env.extra_suffix = ".armv7" + env.extra_suffix - gcc_path=env["ANDROID_NDK_ROOT"]+"/toolchains/"+env["NDK_TARGET"]+"/prebuilt/"; + gcc_path = env["ANDROID_NDK_ROOT"] + "/toolchains/" + env["NDK_TARGET"] + "/prebuilt/"; if (sys.platform.startswith("linux")): if (platform.machine().endswith('64')): - gcc_path=gcc_path+"/linux-x86_64/bin" + gcc_path = gcc_path + "/linux-x86_64/bin" else: - gcc_path=gcc_path+"/linux-x86/bin" + gcc_path = gcc_path + "/linux-x86/bin" elif (sys.platform.startswith("darwin")): - gcc_path=gcc_path+"/darwin-x86_64/bin" + gcc_path = gcc_path + "/darwin-x86_64/bin" env['SHLINKFLAGS'][1] = '-shared' env['SHLIBSUFFIX'] = '.so' elif (sys.platform.startswith('win')): if (platform.machine().endswith('64')): - gcc_path=gcc_path+"/windows-x86_64/bin" + gcc_path = gcc_path + "/windows-x86_64/bin" else: - gcc_path=gcc_path+"/windows-x86/bin" + gcc_path = gcc_path + "/windows-x86/bin" - env['ENV']['PATH'] = gcc_path+":"+env['ENV']['PATH'] - if env['android_arch']=='x86': - env['CC'] = gcc_path+'/i686-linux-android-gcc' - env['CXX'] = gcc_path+'/i686-linux-android-g++' - env['AR'] = gcc_path+"/i686-linux-android-ar" - env['RANLIB'] = gcc_path+"/i686-linux-android-ranlib" - env['AS'] = gcc_path+"/i686-linux-android-as" + env['ENV']['PATH'] = gcc_path + ":" + env['ENV']['PATH'] + if env['android_arch'] == 'x86': + env['CC'] = gcc_path + '/i686-linux-android-gcc' + env['CXX'] = gcc_path + '/i686-linux-android-g++' + env['AR'] = gcc_path + "/i686-linux-android-ar" + env['RANLIB'] = gcc_path + "/i686-linux-android-ranlib" + env['AS'] = gcc_path + "/i686-linux-android-as" else: - env['CC'] = gcc_path+'/arm-linux-androideabi-gcc' - env['CXX'] = gcc_path+'/arm-linux-androideabi-g++' - env['AR'] = gcc_path+"/arm-linux-androideabi-ar" - env['RANLIB'] = gcc_path+"/arm-linux-androideabi-ranlib" - env['AS'] = gcc_path+"/arm-linux-androideabi-as" + env['CC'] = gcc_path + '/arm-linux-androideabi-gcc' + env['CXX'] = gcc_path + '/arm-linux-androideabi-g++' + env['AR'] = gcc_path + "/arm-linux-androideabi-ar" + env['RANLIB'] = gcc_path + "/arm-linux-androideabi-ranlib" + env['AS'] = gcc_path + "/arm-linux-androideabi-as" - if env['android_arch']=='x86': + if env['android_arch'] == 'x86': env['ARCH'] = 'arch-x86' else: env['ARCH'] = 'arch-arm' import string - #include path - gcc_include=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] +"/usr/include" - ld_sysroot=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH'] - #glue_include=env["ANDROID_NDK_ROOT"]+"/sources/android/native_app_glue" - ld_path=env["ANDROID_NDK_ROOT"]+"/platforms/"+ndk_platform+"/"+env['ARCH']+"/usr/lib" + # include path + gcc_include = env["ANDROID_NDK_ROOT"] + "/platforms/" + ndk_platform + "/" + env['ARCH'] + "/usr/include" + ld_sysroot = env["ANDROID_NDK_ROOT"] + "/platforms/" + ndk_platform + "/" + env['ARCH'] + # glue_include=env["ANDROID_NDK_ROOT"]+"/sources/android/native_app_glue" + ld_path = env["ANDROID_NDK_ROOT"] + "/platforms/" + ndk_platform + "/" + env['ARCH'] + "/usr/lib" env.Append(CPPPATH=[gcc_include]) # env['CCFLAGS'] = string.split('-DNO_THREADS -MMD -MP -MF -fpic -ffunction-sections -funwind-tables -fstack-protector -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__ -Wno-psabi -march=armv5te -mtune=xscale -msoft-float -fno-exceptions -mthumb -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED ') - env['neon_enabled']=False - if env['android_arch']=='x86': + env['neon_enabled'] = False + if env['android_arch'] == 'x86': env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__GLIBC__ -Wno-psabi -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - elif env["android_arch"]=="armv6": + elif env["android_arch"] == "armv6": env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_6__ -D__GLIBC__ -Wno-psabi -march=armv6 -mfpu=vfp -mfloat-abi=softfp -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - elif env["android_arch"]=="armv7": + elif env["android_arch"] == "armv7": env.Append(CCFLAGS=string.split('-DNO_STATVFS -fpic -ffunction-sections -funwind-tables -fstack-protector -fvisibility=hidden -D__ARM_ARCH_7__ -D__ARM_ARCH_7A__ -D__GLIBC__ -Wno-psabi -march=armv7-a -mfloat-abi=softfp -ftree-vectorize -funsafe-math-optimizations -fno-strict-aliasing -DANDROID -Wa,--noexecstack -DGLES2_ENABLED')) - if env['android_neon']=='yes': - env['neon_enabled']=True - env.Append(CCFLAGS=['-mfpu=neon','-D__ARM_NEON__']) + if env['android_neon'] == 'yes': + env['neon_enabled'] = True + env.Append(CCFLAGS=['-mfpu=neon', '-D__ARM_NEON__']) else: env.Append(CCFLAGS=['-mfpu=vfpv3-d16']) env.Append(LDPATH=[ld_path]) env.Append(LIBS=['OpenSLES']) # env.Append(LIBS=['c','m','stdc++','log','EGL','GLESv1_CM','GLESv2','OpenSLES','supc++','android']) - env.Append(LIBS=['EGL','OpenSLES','android']) - env.Append(LIBS=['c','m','stdc++','log','GLESv1_CM','GLESv2', 'z']) + env.Append(LIBS=['EGL', 'OpenSLES', 'android']) + env.Append(LIBS=['c', 'm', 'stdc++', 'log', 'GLESv1_CM', 'GLESv2', 'z']) - env["LINKFLAGS"]= string.split(" -g --sysroot="+ld_sysroot+" -Wl,--no-undefined -Wl,-z,noexecstack ") + env["LINKFLAGS"] = string.split(" -g --sysroot=" + ld_sysroot + " -Wl,--no-undefined -Wl,-z,noexecstack ") env.Append(LINKFLAGS=["-Wl,-soname,libgodot_android.so"]) - if (env["target"]=="release"): + if (env["target"] == "release"): - env.Append(CCFLAGS=['-O2', '-ffast-math','-fomit-frame-pointer']) + env.Append(CCFLAGS=['-O2', '-ffast-math', '-fomit-frame-pointer']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['-O2', '-ffast-math','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2', '-ffast-math', '-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): env.Append(CCFLAGS=['-D_DEBUG', '-g1', '-Wall', '-O0', '-DDEBUG_ENABLED']) env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) - env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED', '-DNO_FCNTL','-DMPC_FIXED_POINT']) + env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED', '-DNO_FCNTL', '-DMPC_FIXED_POINT']) # env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT']) # TODO: Move that to opus module's config if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): - if (env["android_arch"]=="armv6" or env["android_arch"]=="armv7"): + if (env["android_arch"] == "armv6" or env["android_arch"] == "armv7"): env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) - env.opus_fixed_point="yes" + env.opus_fixed_point = "yes" - if (env['android_stl']=='yes'): - #env.Append(CCFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/system/include"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) - if env['android_arch']=='x86': - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) - elif env['android_arch']=='armv6': - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) - elif env["android_arch"]=="armv7": - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a/include"]) - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) + if (env['android_stl'] == 'yes'): + # env.Append(CCFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/system/include"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/include"]) + if env['android_arch'] == 'x86': + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) + elif env['android_arch'] == 'armv6': + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) + elif env["android_arch"] == "armv7": + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a/include"]) + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) - env.Append(LIBS=["gnustl_static","supc++"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) + env.Append(LIBS=["gnustl_static", "supc++"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cpufeatures"]) - #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/stlport/stlport"]) - #env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/include"]) - #env.Append(LINKFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/libstdc++.a"]) + # env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/stlport/stlport"]) + # env.Append(CCFLAGS=["-I"+env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/include"]) + # env.Append(LINKFLAGS=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/libs/armeabi/libstdc++.a"]) else: - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/include"]) - env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cpufeatures"]) - if env['android_arch']=='x86': - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) - elif env["android_arch"]=="armv6": - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) - elif env["android_arch"]=="armv7": - env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"]+"/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/include"]) + env.Append(CPPPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cpufeatures"]) + if env['android_arch'] == 'x86': + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/x86"]) + elif env["android_arch"] == "armv6": + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi"]) + elif env["android_arch"] == "armv7": + env.Append(LIBPATH=[env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi-v7a"]) env.Append(LIBS=['gnustl_static']) - env.Append(CCFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST']) + env.Append(CCFLAGS=["-fno-exceptions", '-DNO_SAFE_CAST']) import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) env.use_windows_spawn_fix() diff --git a/platform/bb10/detect.py b/platform/bb10/detect.py index 76ccbe3af6d..ed320bebc17 100644 --- a/platform/bb10/detect.py +++ b/platform/bb10/detect.py @@ -53,7 +53,7 @@ def configure(env): env['OBJSUFFIX'] = ".qnx.${qnx_target}.o" env['LIBSUFFIX'] = ".qnx.${qnx_target}.a" env['PROGSUFFIX'] = ".qnx.${qnx_target}" - print("PROGSUFFIX: "+env['PROGSUFFIX']+" target: "+env['qnx_target']) + print("PROGSUFFIX: " + env['PROGSUFFIX'] + " target: " + env['qnx_target']) env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/bin') env.PrependENVPath('PATH', env['QNX_CONFIGURATION'] + '/usr/bin') @@ -66,23 +66,23 @@ def configure(env): env['AR'] = '$qnx_prefix-ar' env['RANLIB'] = '$qnx_prefix-ranlib' - env.Append(CPPPATH = ['#platform/bb10']) - env.Append(LIBPATH = ['#platform/bb10/lib/$qnx_target', '#platform/bb10/lib/$qnx_target_ver']) - env.Append(CCFLAGS = string.split('-DBB10_ENABLED -DUNIX_ENABLED -DGLES2_ENABLED -DGLES1_ENABLED -D_LITTLE_ENDIAN -DNO_THREADS -DNO_FCNTL')) - if env['bb10_exceptions']=="yes": - env.Append(CCFLAGS = ['-fexceptions']) + env.Append(CPPPATH=['#platform/bb10']) + env.Append(LIBPATH=['#platform/bb10/lib/$qnx_target', '#platform/bb10/lib/$qnx_target_ver']) + env.Append(CCFLAGS=string.split('-DBB10_ENABLED -DUNIX_ENABLED -DGLES2_ENABLED -DGLES1_ENABLED -D_LITTLE_ENDIAN -DNO_THREADS -DNO_FCNTL')) + if env['bb10_exceptions'] == "yes": + env.Append(CCFLAGS=['-fexceptions']) else: - env.Append(CCFLAGS = ['-fno-exceptions']) + env.Append(CCFLAGS=['-fno-exceptions']) - #env.Append(LINKFLAGS = string.split() + # env.Append(LINKFLAGS = string.split() - if (env["target"]=="release"): + if (env["target"] == "release"): - env.Append(CCFLAGS=['-O3','-DRELEASE_BUILD']) + env.Append(CCFLAGS=['-O3', '-DRELEASE_BUILD']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['-g', '-O0','-DDEBUG_ENABLED', '-D_DEBUG']) + env.Append(CCFLAGS=['-g', '-O0', '-DDEBUG_ENABLED', '-D_DEBUG']) env.Append(LINKFLAGS=['-g']) env.Append(LIBS=['bps', 'pps', 'screen', 'socket', 'EGL', 'GLESv2', 'GLESv1_CM', 'm', 'asound']) diff --git a/platform/haiku/SCsub b/platform/haiku/SCsub index bac287ef62c..23e5e2e7119 100644 --- a/platform/haiku/SCsub +++ b/platform/haiku/SCsub @@ -20,7 +20,7 @@ target = env.Program( command = env.Command('#bin/godot.rsrc', '#platform/haiku/godot.rdef', ['rc -o $TARGET $SOURCE']) -def addResourcesAction(target = None, source = None, env = None): +def addResourcesAction(target=None, source=None, env=None): return env.Execute('xres -o ' + File(target)[0].path + ' bin/godot.rsrc') env.AddPostAction(target, addResourcesAction) diff --git a/platform/haiku/detect.py b/platform/haiku/detect.py index aef423b404b..e64be0a188f 100644 --- a/platform/haiku/detect.py +++ b/platform/haiku/detect.py @@ -18,7 +18,7 @@ def can_build(): def get_opts(): return [ - ('debug_release', 'Add debug symbols to release version','no') + ('debug_release', 'Add debug symbols to release version', 'no') ] def get_flags(): @@ -28,34 +28,34 @@ def get_flags(): def configure(env): is64 = sys.maxsize > 2**32 - if (env["bits"]=="default"): + if (env["bits"] == "default"): if (is64): - env["bits"]="64" + env["bits"] = "64" else: - env["bits"]="32" + env["bits"] = "32" - env.Append(CPPPATH = ['#platform/haiku']) + env.Append(CPPPATH=['#platform/haiku']) env["CC"] = "gcc-x86" env["CXX"] = "g++-x86" - if (env["target"]=="release"): - if (env["debug_release"]=="yes"): + if (env["target"] == "release"): + if (env["debug_release"] == "yes"): env.Append(CCFLAGS=['-g2']) else: - env.Append(CCFLAGS=['-O3','-ffast-math']) - elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-O3', '-ffast-math']) + elif (env["target"] == "release_debug"): + env.Append(CCFLAGS=['-O2', '-ffast-math', '-DDEBUG_ENABLED']) + elif (env["target"] == "debug"): + env.Append(CCFLAGS=['-g2', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) - #env.Append(CCFLAGS=['-DFREETYPE_ENABLED']) - env.Append(CPPFLAGS = ['-DPTHREAD_NO_RENAME']) # TODO: enable when we have pthread_setname_np - env.Append(CPPFLAGS = ['-DOPENGL_ENABLED', '-DMEDIA_KIT_ENABLED']) - env.Append(CPPFLAGS = ['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DGLES_OVER_GL']) - env.Append(LIBS = ['be', 'game', 'media', 'network', 'bnetapi', 'z', 'GL']) + # env.Append(CCFLAGS=['-DFREETYPE_ENABLED']) + env.Append(CPPFLAGS=['-DPTHREAD_NO_RENAME']) # TODO: enable when we have pthread_setname_np + env.Append(CPPFLAGS=['-DOPENGL_ENABLED', '-DMEDIA_KIT_ENABLED']) + env.Append(CPPFLAGS=['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DGLES_OVER_GL']) + env.Append(LIBS=['be', 'game', 'media', 'network', 'bnetapi', 'z', 'GL']) import methods - env.Append(BUILDERS = {'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) - env.Append(BUILDERS = {'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) - env.Append(BUILDERS = {'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl')}) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 1d7797ebd3f..7e71faa377a 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -19,7 +19,7 @@ iphone_lib = [ 'ios.mm', ] -#env.Depends('#core/math/vector3.h', 'vector3_psp.h') +# env.Depends('#core/math/vector3.h', 'vector3_psp.h') #iphone_lib = env.Library('iphone', iphone_lib) @@ -30,7 +30,7 @@ if env['ios_gles22_override'] == "yes": env_ios.Append(CPPFLAGS=['-DGLES2_OVERRIDE']) -#if env['ios_appirater'] == "yes": +# if env['ios_appirater'] == "yes": # env_ios.Append(CPPFLAGS=['-DAPPIRATER_ENABLED']) @@ -38,5 +38,5 @@ obj = env_ios.Object('godot_iphone.cpp') prog = None prog = env_ios.Program('#bin/godot', [obj] + iphone_lib) -action = "$IPHONEPATH/usr/bin/dsymutil "+File(prog)[0].path+" -o " + File(prog)[0].path + ".dSYM" +action = "$IPHONEPATH/usr/bin/dsymutil " + File(prog)[0].path + " -o " + File(prog)[0].path + ".dSYM" env.AddPostAction(prog, action) diff --git a/platform/iphone/detect.py b/platform/iphone/detect.py index cea79d0f212..541487d5868 100644 --- a/platform/iphone/detect.py +++ b/platform/iphone/detect.py @@ -39,7 +39,7 @@ def get_flags(): ('tools', 'no'), ('webp', 'yes'), ('builtin_zlib', 'yes'), - ('openssl','builtin'), #use builtin openssl + ('openssl', 'builtin'), # use builtin openssl ] @@ -48,7 +48,7 @@ def configure(env): env.Append(CPPPATH=['#platform/iphone']) - env['ENV']['PATH'] = env['IPHONEPATH']+"/Developer/usr/bin/:"+env['ENV']['PATH'] + env['ENV']['PATH'] = env['IPHONEPATH'] + "/Developer/usr/bin/:" + env['ENV']['PATH'] env['CC'] = '$IPHONEPATH/usr/bin/${ios_triple}clang' env['CXX'] = '$IPHONEPATH/usr/bin/${ios_triple}clang++' @@ -56,21 +56,21 @@ def configure(env): env['RANLIB'] = '$IPHONEPATH/usr/bin/${ios_triple}ranlib' import string - if (env["ios_sim"]=="yes" or env["arch"] == "x86"): # i386, simulator - env["arch"]="x86" - env["bits"]="32" + if (env["ios_sim"] == "yes" or env["arch"] == "x86"): # i386, simulator + env["arch"] = "x86" + env["bits"] = "32" env['CCFLAGS'] = string.split('-arch i386 -fobjc-abi-version=2 -fobjc-legacy-dispatch -fmessage-length=0 -fpascal-strings -fasm-blocks -Wall -D__IPHONE_OS_VERSION_MIN_REQUIRED=40100 -isysroot $IPHONESDK -mios-simulator-version-min=4.3 -DCUSTOM_MATRIX_TRANSFORM_H=\\\"build/iphone/matrix4_iphone.h\\\" -DCUSTOM_VECTOR3_TRANSFORM_H=\\\"build/iphone/vector3_iphone.h\\\"') - elif (env["arch"]=="arm64"): # arm64 + elif (env["arch"] == "arm64"): # arm64 env["bits"] = "64" env['CCFLAGS'] = string.split('-fno-objc-arc -arch arm64 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -fvisibility=hidden -Wno-sign-conversion -MMD -MT dependencies -miphoneos-version-min=5.1.1 -isysroot $IPHONESDK') env.Append(CPPFLAGS=['-DNEED_LONG_INT']) env.Append(CPPFLAGS=['-DLIBYUV_DISABLE_NEON']) - else: # armv7 + else: # armv7 env["arch"] = "arm" env["bits"] = "32" env['CCFLAGS'] = string.split('-fno-objc-arc -arch armv7 -fmessage-length=0 -fno-strict-aliasing -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits -Wno-trigraphs -fpascal-strings -Wmissing-prototypes -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wunused-variable -Wunused-value -Wno-shorten-64-to-32 -isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.sdk -fvisibility=hidden -Wno-sign-conversion -mthumb "-DIBOutlet=__attribute__((iboutlet))" "-DIBOutletCollection(ClassName)=__attribute__((iboutletcollection(ClassName)))" "-DIBAction=void)__attribute__((ibaction)" -miphoneos-version-min=5.1.1 -MMD -MT dependencies -isysroot $IPHONESDK') - if (env["arch"]=="x86"): + if (env["arch"] == "x86"): env['IPHONEPLATFORM'] = 'iPhoneSimulator' env.Append(LINKFLAGS=['-arch', 'i386', '-mios-simulator-version-min=4.3', '-isysroot', '$IPHONESDK', @@ -92,7 +92,7 @@ def configure(env): '-framework', 'SystemConfiguration', '-F$IPHONESDK', ]) - elif (env["arch"]=="arm64"): + elif (env["arch"] == "arm64"): env.Append(LINKFLAGS=['-arch', 'arm64', '-Wl,-dead_strip', '-miphoneos-version-min=5.1.1', '-isysroot', '$IPHONESDK', #'-stdlib=libc++', @@ -139,39 +139,39 @@ def configure(env): if env['icloud'] == 'yes': env.Append(CPPFLAGS=['-DICLOUD_ENABLED']) - env.Append(CPPPATH = ['$IPHONESDK/usr/include', '$IPHONESDK/System/Library/Frameworks/OpenGLES.framework/Headers', '$IPHONESDK/System/Library/Frameworks/AudioUnit.framework/Headers']) + env.Append(CPPPATH=['$IPHONESDK/usr/include', '$IPHONESDK/System/Library/Frameworks/OpenGLES.framework/Headers', '$IPHONESDK/System/Library/Frameworks/AudioUnit.framework/Headers']) - if (env["target"]=="release"): + if (env["target"] == "release"): - env.Append(CCFLAGS=['-O3', '-DNS_BLOCK_ASSERTIONS=1','-Wall', '-gdwarf-2']) # removed -ffast-math - env.Append(LINKFLAGS=['-O3']) # + env.Append(CCFLAGS=['-O3', '-DNS_BLOCK_ASSERTIONS=1', '-Wall', '-gdwarf-2']) # removed -ffast-math + env.Append(LINKFLAGS=['-O3']) elif env["target"] == "release_debug": - env.Append(CCFLAGS=['-Os', '-DNS_BLOCK_ASSERTIONS=1','-Wall','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-Os', '-DNS_BLOCK_ASSERTIONS=1', '-Wall', '-DDEBUG_ENABLED']) env.Append(LINKFLAGS=['-Os']) env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): env.Append(CCFLAGS=['-D_DEBUG', '-DDEBUG=1', '-gdwarf-2', '-Wall', '-O0', '-DDEBUG_ENABLED']) env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ENABLED']) - elif (env["target"]=="profile"): + elif (env["target"] == "profile"): - env.Append(CCFLAGS=['-g','-pg', '-Os']) + env.Append(CCFLAGS=['-g', '-pg', '-Os']) env.Append(LINKFLAGS=['-pg']) - if (env["ios_sim"]=="yes"): #TODO: Check if needed? + if (env["ios_sim"] == "yes"): # TODO: Check if needed? env['ENV']['MACOSX_DEPLOYMENT_TARGET'] = '10.6' env['ENV']['CODESIGN_ALLOCATE'] = '/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/codesign_allocate' env.Append(CPPFLAGS=['-DIPHONE_ENABLED', '-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DMPC_FIXED_POINT']) # TODO: Move that to opus module's config if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): - env.opus_fixed_point="yes" - if env["arch"]=="x86": + env.opus_fixed_point = "yes" + if env["arch"] == "x86": pass - elif(env["arch"]=="arm64"): + elif(env["arch"] == "arm64"): env.Append(CFLAGS=["-DOPUS_ARM64_OPT"]) else: env.Append(CFLAGS=["-DOPUS_ARM_OPT"]) @@ -180,10 +180,10 @@ def configure(env): env.Append(CPPFLAGS=['-fexceptions']) else: env.Append(CPPFLAGS=['-fno-exceptions']) - #env['neon_enabled']=True + # env['neon_enabled']=True env['S_compiler'] = '$IPHONEPATH/Developer/usr/bin/gcc' import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) diff --git a/platform/javascript/SCsub b/platform/javascript/SCsub index 07852ac2065..5d5cd1590aa 100644 --- a/platform/javascript/SCsub +++ b/platform/javascript/SCsub @@ -14,24 +14,24 @@ env_javascript = env.Clone() if env['target'] == "profile": env_javascript.Append(CPPFLAGS=['-DPROFILER_ENABLED']) -javascript_objects=[] +javascript_objects = [] for x in javascript_files: - javascript_objects.append( env_javascript.Object( x ) ) + javascript_objects.append(env_javascript.Object(x)) -env.Append(LINKFLAGS=["-s","EXPORTED_FUNCTIONS=\"['_main','_audio_server_mix_function','_main_after_fs_sync']\""]) -env.Append(LINKFLAGS=["--shell-file",'"platform/javascript/godot_shell.html"']) +env.Append(LINKFLAGS=["-s", "EXPORTED_FUNCTIONS=\"['_main','_audio_server_mix_function','_main_after_fs_sync']\""]) +env.Append(LINKFLAGS=["--shell-file", '"platform/javascript/godot_shell.html"']) -build = env.Program('#bin/godot',javascript_objects,PROGSUFFIX=env["PROGSUFFIX"]+".html") +build = env.Program('#bin/godot', javascript_objects, PROGSUFFIX=env["PROGSUFFIX"] + ".html") def make_html_shell(target, source, env): - html_path = target[0].rstr() - assert html_path[:4] == 'bin/' - assert html_path[-5:] == '.html' - basename = html_path[4:-5] - with open(html_path, 'r+') as html_file: - fixed_html = html_file.read().replace('.html.mem', '.mem').replace(basename, '$GODOT_BASE') - html_file.seek(0) - html_file.truncate() - html_file.write(fixed_html) + html_path = target[0].rstr() + assert html_path[:4] == 'bin/' + assert html_path[-5:] == '.html' + basename = html_path[4:-5] + with open(html_path, 'r+') as html_file: + fixed_html = html_file.read().replace('.html.mem', '.mem').replace(basename, '$GODOT_BASE') + html_file.seek(0) + html_file.truncate() + html_file.write(fixed_html) env.AddPostAction(build, Action(make_html_shell, "Creating HTML shell file")) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index ce6149c2503..56b41a49aca 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -18,8 +18,8 @@ def can_build(): def get_opts(): return [ - ['wasm','Compile to WebAssembly','no'], - ['javascript_eval','Enable JavaScript eval interface','yes'], + ['wasm', 'Compile to WebAssembly', 'no'], + ['javascript_eval', 'Enable JavaScript eval interface', 'yes'], ] def get_flags(): @@ -40,13 +40,13 @@ def configure(env): env.Append(CPPPATH=['#platform/javascript']) - em_path=os.environ["EMSCRIPTEN_ROOT"] + em_path = os.environ["EMSCRIPTEN_ROOT"] - env['ENV']['PATH'] = em_path+":"+env['ENV']['PATH'] - env['CC'] = em_path+'/emcc' - env['CXX'] = em_path+'/emcc' + env['ENV']['PATH'] = em_path + ":" + env['ENV']['PATH'] + env['CC'] = em_path + '/emcc' + env['CXX'] = em_path + '/emcc' #env['AR'] = em_path+"/emar" - env['AR'] = em_path+"/emcc" + env['AR'] = em_path + "/emcc" env['ARFLAGS'] = "-o" # env['RANLIB'] = em_path+"/emranlib" @@ -60,11 +60,11 @@ def configure(env): # env["LINKFLAGS"]= string.split(" -g --sysroot="+ld_sysroot+" -Wl,--no-undefined -Wl,-z,noexecstack ") - if (env["target"]=="release"): + if (env["target"] == "release"): env.Append(CCFLAGS=['-O2']) - elif (env["target"]=="release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "release_debug"): + env.Append(CCFLAGS=['-O2', '-DDEBUG_ENABLED']) + elif (env["target"] == "debug"): env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-O2', '-DDEBUG_ENABLED']) #env.Append(CCFLAGS=['-D_DEBUG', '-Wall', '-g4', '-DDEBUG_ENABLED']) env.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) @@ -73,33 +73,33 @@ def configure(env): if("module_opus_enabled" in env and env["module_opus_enabled"] != "no"): env.opus_fixed_point = "yes" - env.Append(CPPFLAGS=["-fno-exceptions",'-DNO_SAFE_CAST','-fno-rtti']) - env.Append(CPPFLAGS=['-DJAVASCRIPT_ENABLED', '-DUNIX_ENABLED', '-DPTHREAD_NO_RENAME', '-DNO_FCNTL','-DMPC_FIXED_POINT','-DTYPED_METHOD_BIND','-DNO_THREADS']) + env.Append(CPPFLAGS=["-fno-exceptions", '-DNO_SAFE_CAST', '-fno-rtti']) + env.Append(CPPFLAGS=['-DJAVASCRIPT_ENABLED', '-DUNIX_ENABLED', '-DPTHREAD_NO_RENAME', '-DNO_FCNTL', '-DMPC_FIXED_POINT', '-DTYPED_METHOD_BIND', '-DNO_THREADS']) env.Append(CPPFLAGS=['-DGLES2_ENABLED']) env.Append(CPPFLAGS=['-DGLES_NO_CLIENT_ARRAYS']) - env.Append(CPPFLAGS=['-s','FULL_ES2=1']) + env.Append(CPPFLAGS=['-s', 'FULL_ES2=1']) # env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT']) if env['wasm'] == 'yes': - env.Append(LINKFLAGS=['-s','BINARYEN=1']) - env.Append(LINKFLAGS=['-s','\'BINARYEN_METHOD="native-wasm"\'']) - env["PROGSUFFIX"]+=".webassembly" + env.Append(LINKFLAGS=['-s', 'BINARYEN=1']) + env.Append(LINKFLAGS=['-s', '\'BINARYEN_METHOD="native-wasm"\'']) + env["PROGSUFFIX"] += ".webassembly" else: - env.Append(CPPFLAGS=['-s','ASM_JS=1']) - env.Append(LINKFLAGS=['-s','ASM_JS=1']) + env.Append(CPPFLAGS=['-s', 'ASM_JS=1']) + env.Append(LINKFLAGS=['-s', 'ASM_JS=1']) if env['javascript_eval'] == 'yes': env.Append(CPPFLAGS=['-DJAVASCRIPT_EVAL_ENABLED']) env.Append(LINKFLAGS=['-O2']) - #env.Append(LINKFLAGS=['-g4']) + # env.Append(LINKFLAGS=['-g4']) - #print "CCCOM is:", env.subst('$CCCOM') - #print "P: ", env['p'], " Platofrm: ", env['platform'] + # print "CCCOM is:", env.subst('$CCCOM') + # print "P: ", env['p'], " Platofrm: ", env['platform'] import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) diff --git a/platform/osx/SCsub b/platform/osx/SCsub index 68484b72886..c8e0e17612a 100644 --- a/platform/osx/SCsub +++ b/platform/osx/SCsub @@ -12,4 +12,4 @@ files = [ 'joystick_osx.cpp', ] -env.Program('#bin/godot',files) +env.Program('#bin/godot', files) diff --git a/platform/osx/detect.py b/platform/osx/detect.py index 358c4ef0152..ddb27742a0a 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -20,8 +20,8 @@ def can_build(): def get_opts(): return [ - ('force_64_bits','Force 64 bits binary','no'), - ('osxcross_sdk','OSXCross SDK version','darwin14'), + ('force_64_bits', 'Force 64 bits binary', 'no'), + ('osxcross_sdk', 'OSXCross SDK version', 'darwin14'), ] @@ -36,67 +36,67 @@ def configure(env): env.Append(CPPPATH=['#platform/osx']) - if (env["bits"]=="default"): - env["bits"]="32" + if (env["bits"] == "default"): + env["bits"] = "32" - if (env["target"]=="release"): + if (env["target"] == "release"): - env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer','-ftree-vectorize','-msse2']) + env.Append(CCFLAGS=['-O2', '-ffast-math', '-fomit-frame-pointer', '-ftree-vectorize', '-msse2']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2', '-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['-g3', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g3', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) if (not os.environ.has_key("OSXCROSS_ROOT")): - #regular native build - if (env["bits"]=="64"): + # regular native build + if (env["bits"] == "64"): env.Append(CCFLAGS=['-arch', 'x86_64']) env.Append(LINKFLAGS=['-arch', 'x86_64']) - elif (env["bits"]=="32"): + elif (env["bits"] == "32"): env.Append(CCFLAGS=['-arch', 'i386']) env.Append(LINKFLAGS=['-arch', 'i386']) else: env.Append(CCFLAGS=['-arch', 'i386', '-arch', 'x86_64']) env.Append(LINKFLAGS=['-arch', 'i386', '-arch', 'x86_64']) else: - #osxcross build - root=os.environ.get("OSXCROSS_ROOT",0) - if env["bits"]=="64": - basecmd=root+"/target/bin/x86_64-apple-"+env["osxcross_sdk"]+"-" + # osxcross build + root = os.environ.get("OSXCROSS_ROOT", 0) + if env["bits"] == "64": + basecmd = root + "/target/bin/x86_64-apple-" + env["osxcross_sdk"] + "-" else: - basecmd=root+"/target/bin/i386-apple-"+env["osxcross_sdk"]+"-" + basecmd = root + "/target/bin/i386-apple-" + env["osxcross_sdk"] + "-" - env['CC'] = basecmd+"cc" - env['CXX'] = basecmd+"c++" - env['AR'] = basecmd+"ar" - env['RANLIB'] = basecmd+"ranlib" - env['AS'] = basecmd+"as" + env['CC'] = basecmd + "cc" + env['CXX'] = basecmd + "c++" + env['AR'] = basecmd + "ar" + env['RANLIB'] = basecmd + "ranlib" + env['AS'] = basecmd + "as" env.Append(CPPFLAGS=["-DAPPLE_STYLE_KEYS"]) - env.Append(CPPFLAGS=['-DUNIX_ENABLED','-DGLES2_ENABLED','-DOSX_ENABLED']) + env.Append(CPPFLAGS=['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DOSX_ENABLED']) env.Append(LIBS=['pthread']) #env.Append(CPPFLAGS=['-F/Developer/SDKs/MacOSX10.4u.sdk/System/Library/Frameworks', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-mmacosx-version-min=10.4']) #env.Append(LINKFLAGS=['-mmacosx-version-min=10.4', '-isysroot', '/Developer/SDKs/MacOSX10.4u.sdk', '-Wl,-syslibroot,/Developer/SDKs/MacOSX10.4u.sdk']) - env.Append(LINKFLAGS=['-framework', 'Cocoa', '-framework', 'Carbon', '-framework', 'OpenGL', '-framework', 'AGL', '-framework', 'AudioUnit','-lz', '-framework', 'IOKit', '-framework', 'ForceFeedback']) + env.Append(LINKFLAGS=['-framework', 'Cocoa', '-framework', 'Carbon', '-framework', 'OpenGL', '-framework', 'AGL', '-framework', 'AudioUnit', '-lz', '-framework', 'IOKit', '-framework', 'ForceFeedback']) - if (env["CXX"]=="clang++"): + if (env["CXX"] == "clang++"): env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env["CC"]="clang" - env["LD"]="clang++" + env["CC"] = "clang" + env["LD"] = "clang++" import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env["x86_libtheora_opt_gcc"]=True + env["x86_libtheora_opt_gcc"] = True diff --git a/platform/server/SCsub b/platform/server/SCsub index d53a003288f..30195bb9082 100644 --- a/platform/server/SCsub +++ b/platform/server/SCsub @@ -3,8 +3,8 @@ Import('env') -common_server=[\ +common_server = [\ "os_server.cpp",\ ] -env.Program('#bin/godot_server',['godot_server.cpp']+common_server) +env.Program('#bin/godot_server', ['godot_server.cpp'] + common_server) diff --git a/platform/server/detect.py b/platform/server/detect.py index c6e9d45f93f..9d23ceda706 100644 --- a/platform/server/detect.py +++ b/platform/server/detect.py @@ -12,16 +12,16 @@ def get_name(): def can_build(): - if (os.name!="posix"): + if (os.name != "posix"): return False - return True # enabled + return True # enabled def get_opts(): return [ - ('use_llvm','Use llvm compiler','no'), - ('force_32_bits','Force 32 bits binary','no') + ('use_llvm', 'Use llvm compiler', 'no'), + ('force_32_bits', 'Force 32 bits binary', 'no') ] def get_flags(): @@ -34,43 +34,43 @@ def get_flags(): def configure(env): env.Append(CPPPATH=['#platform/server']) - if (env["use_llvm"]=="yes"): - env["CC"]="clang" - env["CXX"]="clang++" - env["LD"]="clang++" + if (env["use_llvm"] == "yes"): + env["CC"] = "clang" + env["CXX"] = "clang++" + env["LD"] = "clang++" - is64=sys.maxsize > 2**32 + is64 = sys.maxsize > 2**32 - if (env["bits"]=="default"): + if (env["bits"] == "default"): if (is64): - env["bits"]="64" + env["bits"] = "64" else: - env["bits"]="32" + env["bits"] = "32" - #if (env["tools"]=="no"): + # if (env["tools"]=="no"): # #no tools suffix # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"]=="release"): + if (env["target"] == "release"): - env.Append(CCFLAGS=['-O2','-ffast-math','-fomit-frame-pointer']) + env.Append(CCFLAGS=['-O2', '-ffast-math', '-fomit-frame-pointer']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2', '-ffast-math', '-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g2', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) - env.Append(CPPFLAGS=['-DSERVER_ENABLED','-DUNIX_ENABLED']) - env.Append(LIBS=['pthread','z']) #TODO detect linux/BSD! + env.Append(CPPFLAGS=['-DSERVER_ENABLED', '-DUNIX_ENABLED']) + env.Append(LIBS=['pthread', 'z']) # TODO detect linux/BSD! - if (env["CXX"]=="clang++"): + if (env["CXX"] == "clang++"): env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env["CC"]="clang" - env["LD"]="clang++" + env["CC"] = "clang" + env["LD"] = "clang++" diff --git a/platform/windows/SCsub b/platform/windows/SCsub index 00568952ea1..32c23b906ad 100644 --- a/platform/windows/SCsub +++ b/platform/windows/SCsub @@ -3,7 +3,7 @@ Import('env') -common_win=[ +common_win = [ "context_gl_win.cpp", "os_windows.cpp", "ctxgl_procaddr.cpp", @@ -14,16 +14,16 @@ common_win=[ "joystick.cpp", ] -restarget="godot_res"+env["OBJSUFFIX"] +restarget = "godot_res" + env["OBJSUFFIX"] -obj = env.RES(restarget,'godot_res.rc') +obj = env.RES(restarget, 'godot_res.rc') common_win.append(obj) -env.Program('#bin/godot',['godot_win.cpp']+common_win,PROGSUFFIX=env["PROGSUFFIX"]) +env.Program('#bin/godot', ['godot_win.cpp'] + common_win, PROGSUFFIX=env["PROGSUFFIX"]) # Microsoft Visual Studio Project Generation -if (env['vsproj'])=="yes": +if (env['vsproj']) == "yes": env.vs_srcs = env.vs_srcs + ["platform/windows/godot_win.cpp"] for x in common_win: env.vs_srcs = env.vs_srcs + ["platform/windows/" + str(x)] diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 325f2838b23..4bb6dc4a966 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -85,7 +85,7 @@ ##### # TODO : -# +# # - finish to cleanup this script to remove all the remains of previous hacks and workarounds # - make it work with the Windows7 SDK that is supposed to enable 64bits compilation for MSVC2010-Express # - confirm it works well with other Visual Studio versions. @@ -107,21 +107,21 @@ def get_name(): def can_build(): - if (os.name=="nt"): - #building natively on windows! - if ( os.getenv("VCINSTALLDIR") ): + if (os.name == "nt"): + # building natively on windows! + if (os.getenv("VCINSTALLDIR")): return True else: print("\nMSVC not detected, attempting Mingw.") mingw32 = "" mingw64 = "" - if ( os.getenv("MINGW32_PREFIX") ) : + if (os.getenv("MINGW32_PREFIX")): mingw32 = os.getenv("MINGW32_PREFIX") - if ( os.getenv("MINGW64_PREFIX") ) : + if (os.getenv("MINGW64_PREFIX")): mingw64 = os.getenv("MINGW64_PREFIX") test = "gcc --version > NUL 2>&1" - if os.system(test)!= 0 and os.system(mingw32+test)!=0 and os.system(mingw64+test)!=0 : + if os.system(test) != 0 and os.system(mingw32 + test) != 0 and os.system(mingw64 + test) != 0: print("- could not detect gcc.") print("Please, make sure a path to a Mingw /bin directory is accessible into the environment PATH.\n") return False @@ -130,57 +130,57 @@ def can_build(): return True - if (os.name=="posix"): + if (os.name == "posix"): mingw = "i586-mingw32msvc-" mingw64 = "x86_64-w64-mingw32-" mingw32 = "i686-w64-mingw32-" if (os.getenv("MINGW32_PREFIX")): - mingw32=os.getenv("MINGW32_PREFIX") + mingw32 = os.getenv("MINGW32_PREFIX") mingw = mingw32 if (os.getenv("MINGW64_PREFIX")): - mingw64=os.getenv("MINGW64_PREFIX") + mingw64 = os.getenv("MINGW64_PREFIX") test = "gcc --version &>/dev/null" - if (os.system(mingw+test) == 0 or os.system(mingw64+test) == 0 or os.system(mingw32+test) == 0): + if (os.system(mingw + test) == 0 or os.system(mingw64 + test) == 0 or os.system(mingw32 + test) == 0): return True return False def get_opts(): - mingw="" - mingw32="" - mingw64="" - if ( os.name == "posix" ): + mingw = "" + mingw32 = "" + mingw64 = "" + if (os.name == "posix"): mingw = "i586-mingw32msvc-" mingw32 = "i686-w64-mingw32-" mingw64 = "x86_64-w64-mingw32-" - if os.system(mingw32+"gcc --version &>/dev/null") != 0 : + if os.system(mingw32 + "gcc --version &>/dev/null") != 0: mingw32 = mingw if (os.getenv("MINGW32_PREFIX")): - mingw32=os.getenv("MINGW32_PREFIX") + mingw32 = os.getenv("MINGW32_PREFIX") mingw = mingw32 if (os.getenv("MINGW64_PREFIX")): - mingw64=os.getenv("MINGW64_PREFIX") + mingw64 = os.getenv("MINGW64_PREFIX") return [ - ('mingw_prefix','Mingw Prefix',mingw32), - ('mingw_prefix_64','Mingw Prefix 64 bits',mingw64), + ('mingw_prefix', 'Mingw Prefix', mingw32), + ('mingw_prefix_64', 'Mingw Prefix 64 bits', mingw64), ] def get_flags(): return [ ('builtin_zlib', 'yes'), - ('openssl','builtin'), #use builtin openssl + ('openssl', 'builtin'), # use builtin openssl ] -def build_res_file( target, source, env ): +def build_res_file(target, source, env): cmdbase = "" if (env["bits"] == "32"): @@ -193,7 +193,7 @@ def build_res_file( target, source, env ): for x in range(len(source)): cmd = cmdbase + '-i ' + str(source[x]) + ' -o ' + str(target[x]) try: - out = subprocess.Popen(cmd,shell = True,stderr = subprocess.PIPE).communicate() + out = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE).communicate() if len(out[1]): return 1 except: @@ -203,66 +203,66 @@ def build_res_file( target, source, env ): def configure(env): env.Append(CPPPATH=['#platform/windows']) - env['is_mingw']=False - if (os.name=="nt" and os.getenv("VCINSTALLDIR") ): - #build using visual studio + env['is_mingw'] = False + if (os.name == "nt" and os.getenv("VCINSTALLDIR")): + # build using visual studio env['ENV']['TMP'] = os.environ['TMP'] env.Append(CPPPATH=['#platform/windows/include']) env.Append(LIBPATH=['#platform/windows/lib']) - if (env["target"]=="release"): + if (env["target"] == "release"): env.Append(CCFLAGS=['/O2']) env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['/O2','/DDEBUG_ENABLED']) + env.Append(CCFLAGS=['/O2', '/DDEBUG_ENABLED']) env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - elif (env["target"]=="debug_release"): + elif (env["target"] == "debug_release"): - env.Append(CCFLAGS=['/Z7','/Od']) + env.Append(CCFLAGS=['/Z7', '/Od']) env.Append(LINKFLAGS=['/DEBUG']) env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS']) env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['/Z7','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED','/DD3D_DEBUG_INFO','/Od']) + env.Append(CCFLAGS=['/Z7', '/DDEBUG_ENABLED', '/DDEBUG_MEMORY_ENABLED', '/DD3D_DEBUG_INFO', '/Od']) env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) env.Append(LINKFLAGS=['/DEBUG']) - env.Append(CCFLAGS=['/MT','/Gd','/GR','/nologo']) + env.Append(CCFLAGS=['/MT', '/Gd', '/GR', '/nologo']) env.Append(CXXFLAGS=['/TP']) env.Append(CPPFLAGS=['/DMSVC', '/GR', ]) - env.Append(CCFLAGS=['/I'+os.getenv("WindowsSdkDir")+"/Include"]) + env.Append(CCFLAGS=['/I' + os.getenv("WindowsSdkDir") + "/Include"]) env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) env.Append(CCFLAGS=['/DRTAUDIO_ENABLED']) env.Append(CCFLAGS=['/DWIN32']) env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) env.Append(CCFLAGS=['/DGLES2_ENABLED']) - LIBS=['winmm','opengl32','dsound','kernel32','ole32','oleaut32','user32','gdi32', 'IPHLPAPI','Shlwapi', 'wsock32','Ws2_32', 'shell32','advapi32','dinput8','dxguid'] - env.Append(LINKFLAGS=[p+env["LIBSUFFIX"] for p in LIBS]) + LIBS = ['winmm', 'opengl32', 'dsound', 'kernel32', 'ole32', 'oleaut32', 'user32', 'gdi32', 'IPHLPAPI', 'Shlwapi', 'wsock32', 'Ws2_32', 'shell32', 'advapi32', 'dinput8', 'dxguid'] + env.Append(LINKFLAGS=[p + env["LIBSUFFIX"] for p in LIBS]) - env.Append(LIBPATH=[os.getenv("WindowsSdkDir")+"/Lib"]) + env.Append(LIBPATH=[os.getenv("WindowsSdkDir") + "/Lib"]) if (os.getenv("DXSDK_DIR")): - DIRECTX_PATH=os.getenv("DXSDK_DIR") + DIRECTX_PATH = os.getenv("DXSDK_DIR") else: - DIRECTX_PATH="C:/Program Files/Microsoft DirectX SDK (March 2009)" + DIRECTX_PATH = "C:/Program Files/Microsoft DirectX SDK (March 2009)" if (os.getenv("VCINSTALLDIR")): - VC_PATH=os.getenv("VCINSTALLDIR") + VC_PATH = os.getenv("VCINSTALLDIR") else: - VC_PATH="" + VC_PATH = "" env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")]) env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")]) - env.Append(CCFLAGS=["/I"+DIRECTX_PATH+"/Include"]) - env.Append(LIBPATH=[DIRECTX_PATH+"/Lib/x86"]) + env.Append(CCFLAGS=["/I" + DIRECTX_PATH + "/Include"]) + env.Append(LIBPATH=[DIRECTX_PATH + "/Lib/x86"]) env['ENV'] = os.environ; # This detection function needs the tools env (that is env['ENV'], not SCons's env), and that is why it's this far bellow in the code @@ -272,26 +272,26 @@ def configure(env): if(env["bits"] != "default"): print "Error: bits argument is disabled for MSVC" print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" - +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=windows) and SCons will attempt to detect what MSVC compiler" - +" will be executed and inform you.") + + " that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=windows) and SCons will attempt to detect what MSVC compiler" + + " will be executed and inform you.") sys.exit() # Forcing bits argument because MSVC does not have a flag to set this through SCons... it's different compilers (cl.exe's) called from the propper command prompt # that decide the architecture that is build for. Scons can only detect the os.getenviron (because vsvarsall.bat sets a lot of stuff for cl.exe to work with) - env["bits"]="32" - env["x86_libtheora_opt_vc"]=True + env["bits"] = "32" + env["x86_libtheora_opt_vc"] = True - print "Detected MSVC compiler: "+compiler_version_str + print "Detected MSVC compiler: " + compiler_version_str # If building for 64bit architecture, disable assembly optimisations for 32 bit builds (theora as of writting)... vc compiler for 64bit can not compile _asm if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): - env["bits"]="64" - env["x86_libtheora_opt_vc"]=False + env["bits"] = "64" + env["x86_libtheora_opt_vc"] = False print "Compiled program architecture will be a 64 bit executable (forcing bits=64)." - elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): + elif (compiler_version_str == "x86" or compiler_version_str == "amd64_x86"): print "Compiled program architecture will be a 32 bit executable. (forcing bits=32)." else: print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." - if env["bits"]=="64": + if env["bits"] == "64": env.Append(CCFLAGS=['/D_WIN64']) # Incremental linking fix @@ -304,32 +304,32 @@ def configure(env): # http://www.scons.org/wiki/LongCmdLinesOnWin32 env.use_windows_spawn_fix() - #build using mingw - if (os.name=="nt"): - env['ENV']['TMP'] = os.environ['TMP'] #way to go scons, you can be so stupid sometimes + # build using mingw + if (os.name == "nt"): + env['ENV']['TMP'] = os.environ['TMP'] # way to go scons, you can be so stupid sometimes else: - env["PROGSUFFIX"]=env["PROGSUFFIX"]+".exe" # for linux cross-compilation + env["PROGSUFFIX"] = env["PROGSUFFIX"] + ".exe" # for linux cross-compilation - mingw_prefix="" + mingw_prefix = "" - if (env["bits"]=="default"): - env["bits"]="32" + if (env["bits"] == "default"): + env["bits"] = "32" - if (env["bits"]=="32"): + if (env["bits"] == "32"): env.Append(LINKFLAGS=['-static']) env.Append(LINKFLAGS=['-static-libgcc']) env.Append(LINKFLAGS=['-static-libstdc++']) - mingw_prefix=env["mingw_prefix"]; + mingw_prefix = env["mingw_prefix"]; else: env.Append(LINKFLAGS=['-static']) - mingw_prefix=env["mingw_prefix_64"]; + mingw_prefix = env["mingw_prefix_64"]; - nulstr="" + nulstr = "" - if (os.name=="posix"): - nulstr=">/dev/null" + if (os.name == "posix"): + nulstr = ">/dev/null" else: - nulstr=">nul" + nulstr = ">nul" @@ -338,42 +338,42 @@ def configure(env): # print("Can't find Windows compiler: "+mingw_prefix) # sys.exit(255) - if (env["target"]=="release"): + if (env["target"] == "release"): env.Append(CCFLAGS=['-msse2']) - if (env["bits"]=="64"): + if (env["bits"] == "64"): env.Append(CCFLAGS=['-O3']) else: env.Append(CCFLAGS=['-O2']) env.Append(LINKFLAGS=['-Wl,--subsystem,windows']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['-O2','-DDEBUG_ENABLED']) + env.Append(CCFLAGS=['-O2', '-DDEBUG_ENABLED']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['-g', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) - env["CC"]=mingw_prefix+"gcc" - env['AS']=mingw_prefix+"as" - env['CXX'] = mingw_prefix+"g++" - env['AR'] = mingw_prefix+"ar" - env['RANLIB'] = mingw_prefix+"ranlib" - env['LD'] = mingw_prefix+"g++" - env["x86_libtheora_opt_gcc"]=True + env["CC"] = mingw_prefix + "gcc" + env['AS'] = mingw_prefix + "as" + env['CXX'] = mingw_prefix + "g++" + env['AR'] = mingw_prefix + "ar" + env['RANLIB'] = mingw_prefix + "ranlib" + env['LD'] = mingw_prefix + "g++" + env["x86_libtheora_opt_gcc"] = True #env['CC'] = "winegcc" #env['CXX'] = "wineg++" - env.Append(CCFLAGS=['-DWINDOWS_ENABLED','-mwindows']) + env.Append(CCFLAGS=['-DWINDOWS_ENABLED', '-mwindows']) env.Append(CPPFLAGS=['-DRTAUDIO_ENABLED']) env.Append(CCFLAGS=['-DGLES2_ENABLED']) - env.Append(LIBS=['mingw32','opengl32', 'dsound', 'ole32', 'd3d9','winmm','gdi32','iphlpapi','shlwapi','wsock32','ws2_32','kernel32', 'oleaut32', 'dinput8', 'dxguid']) + env.Append(LIBS=['mingw32', 'opengl32', 'dsound', 'ole32', 'd3d9', 'winmm', 'gdi32', 'iphlpapi', 'shlwapi', 'wsock32', 'ws2_32', 'kernel32', 'oleaut32', 'dinput8', 'dxguid']) # if (env["bits"]=="32"): # env.Append(LIBS=['gcc_s']) @@ -386,13 +386,13 @@ def configure(env): #'d3dx9d' env.Append(CPPFLAGS=['-DMINGW_ENABLED']) - #env.Append(LINKFLAGS=['-g']) + # env.Append(LINKFLAGS=['-g']) # resrc - env['is_mingw']=True - env.Append( BUILDERS = { 'RES' : env.Builder(action = build_res_file, suffix = '.o',src_suffix = '.rc') } ) + env['is_mingw'] = True + env.Append(BUILDERS={'RES': env.Builder(action=build_res_file, suffix='.o', src_suffix='.rc')}) - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'HLSL9': env.Builder(action=methods.build_hlsl_dx9_headers, suffix='hlsl.h', src_suffix='.hlsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) diff --git a/platform/winrt/detect.py b/platform/winrt/detect.py index 06bae980e95..19a6324e1e8 100644 --- a/platform/winrt/detect.py +++ b/platform/winrt/detect.py @@ -12,8 +12,8 @@ def get_name(): return "WinRT" def can_build(): - if (os.name=="nt"): - #building natively on windows! + if (os.name == "nt"): + # building natively on windows! if (os.getenv("VSINSTALLDIR")): if (os.getenv("ANGLE_SRC_PATH") == None): @@ -40,8 +40,8 @@ def configure(env): if(env["bits"] != "default"): print "Error: bits argument is disabled for MSVC" print ("Bits argument is not supported for MSVC compilation. Architecture depends on the Native/Cross Compile Tools Prompt/Developer Console (or Visual Studio settings)" - +" that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=winrt) and SCons will attempt to detect what MSVC compiler" - +" will be executed and inform you.") + + " that is being used to run SCons. As a consequence, bits argument is disabled. Run scons again without bits argument (example: scons p=winrt) and SCons will attempt to detect what MSVC compiler" + + " will be executed and inform you.") sys.exit() arch = "" @@ -60,8 +60,8 @@ def configure(env): print "Compiled program architecture will be an ARM executable. (forcing bits=32)." - arch="arm" - env["bits"]="32" + arch = "arm" + env["bits"] = "32" env.Append(LINKFLAGS=['/MACHINE:ARM']) env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/arm']) @@ -74,14 +74,14 @@ def configure(env): compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) if(compiler_version_str == "amd64" or compiler_version_str == "x86_amd64"): - env["bits"]="64" + env["bits"] = "64" print "Compiled program architecture will be a x64 executable (forcing bits=64)." - elif (compiler_version_str=="x86" or compiler_version_str == "amd64_x86"): - env["bits"]="32" + elif (compiler_version_str == "x86" or compiler_version_str == "amd64_x86"): + env["bits"] = "32" print "Compiled program architecture will be a x86 executable. (forcing bits=32)." else: print "Failed to detect MSVC compiler architecture version... Defaulting to 32bit executable settings (forcing bits=32). Compilation attempt will continue, but SCons can not detect for what architecture this build is compiled for. You should check your settings/compilation setup." - env["bits"]="32" + env["bits"] = "32" if (env["bits"] == "32"): arch = "x86" @@ -102,29 +102,29 @@ def configure(env): env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/amd64']) env.Append(LIBPATH=[angle_root + '/winrt/10/src/Release_x64/lib']) - env.Append(CPPPATH=['#platform/winrt','#drivers/windows']) + env.Append(CPPPATH=['#platform/winrt', '#drivers/windows']) env.Append(LINKFLAGS=['/MANIFEST:NO', '/NXCOMPAT', '/DYNAMICBASE', '/WINMD', '/APPCONTAINER', '/ERRORREPORT:PROMPT', '/NOLOGO', '/TLBID:1', '/NODEFAULTLIB:"kernel32.lib"', '/NODEFAULTLIB:"ole32.lib"']) - env.Append(CPPFLAGS=['/D','__WRL_NO_DEFAULT_LIB__','/D','WIN32']) + env.Append(CPPFLAGS=['/D', '__WRL_NO_DEFAULT_LIB__', '/D', 'WIN32']) env.Append(CPPFLAGS=['/FU', os.environ['VCINSTALLDIR'] + 'lib/store/references/platform.winmd']) env.Append(CPPFLAGS=['/AI', os.environ['VCINSTALLDIR'] + 'lib/store/references']) env.Append(LIBPATH=[os.environ['VCINSTALLDIR'] + 'lib/store/references']) - if (env["target"]=="release"): + if (env["target"] == "release"): env.Append(CPPFLAGS=['/O2', '/GL']) env.Append(CPPFLAGS=['/MD']) env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS', '/LTCG']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['/O2','/Zi','/DDEBUG_ENABLED']) + env.Append(CCFLAGS=['/O2', '/Zi', '/DDEBUG_ENABLED']) env.Append(CPPFLAGS=['/MD']) env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['/Zi','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['/Zi', '/DDEBUG_ENABLED', '/DDEBUG_MEMORY_ENABLED']) env.Append(CPPFLAGS=['/MDd']) env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) env.Append(LINKFLAGS=['/DEBUG']) @@ -132,18 +132,18 @@ def configure(env): env.Append(CCFLAGS=string.split('/FS /MP /GS /wd"4453" /wd"28204" /wd"4291" /Zc:wchar_t /Gm- /fp:precise /D "_UNICODE" /D "UNICODE" /D "WINAPI_FAMILY=WINAPI_FAMILY_APP" /errorReport:prompt /WX- /Zc:forScope /Gd /EHsc /nologo')) env.Append(CXXFLAGS=string.split('/ZW /FS')) - env.Append(CCFLAGS=['/AI', os.environ['VCINSTALLDIR']+'\\vcpackages', '/AI', os.environ['WINDOWSSDKDIR']+'\\References\\CommonConfiguration\\Neutral']) + env.Append(CCFLAGS=['/AI', os.environ['VCINSTALLDIR'] + '\\vcpackages', '/AI', os.environ['WINDOWSSDKDIR'] + '\\References\\CommonConfiguration\\Neutral']) - env["PROGSUFFIX"]="."+arch+env["PROGSUFFIX"] - env["OBJSUFFIX"]="."+arch+env["OBJSUFFIX"] - env["LIBSUFFIX"]="."+arch+env["LIBSUFFIX"] + env["PROGSUFFIX"] = "." + arch + env["PROGSUFFIX"] + env["OBJSUFFIX"] = "." + arch + env["OBJSUFFIX"] + env["LIBSUFFIX"] = "." + arch + env["LIBSUFFIX"] env.Append(CCFLAGS=['/DWINRT_ENABLED']) env.Append(CCFLAGS=['/DWINDOWS_ENABLED']) env.Append(CCFLAGS=['/DTYPED_METHOD_BIND']) - env.Append(CCFLAGS=['/DGLES2_ENABLED','/DGL_GLEXT_PROTOTYPES','/DEGL_EGLEXT_PROTOTYPES','/DANGLE_ENABLED']) + env.Append(CCFLAGS=['/DGLES2_ENABLED', '/DGL_GLEXT_PROTOTYPES', '/DEGL_EGLEXT_PROTOTYPES', '/DANGLE_ENABLED']) LIBS = [ 'WindowsApp', @@ -152,15 +152,15 @@ def configure(env): 'libEGL', 'libGLESv2', ] - env.Append(LINKFLAGS=[p+".lib" for p in LIBS]) + env.Append(LINKFLAGS=[p + ".lib" for p in LIBS]) # Incremental linking fix env['BUILDERS']['ProgramOriginal'] = env['BUILDERS']['Program'] env['BUILDERS']['Program'] = methods.precious_program - env.Append( BUILDERS = { 'ANGLE' : env.Builder(action = angle_build_cmd) } ) + env.Append(BUILDERS={'ANGLE': env.Builder(action=angle_build_cmd)}) - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'HLSL9': env.Builder(action=methods.build_hlsl_dx9_headers, suffix='hlsl.h', src_suffix='.hlsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) diff --git a/platform/x11/SCsub b/platform/x11/SCsub index 93cd2ca267b..0defd4f0255 100644 --- a/platform/x11/SCsub +++ b/platform/x11/SCsub @@ -3,11 +3,11 @@ Import('env') -common_x11=[\ +common_x11 = [\ "context_gl_x11.cpp",\ "os_x11.cpp",\ "key_mapping_x11.cpp",\ "joystick_linux.cpp",\ ] -env.Program('#bin/godot',['godot_x11.cpp']+common_x11) +env.Program('#bin/godot', ['godot_x11.cpp'] + common_x11) diff --git a/platform/x11/detect.py b/platform/x11/detect.py index 593e3fbb9d5..37c787179fe 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -13,56 +13,56 @@ def get_name(): def can_build(): - if (os.name!="posix"): + if (os.name != "posix"): return False if sys.platform == "darwin": - return False # no x11 on mac for now + return False # no x11 on mac for now - errorval=os.system("pkg-config --version > /dev/null") + errorval = os.system("pkg-config --version > /dev/null") if (errorval): print("pkg-config not found.. x11 disabled.") return False - x11_error=os.system("pkg-config x11 --modversion > /dev/null ") + x11_error = os.system("pkg-config x11 --modversion > /dev/null ") if (x11_error): print("X11 not found.. x11 disabled.") return False - ssl_error=os.system("pkg-config openssl --modversion > /dev/null ") + ssl_error = os.system("pkg-config openssl --modversion > /dev/null ") if (ssl_error): print("OpenSSL not found.. x11 disabled.") return False - x11_error=os.system("pkg-config xcursor --modversion > /dev/null ") + x11_error = os.system("pkg-config xcursor --modversion > /dev/null ") if (x11_error): print("xcursor not found.. x11 disabled.") return False - x11_error=os.system("pkg-config xinerama --modversion > /dev/null ") + x11_error = os.system("pkg-config xinerama --modversion > /dev/null ") if (x11_error): print("xinerama not found.. x11 disabled.") return False - x11_error=os.system("pkg-config xrandr --modversion > /dev/null ") + x11_error = os.system("pkg-config xrandr --modversion > /dev/null ") if (x11_error): print("xrandr not found.. x11 disabled.") return False - return True # X11 enabled + return True # X11 enabled def get_opts(): return [ - ('use_llvm','Use llvm compiler','no'), - ('use_static_cpp','link stdc++ statically','no'), - ('use_sanitizer','Use llvm compiler sanitize address','no'), - ('use_leak_sanitizer','Use llvm compiler sanitize memory leaks','no'), - ('pulseaudio','Detect & Use pulseaudio','yes'), - ('udev','Use udev for gamepad connection callbacks','no'), - ('debug_release', 'Add debug symbols to release version','no'), + ('use_llvm', 'Use llvm compiler', 'no'), + ('use_static_cpp', 'link stdc++ statically', 'no'), + ('use_sanitizer', 'Use llvm compiler sanitize address', 'no'), + ('use_leak_sanitizer', 'Use llvm compiler sanitize memory leaks', 'no'), + ('pulseaudio', 'Detect & Use pulseaudio', 'yes'), + ('udev', 'Use udev for gamepad connection callbacks', 'no'), + ('debug_release', 'Add debug symbols to release version', 'no'), ] def get_flags(): @@ -77,56 +77,56 @@ def get_flags(): def configure(env): - is64=sys.maxsize > 2**32 + is64 = sys.maxsize > 2**32 - if (env["bits"]=="default"): + if (env["bits"] == "default"): if (is64): - env["bits"]="64" + env["bits"] = "64" else: - env["bits"]="32" + env["bits"] = "32" env.Append(CPPPATH=['#platform/x11']) - if (env["use_llvm"]=="yes"): + if (env["use_llvm"] == "yes"): if 'clang++' not in env['CXX']: - env["CC"]="clang" - env["CXX"]="clang++" - env["LD"]="clang++" + env["CC"] = "clang" + env["CXX"] = "clang++" + env["LD"] = "clang++" env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) - env.extra_suffix=".llvm" + env.extra_suffix = ".llvm" - if (env["use_sanitizer"]=="yes"): - env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) + if (env["use_sanitizer"] == "yes"): + env.Append(CXXFLAGS=['-fsanitize=address', '-fno-omit-frame-pointer']) env.Append(LINKFLAGS=['-fsanitize=address']) - env.extra_suffix+="s" + env.extra_suffix += "s" - if (env["use_leak_sanitizer"]=="yes"): - env.Append(CXXFLAGS=['-fsanitize=address','-fno-omit-frame-pointer']) + if (env["use_leak_sanitizer"] == "yes"): + env.Append(CXXFLAGS=['-fsanitize=address', '-fno-omit-frame-pointer']) env.Append(LINKFLAGS=['-fsanitize=address']) - env.extra_suffix+="s" + env.extra_suffix += "s" - #if (env["tools"]=="no"): + # if (env["tools"]=="no"): # #no tools suffix # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"]=="release"): + if (env["target"] == "release"): - if (env["debug_release"]=="yes"): + if (env["debug_release"] == "yes"): env.Append(CCFLAGS=['-g2']) else: - env.Append(CCFLAGS=['-O3','-ffast-math']) + env.Append(CCFLAGS=['-O3', '-ffast-math']) - elif (env["target"]=="release_debug"): + elif (env["target"] == "release_debug"): - env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED']) - if (env["debug_release"]=="yes"): + env.Append(CCFLAGS=['-O2', '-ffast-math', '-DDEBUG_ENABLED']) + if (env["debug_release"] == "yes"): env.Append(CCFLAGS=['-g2']) - elif (env["target"]=="debug"): + elif (env["target"] == "debug"): - env.Append(CCFLAGS=['-g2', '-Wall','-DDEBUG_ENABLED','-DDEBUG_MEMORY_ENABLED']) + env.Append(CCFLAGS=['-g2', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) env.ParseConfig('pkg-config x11 --cflags --libs') env.ParseConfig('pkg-config xinerama --cflags --libs') @@ -180,7 +180,7 @@ def configure(env): if (env["glew"] == "system"): env.ParseConfig('pkg-config glew --cflags --libs') - if os.system("pkg-config --exists alsa")==0: + if os.system("pkg-config --exists alsa") == 0: print("Enabling ALSA") env.Append(CPPFLAGS=["-DALSA_ENABLED"]) env.ParseConfig('pkg-config alsa --cflags --libs') @@ -189,7 +189,7 @@ def configure(env): if (platform.system() == "Linux"): env.Append(CPPFLAGS=["-DJOYDEV_ENABLED"]) - if (env["udev"]=="yes"): + if (env["udev"] == "yes"): # pkg-config returns 0 when the lib exists... found_udev = not os.system("pkg-config --exists libudev") @@ -200,7 +200,7 @@ def configure(env): else: print("libudev development libraries not found, disabling udev support") - if (env["pulseaudio"]=="yes"): + if (env["pulseaudio"] == "yes"): if not os.system("pkg-config --exists libpulse-simple"): print("Enabling PulseAudio") env.Append(CPPFLAGS=["-DPULSEAUDIO_ENABLED"]) @@ -208,33 +208,33 @@ def configure(env): else: print("PulseAudio development libraries not found, disabling driver") - env.Append(CPPFLAGS=['-DX11_ENABLED','-DUNIX_ENABLED','-DGLES2_ENABLED','-DGLES_OVER_GL']) + env.Append(CPPFLAGS=['-DX11_ENABLED', '-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DGLES_OVER_GL']) env.Append(LIBS=['GL', 'pthread', 'z']) if (platform.system() == "Linux"): env.Append(LIBS='dl') - #env.Append(CPPFLAGS=['-DMPC_FIXED_POINT']) + # env.Append(CPPFLAGS=['-DMPC_FIXED_POINT']) -#host compiler is default.. +# host compiler is default.. - if (is64 and env["bits"]=="32"): + if (is64 and env["bits"] == "32"): env.Append(CPPFLAGS=['-m32']) - env.Append(LINKFLAGS=['-m32','-L/usr/lib/i386-linux-gnu']) - elif (not is64 and env["bits"]=="64"): + env.Append(LINKFLAGS=['-m32', '-L/usr/lib/i386-linux-gnu']) + elif (not is64 and env["bits"] == "64"): env.Append(CPPFLAGS=['-m64']) - env.Append(LINKFLAGS=['-m64','-L/usr/lib/i686-linux-gnu']) + env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu']) import methods - env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) - env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } ) + env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) + env.Append(BUILDERS={'GLSL120GLES': env.Builder(action=methods.build_gles2_headers, suffix='glsl.h', src_suffix='.glsl')}) #env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } ) - if (env["use_static_cpp"]=="yes"): + if (env["use_static_cpp"] == "yes"): env.Append(LINKFLAGS=['-static-libstdc++']) list_of_x86 = ['x86_64', 'x86', 'i386', 'i586'] if any(platform.machine() in s for s in list_of_x86): - env["x86_libtheora_opt_gcc"]=True + env["x86_libtheora_opt_gcc"] = True diff --git a/scene/2d/SCsub b/scene/2d/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/2d/SCsub +++ b/scene/2d/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/3d/SCsub b/scene/3d/SCsub index 47825ea6c26..90e78ba8d38 100644 --- a/scene/3d/SCsub +++ b/scene/3d/SCsub @@ -3,11 +3,11 @@ Import('env') -if (env["disable_3d"]=="yes"): +if (env["disable_3d"] == "yes"): env.scene_sources.append("3d/spatial.cpp") env.scene_sources.append("3d/skeleton.cpp") else: - env.add_source_files(env.scene_sources,"*.cpp") + env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/SCsub b/scene/SCsub index 79da3651927..4a89eef4698 100644 --- a/scene/SCsub +++ b/scene/SCsub @@ -2,8 +2,8 @@ Import('env') -env.scene_sources=[] -env.add_source_files(env.scene_sources,"*.cpp") +env.scene_sources = [] +env.add_source_files(env.scene_sources, "*.cpp") Export('env') @@ -17,6 +17,6 @@ SConscript('resources/SCsub'); SConscript('io/SCsub'); -lib = env.Library("scene",env.scene_sources) +lib = env.Library("scene", env.scene_sources) env.Prepend(LIBS=[lib]) diff --git a/scene/animation/SCsub b/scene/animation/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/animation/SCsub +++ b/scene/animation/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/audio/SCsub b/scene/audio/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/audio/SCsub +++ b/scene/audio/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/gui/SCsub b/scene/gui/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/gui/SCsub +++ b/scene/gui/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/io/SCsub b/scene/io/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/io/SCsub +++ b/scene/io/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/main/SCsub b/scene/main/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/main/SCsub +++ b/scene/main/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/resources/SCsub b/scene/resources/SCsub index 702ed1a9bf0..9c1e72f04f1 100644 --- a/scene/resources/SCsub +++ b/scene/resources/SCsub @@ -2,8 +2,8 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") -env.add_source_files(env.scene_sources,"*.c") +env.add_source_files(env.scene_sources, "*.cpp") +env.add_source_files(env.scene_sources, "*.c") Export('env') diff --git a/scene/resources/default_theme/SCsub b/scene/resources/default_theme/SCsub index 9fa89edbf75..bf9125be7f0 100644 --- a/scene/resources/default_theme/SCsub +++ b/scene/resources/default_theme/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.scene_sources,"*.cpp") +env.add_source_files(env.scene_sources, "*.cpp") Export('env') diff --git a/scene/resources/default_theme/make_header.py b/scene/resources/default_theme/make_header.py index ce053570959..d8799fd7da7 100644 --- a/scene/resources/default_theme/make_header.py +++ b/scene/resources/default_theme/make_header.py @@ -4,15 +4,15 @@ import glob; import string; -#Generate include files +# Generate include files -f=open("theme_data.h","wb") +f = open("theme_data.h", "wb") f.write("// THIS FILE HAS BEEN AUTOGENERATED, DONT EDIT!!\n"); f.write("\n\n"); -#Generate png image block +# Generate png image block pixmaps = glob.glob("*.png"); @@ -22,23 +22,23 @@ f.write("\n\n\n"); for x in pixmaps: - var_str=x[:-4]+"_png"; + var_str = x[:-4] + "_png"; - f.write("static const unsigned char "+ var_str +"[]={\n"); + f.write("static const unsigned char " + var_str + "[]={\n"); - pngf=open(x,"rb"); + pngf = open(x, "rb"); - b=pngf.read(1); - while(len(b)==1): + b = pngf.read(1); + while(len(b) == 1): f.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): + b = pngf.read(1); + if (len(b) == 1): f.write(",") f.write("\n};\n\n\n"); pngf.close(); -#Generate shaders block +# Generate shaders block shaders = glob.glob("*.gsl") @@ -48,22 +48,22 @@ f.write("\n\n\n"); for x in shaders: - var_str=x[:-4]+"_shader_code"; + var_str = x[:-4] + "_shader_code"; - f.write("static const char *"+ var_str +"=\n"); + f.write("static const char *" + var_str + "=\n"); - sf=open(x,"rb"); + sf = open(x, "rb"); - b=sf.readline(); - while(b!=""): + b = sf.readline(); + while(b != ""): if (b.endswith("\r\n")): - b=b[:-2] + b = b[:-2] if (b.endswith("\n")): - b=b[:-1] - f.write(" \""+b) - b=sf.readline(); - if (b!=""): + b = b[:-1] + f.write(" \"" + b) + b = sf.readline(); + if (b != ""): f.write("\"\n") f.write("\";\n\n\n"); diff --git a/servers/SCsub b/servers/SCsub index 57b5b367589..8fab6c1dc2f 100644 --- a/servers/SCsub +++ b/servers/SCsub @@ -2,8 +2,8 @@ Import('env') -env.servers_sources=[] -env.add_source_files(env.servers_sources,"*.cpp") +env.servers_sources = [] +env.add_source_files(env.servers_sources, "*.cpp") Export('env') @@ -14,6 +14,6 @@ SConscript('audio/SCsub'); SConscript('spatial_sound/SCsub'); SConscript('spatial_sound_2d/SCsub'); -lib = env.Library("servers",env.servers_sources) +lib = env.Library("servers", env.servers_sources) env.Prepend(LIBS=[lib]) diff --git a/servers/audio/SCsub b/servers/audio/SCsub index 4684bd6cf38..ccc76e823f6 100644 --- a/servers/audio/SCsub +++ b/servers/audio/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/servers/physics/SCsub b/servers/physics/SCsub index b527f38ceb0..c0ee2cc739b 100644 --- a/servers/physics/SCsub +++ b/servers/physics/SCsub @@ -2,7 +2,7 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/servers/physics/joints/SCsub b/servers/physics/joints/SCsub index 4684bd6cf38..ccc76e823f6 100644 --- a/servers/physics/joints/SCsub +++ b/servers/physics/joints/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/servers/physics_2d/SCsub b/servers/physics_2d/SCsub index 2114782faaf..d730144861b 100644 --- a/servers/physics_2d/SCsub +++ b/servers/physics_2d/SCsub @@ -2,4 +2,4 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") diff --git a/servers/spatial_sound/SCsub b/servers/spatial_sound/SCsub index 4684bd6cf38..ccc76e823f6 100644 --- a/servers/spatial_sound/SCsub +++ b/servers/spatial_sound/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/servers/spatial_sound_2d/SCsub b/servers/spatial_sound_2d/SCsub index 4684bd6cf38..ccc76e823f6 100644 --- a/servers/spatial_sound_2d/SCsub +++ b/servers/spatial_sound_2d/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/servers/visual/SCsub b/servers/visual/SCsub index 4684bd6cf38..ccc76e823f6 100644 --- a/servers/visual/SCsub +++ b/servers/visual/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.servers_sources,"*.cpp") +env.add_source_files(env.servers_sources, "*.cpp") Export('env') diff --git a/tools/SCsub b/tools/SCsub index a54b8f48a21..87ed836ae05 100644 --- a/tools/SCsub +++ b/tools/SCsub @@ -2,18 +2,18 @@ Import('env') -env.tool_sources=[] -env.add_source_files(env.tool_sources,"*.cpp") +env.tool_sources = [] +env.add_source_files(env.tool_sources, "*.cpp") Export('env') -def make_translations_header(target,source,env): +def make_translations_header(target, source, env): dst = target[0].srcnode().abspath - g = open(dst,"wb") + g = open(dst, "wb") """" @@ -29,24 +29,24 @@ def make_translations_header(target,source,env): paths = [node.srcnode().abspath for node in source] sorted_paths = sorted(paths, key=lambda path: os.path.splitext(os.path.basename(path))[0]) - xl_names=[] + xl_names = [] for i in range(len(sorted_paths)): - print("Appending translation: "+sorted_paths[i]) - f = open(sorted_paths[i],"rb") + print("Appending translation: " + sorted_paths[i]) + f = open(sorted_paths[i], "rb") buf = f.read() decomp_size = len(buf) buf = zlib.compress(buf) - name = os.path.splitext(os.path.basename(sorted_paths[i]))[0] + name = os.path.splitext(os.path.basename(sorted_paths[i]))[0] #g.write("static const int _translation_"+name+"_compressed_size="+str(len(buf))+";\n") #g.write("static const int _translation_"+name+"_uncompressed_size="+str(decomp_size)+";\n") - g.write("static const unsigned char _translation_"+name+"_compressed[]={\n") + g.write("static const unsigned char _translation_" + name + "_compressed[]={\n") for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") + g.write(str(ord(buf[i])) + ",\n") g.write("};\n") - xl_names.append([name,len(buf),str(decomp_size)]) + xl_names.append([name, len(buf), str(decomp_size)]) g.write("struct EditorTranslationList {\n") g.write("\tconst char* lang;\n"); @@ -56,19 +56,19 @@ def make_translations_header(target,source,env): g.write("};\n\n"); g.write("static EditorTranslationList _editor_translations[]={\n") for x in xl_names: - g.write("\t{ \""+x[0]+"\", "+str(x[1])+", "+str(x[2])+",_translation_"+x[0]+"_compressed},\n") + g.write("\t{ \"" + x[0] + "\", " + str(x[1]) + ", " + str(x[2]) + ",_translation_" + x[0] + "_compressed},\n") g.write("\t{NULL,0,0,NULL}\n") g.write("};\n") g.write("#endif") -def make_fonts_header(target,source,env): +def make_fonts_header(target, source, env): dst = target[0].srcnode().abspath - g = open(dst,"wb") + g = open(dst, "wb") """" @@ -78,48 +78,48 @@ def make_fonts_header(target,source,env): g.write("#ifndef _EDITOR_FONTS_H\n") g.write("#define _EDITOR_FONTS_H\n") - #saving uncompressed, since freetype will reference from memory pointer - xl_names=[] + # saving uncompressed, since freetype will reference from memory pointer + xl_names = [] for i in range(len(source)): - print("Appending font: "+source[i].srcnode().abspath) - f = open(source[i].srcnode().abspath,"rb") + print("Appending font: " + source[i].srcnode().abspath) + f = open(source[i].srcnode().abspath, "rb") buf = f.read() import os.path - name = os.path.splitext(os.path.basename(source[i].srcnode().abspath))[0] + name = os.path.splitext(os.path.basename(source[i].srcnode().abspath))[0] - g.write("static const int _font_"+name+"_size="+str(len(buf))+";\n") - g.write("static const unsigned char _font_"+name+"[]={\n") + g.write("static const int _font_" + name + "_size=" + str(len(buf)) + ";\n") + g.write("static const unsigned char _font_" + name + "[]={\n") for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") + g.write(str(ord(buf[i])) + ",\n") g.write("};\n") g.write("#endif") -if (env["tools"]!="no"): +if (env["tools"] != "no"): import glob dir = env.Dir('.').abspath tlist = glob.glob(dir + "/translations/*.po") - print("translations: ",tlist) - env.Depends('#tools/editor/translations.h',tlist) - env.Command('#tools/editor/translations.h',tlist,make_translations_header) + print("translations: ", tlist) + env.Depends('#tools/editor/translations.h', tlist) + env.Command('#tools/editor/translations.h', tlist, make_translations_header) flist = glob.glob(dir + "/editor_fonts/*.ttf") - flist.append( glob.glob(dir + "/editor_fonts/*.otf") ) + flist.append(glob.glob(dir + "/editor_fonts/*.otf")) - print("fonts: ",flist) - env.Depends('#tools/editor/builtin_fonts.h',flist) - env.Command('#tools/editor/builtin_fonts.h',flist,make_fonts_header) + print("fonts: ", flist) + env.Depends('#tools/editor/builtin_fonts.h', flist) + env.Command('#tools/editor/builtin_fonts.h', flist, make_fonts_header) SConscript('editor/SCsub'); SConscript('collada/SCsub'); SConscript('doc/SCsub') - lib = env.Library("tool",env.tool_sources) + lib = env.Library("tool", env.tool_sources) env.Prepend(LIBS=[lib]) diff --git a/tools/collada/SCsub b/tools/collada/SCsub index 473474201af..4bc64ffdc2d 100644 --- a/tools/collada/SCsub +++ b/tools/collada/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.tool_sources,"*.cpp") +env.add_source_files(env.tool_sources, "*.cpp") Export('env') diff --git a/tools/doc/SCsub b/tools/doc/SCsub index 473474201af..4bc64ffdc2d 100644 --- a/tools/doc/SCsub +++ b/tools/doc/SCsub @@ -2,6 +2,6 @@ Import('env') -env.add_source_files(env.tool_sources,"*.cpp") +env.add_source_files(env.tool_sources, "*.cpp") Export('env') diff --git a/tools/editor/SCsub b/tools/editor/SCsub index 41b4386e21c..0dbb7d3f206 100644 --- a/tools/editor/SCsub +++ b/tools/editor/SCsub @@ -3,12 +3,12 @@ Import('env') -def make_doc_header(target,source,env): +def make_doc_header(target, source, env): src = source[0].srcnode().abspath dst = target[0].srcnode().abspath - f = open(src,"rb") - g = open(dst,"wb") + f = open(src, "rb") + g = open(dst, "wb") buf = f.read() decomp_size = len(buf) import zlib @@ -18,22 +18,22 @@ def make_doc_header(target,source,env): g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") g.write("#ifndef _DOC_DATA_RAW_H\n") g.write("#define _DOC_DATA_RAW_H\n") - g.write("static const int _doc_data_compressed_size="+str(len(buf))+";\n") - g.write("static const int _doc_data_uncompressed_size="+str(decomp_size)+";\n") + g.write("static const int _doc_data_compressed_size=" + str(len(buf)) + ";\n") + g.write("static const int _doc_data_uncompressed_size=" + str(decomp_size) + ";\n") g.write("static const unsigned char _doc_data_compressed[]={\n") for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") + g.write(str(ord(buf[i])) + ",\n") g.write("};\n") g.write("#endif") -def make_certs_header(target,source,env): +def make_certs_header(target, source, env): src = source[0].srcnode().abspath dst = target[0].srcnode().abspath - f = open(src,"rb") - g = open(dst,"wb") + f = open(src, "rb") + g = open(dst, "wb") buf = f.read() decomp_size = len(buf) import zlib @@ -43,11 +43,11 @@ def make_certs_header(target,source,env): g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") g.write("#ifndef _CERTS_RAW_H\n") g.write("#define _CERTS_RAW_H\n") - g.write("static const int _certs_compressed_size="+str(len(buf))+";\n") - g.write("static const int _certs_uncompressed_size="+str(decomp_size)+";\n") + g.write("static const int _certs_compressed_size=" + str(len(buf)) + ";\n") + g.write("static const int _certs_uncompressed_size=" + str(decomp_size) + ";\n") g.write("static const unsigned char _certs_compressed[]={\n") for i in range(len(buf)): - g.write(str(ord(buf[i]))+",\n") + g.write(str(ord(buf[i])) + ",\n") g.write("};\n") g.write("#endif") @@ -57,29 +57,29 @@ def make_certs_header(target,source,env): -if (env["tools"]=="yes"): +if (env["tools"] == "yes"): - reg_exporters_inc='#include "register_exporters.h"\n' - reg_exporters='void register_exporters() {\n' + reg_exporters_inc = '#include "register_exporters.h"\n' + reg_exporters = 'void register_exporters() {\n' for e in env.platform_exporters: - env.tool_sources.append("#platform/"+e+"/export/export.cpp") - reg_exporters+='\tregister_'+e+'_exporter();\n' - reg_exporters_inc+='#include "platform/'+e+'/export/export.h"\n' - reg_exporters+='}\n' - f = open("register_exporters.cpp","wb") + env.tool_sources.append("#platform/" + e + "/export/export.cpp") + reg_exporters += '\tregister_' + e + '_exporter();\n' + reg_exporters_inc += '#include "platform/' + e + '/export/export.h"\n' + reg_exporters += '}\n' + f = open("register_exporters.cpp", "wb") f.write(reg_exporters_inc) f.write(reg_exporters) f.close() - env.Depends("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml") - env.Command("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml",make_doc_header) + env.Depends("#tools/editor/doc_data_compressed.h", "#doc/base/classes.xml") + env.Command("#tools/editor/doc_data_compressed.h", "#doc/base/classes.xml", make_doc_header) - env.Depends("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt") - env.Command("#tools/editor/certs_compressed.h","#tools/certs/ca-certificates.crt",make_certs_header) + env.Depends("#tools/editor/certs_compressed.h", "#tools/certs/ca-certificates.crt") + env.Command("#tools/editor/certs_compressed.h", "#tools/certs/ca-certificates.crt", make_certs_header) - #make_doc_header(env.File("#tools/editor/doc_data_raw.h").srcnode().abspath,env.File("#doc/base/classes.xml").srcnode().abspath,env) + # make_doc_header(env.File("#tools/editor/doc_data_raw.h").srcnode().abspath,env.File("#doc/base/classes.xml").srcnode().abspath,env) - env.add_source_files(env.tool_sources,"*.cpp") + env.add_source_files(env.tool_sources, "*.cpp") Export('env') SConscript('icons/SCsub'); diff --git a/tools/editor/fileserver/SCsub b/tools/editor/fileserver/SCsub index 6299fd416c3..4bf55189ccb 100644 --- a/tools/editor/fileserver/SCsub +++ b/tools/editor/fileserver/SCsub @@ -2,4 +2,4 @@ Import('env') Export('env') -env.add_source_files(env.tool_sources,"*.cpp") +env.add_source_files(env.tool_sources, "*.cpp") diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub index 4806a89df20..ac942b20e36 100644 --- a/tools/editor/icons/SCsub +++ b/tools/editor/icons/SCsub @@ -16,47 +16,47 @@ def make_editor_icons_action(target, source, env): s.write("#include \"editor_scale.h\"\n\n") s.write("#include \"scene/resources/theme.h\"\n\n") - hidpi_list=[] + hidpi_list = [] for x in pixmaps: - x=str(x) - var_str=os.path.basename(x)[:-4]+"_png"; - #print(var_str) + x = str(x) + var_str = os.path.basename(x)[:-4] + "_png"; + # print(var_str) - s.write("static const unsigned char "+ var_str +"[]={\n"); + s.write("static const unsigned char " + var_str + "[]={\n"); - pngf=open(x,"rb"); + pngf = open(x, "rb"); - b=pngf.read(1); - while(len(b)==1): + b = pngf.read(1); + while(len(b) == 1): s.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): + b = pngf.read(1); + if (len(b) == 1): s.write(",") s.write("\n};\n\n"); pngf.close(); - var_str=os.path.basename(x)[:-4]+"_hidpi_png"; + var_str = os.path.basename(x)[:-4] + "_hidpi_png"; try: - pngf = open(os.path.dirname(x)+"/2x/"+os.path.basename(x), "rb") + pngf = open(os.path.dirname(x) + "/2x/" + os.path.basename(x), "rb") - s.write("static const unsigned char "+ var_str +"[]={\n"); + s.write("static const unsigned char " + var_str + "[]={\n"); - b=pngf.read(1); - while(len(b)==1): + b = pngf.read(1); + while(len(b) == 1): s.write(hex(ord(b))) - b=pngf.read(1); - if (len(b)==1): + b = pngf.read(1); + if (len(b) == 1): s.write(",") s.write("\n};\n\n\n"); hidpi_list.append(x) except: - s.write("static const unsigned char* "+ var_str +"=NULL;\n\n\n"); + s.write("static const unsigned char* " + var_str + "=NULL;\n\n\n"); @@ -75,24 +75,24 @@ def make_editor_icons_action(target, source, env): for x in pixmaps: - x=os.path.basename(str(x)) - type=x[5:-4].title().replace("_",""); - var_str=x[:-4]+"_png"; - var_str_hidpi=x[:-4]+"_hidpi_png"; - s.write("\tp_theme->set_icon(\""+type+"\",\"EditorIcons\",make_icon("+var_str+","+var_str_hidpi+"));\n"); + x = os.path.basename(str(x)) + type = x[5:-4].title().replace("_", ""); + var_str = x[:-4] + "_png"; + var_str_hidpi = x[:-4] + "_hidpi_png"; + s.write("\tp_theme->set_icon(\"" + type + "\",\"EditorIcons\",make_icon(" + var_str + "," + var_str_hidpi + "));\n"); s.write("\n\n}\n\n"); - f = open(dst,"wb") + f = open(dst, "wb") f.write(s.getvalue()) f.close() s.close() make_editor_icons_builder = Builder(action=make_editor_icons_action, - suffix = '.cpp', - src_suffix = '.png') -env['BUILDERS']['MakeEditorIconsBuilder']=make_editor_icons_builder -env.Alias('editor_icons',[env.MakeEditorIconsBuilder('#tools/editor/editor_icons.cpp',Glob("*.png"))]) + suffix='.cpp', + src_suffix='.png') +env['BUILDERS']['MakeEditorIconsBuilder'] = make_editor_icons_builder +env.Alias('editor_icons', [env.MakeEditorIconsBuilder('#tools/editor/editor_icons.cpp', Glob("*.png"))]) env.tool_sources.append("#tools/editor/editor_icons.cpp") Export('env') diff --git a/tools/editor/io_plugins/SCsub b/tools/editor/io_plugins/SCsub index 6299fd416c3..4bf55189ccb 100644 --- a/tools/editor/io_plugins/SCsub +++ b/tools/editor/io_plugins/SCsub @@ -2,4 +2,4 @@ Import('env') Export('env') -env.add_source_files(env.tool_sources,"*.cpp") +env.add_source_files(env.tool_sources, "*.cpp") diff --git a/tools/editor/plugins/SCsub b/tools/editor/plugins/SCsub index 6299fd416c3..4bf55189ccb 100644 --- a/tools/editor/plugins/SCsub +++ b/tools/editor/plugins/SCsub @@ -2,4 +2,4 @@ Import('env') Export('env') -env.add_source_files(env.tool_sources,"*.cpp") +env.add_source_files(env.tool_sources, "*.cpp") diff --git a/tools/scripts/addheader.py b/tools/scripts/addheader.py index fac35372de7..573d5182f58 100644 --- a/tools/scripts/addheader.py +++ b/tools/scripts/addheader.py @@ -1,4 +1,4 @@ -header="""\ +header = """\ /*************************************************************************/ /* $filename */ /*************************************************************************/ @@ -29,44 +29,44 @@ header="""\ /*************************************************************************/ """ -f = open("files","rb") +f = open("files", "rb") fname = f.readline() -while (fname!=""): +while (fname != ""): - fr = open(fname.strip(),"rb") + fr = open(fname.strip(), "rb") l = fr.readline() - bc=False + bc = False fsingle = fname.strip() - if (fsingle.find("/")!=-1): - fsingle=fsingle[fsingle.rfind("/")+1:] - rep_fl="$filename" - rep_fi=fsingle - len_fl=len(rep_fl) - len_fi=len(rep_fi) - if (len_fi0): - cstr+=", " - cstr+=c + c = font_chars[i] + if (i > 0): + cstr += ", " + cstr += c -cstr+=("};") +cstr += ("};") print(cstr) diff --git a/tools/scripts/make_glwrapper.py b/tools/scripts/make_glwrapper.py index 8da4e28c89c..9f26f8430e5 100644 --- a/tools/scripts/make_glwrapper.py +++ b/tools/scripts/make_glwrapper.py @@ -1,31 +1,31 @@ #! /usr/bin/env python import sys -if (len(sys.argv)<2): +if (len(sys.argv) < 2): print("usage: make_glwrapper.py ") sys.exit(255) -functions=[] -types=[] -constants=[] +functions = [] +types = [] +constants = [] -READ_FUNCTIONS=0 -READ_TYPES=1 -READ_CONSTANTS=2 +READ_FUNCTIONS = 0 +READ_TYPES = 1 +READ_CONSTANTS = 2 -read_what=READ_TYPES +read_what = READ_TYPES -for x in (range(len(sys.argv)-1)): - f=open(sys.argv[x+1],"r") +for x in (range(len(sys.argv) - 1)): + f = open(sys.argv[x + 1], "r") while(True): - line=f.readline() - if (line==""): + line = f.readline() + if (line == ""): break - line=line.replace("\n","").strip() + line = line.replace("\n", "").strip() """ if (line.find("[types]")!=-1): read_what=READ_TYPES @@ -38,67 +38,67 @@ for x in (range(len(sys.argv)-1)): continue """ - if (line.find("#define")!=-1): - if (line.find("0x")==-1 and line.find("GL_VERSION")==-1): + if (line.find("#define") != -1): + if (line.find("0x") == -1 and line.find("GL_VERSION") == -1): continue constants.append(line) - elif (line.find("typedef")!=-1): - if (line.find("(")!=-1 or line.find(")")!=-1 or line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("GL")==-1): + elif (line.find("typedef") != -1): + if (line.find("(") != -1 or line.find(")") != -1 or line.find("ARB") != -1 or line.find("EXT") != -1 or line.find("GL") == -1): continue types.append(line) - elif (line.find("APIENTRY")!=-1 and line.find("GLAPI")!=-1): + elif (line.find("APIENTRY") != -1 and line.find("GLAPI") != -1): - if (line.find("ARB")!=-1 or line.find("EXT")!=-1 or line.find("NV")!=-1): + if (line.find("ARB") != -1 or line.find("EXT") != -1 or line.find("NV") != -1): continue - line=line.replace("APIENTRY","") - line=line.replace("GLAPI","") + line = line.replace("APIENTRY", "") + line = line.replace("GLAPI", "") - glpos=line.find(" gl") - if (glpos==-1): + glpos = line.find(" gl") + if (glpos == -1): - glpos=line.find("\tgl") - if (glpos==-1): + glpos = line.find("\tgl") + if (glpos == -1): continue - ret=line[:glpos].strip(); + ret = line[:glpos].strip(); - line=line[glpos:].strip() - namepos=line.find("(") + line = line[glpos:].strip() + namepos = line.find("(") - if (namepos==-1): + if (namepos == -1): continue - name=line[:namepos].strip() - line=line[namepos:] + name = line[:namepos].strip() + line = line[namepos:] - argpos=line.rfind(")") - if (argpos==-1): + argpos = line.rfind(")") + if (argpos == -1): continue - args=line[1:argpos] + args = line[1:argpos] - funcdata={} - funcdata["ret"]=ret - funcdata["name"]=name - funcdata["args"]=args + funcdata = {} + funcdata["ret"] = ret + funcdata["name"] = name + funcdata["args"] = args functions.append(funcdata) print(funcdata) -#print(types) -#print(constants) -#print(functions) +# print(types) +# print(constants) +# print(functions) -f=open("glwrapper.h","w") +f = open("glwrapper.h", "w") f.write("#ifndef GL_WRAPPER\n") f.write("#define GL_WRAPPER\n\n\n") -header_code="""\ +header_code = """\ #if defined(__gl_h_) || defined(__GL_H__) #error gl.h included before glwrapper.h #endif @@ -137,18 +137,18 @@ f.write("#else\n"); f.write("#define GLWRP_APIENTRY \n") f.write("#endif\n\n"); for x in types: - f.write(x+"\n") + f.write(x + "\n") f.write("\n\n") for x in constants: - f.write(x+"\n") + f.write(x + "\n") f.write("\n\n") for x in functions: - f.write("extern "+x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+");\n") - f.write("#define "+x["name"]+" __wrapper_"+x["name"]+"\n") + f.write("extern " + x["ret"] + " GLWRP_APIENTRY (*__wrapper_" + x["name"] + ")(" + x["args"] + ");\n") + f.write("#define " + x["name"] + " __wrapper_" + x["name"] + "\n") f.write("\n\n") f.write("typedef void (*GLWrapperFuncPtr)(void);\n\n"); @@ -158,21 +158,21 @@ f.write("#ifdef __cplusplus\n}\n#endif\n") f.write("#endif\n\n") -f=open("glwrapper.c","w") +f = open("glwrapper.c", "w") f.write("\n\n") f.write("#include \"glwrapper.h\"\n") f.write("\n\n") for x in functions: - f.write(x["ret"]+" GLWRP_APIENTRY (*__wrapper_"+x["name"]+")("+x["args"]+")=NULL;\n") + f.write(x["ret"] + " GLWRP_APIENTRY (*__wrapper_" + x["name"] + ")(" + x["args"] + ")=NULL;\n") f.write("\n\n") f.write("void glWrapperInit( GLWrapperFuncPtr (*wrapperFunc)(const char*) ) {\n") f.write("\n") for x in functions: - f.write("\t__wrapper_"+x["name"]+"=("+x["ret"]+" GLWRP_APIENTRY (*)("+x["args"]+"))wrapperFunc(\""+x["name"]+"\");\n") + f.write("\t__wrapper_" + x["name"] + "=(" + x["ret"] + " GLWRP_APIENTRY (*)(" + x["args"] + "))wrapperFunc(\"" + x["name"] + "\");\n") f.write("\n\n") f.write("}\n") diff --git a/tools/scripts/makeargs.py b/tools/scripts/makeargs.py index 7db9da82a89..a52812eaff2 100644 --- a/tools/scripts/makeargs.py +++ b/tools/scripts/makeargs.py @@ -1,5 +1,5 @@ -text=""" +text = """ #define FUNC$numR(m_r,m_func,$argt)\\ virtual m_r m_func($argtp) { \\ if (Thread::get_caller_ID()!=server_thread) {\\ @@ -65,21 +65,21 @@ text=""" -for i in range(1,8): +for i in range(1, 8): - tp="" - p="" - t="" + tp = "" + p = "" + t = "" for j in range(i): - if (j>0): - tp+=", " - p+=", " - t+=", " - tp +=("m_arg"+str(j+1)+" p"+str(j+1)) - p+=("p"+str(j+1)) - t+=("m_arg"+str(j+1)) + if (j > 0): + tp += ", " + p += ", " + t += ", " + tp += ("m_arg" + str(j + 1) + " p" + str(j + 1)) + p += ("p" + str(j + 1)) + t += ("m_arg" + str(j + 1)) - t = text.replace("$argtp",tp).replace("$argp",p).replace("$argt",t).replace("$num",str(i)) + t = text.replace("$argtp", tp).replace("$argp", p).replace("$argt", t).replace("$num", str(i)) print(t) diff --git a/tools/scripts/memsort.py b/tools/scripts/memsort.py index 15b4e7a84b1..fb636b0f78e 100644 --- a/tools/scripts/memsort.py +++ b/tools/scripts/memsort.py @@ -1,35 +1,35 @@ import sys -arg="memdump.txt" +arg = "memdump.txt" -if (len(sys.argv)>1): - arg=sys.argv[1] +if (len(sys.argv) > 1): + arg = sys.argv[1] -f = open(arg,"rb") +f = open(arg, "rb") -l=f.readline() +l = f.readline() sum = {} -cnt={} +cnt = {} -while(l!=""): +while(l != ""): - s=l.split("-") + s = l.split("-") amount = int(s[1]) - what=s[2] + what = s[2] if (what in sum): - sum[what]+=amount - cnt[what]+=1 + sum[what] += amount + cnt[what] += 1 else: - sum[what]=amount - cnt[what]=1 + sum[what] = amount + cnt[what] = 1 - l=f.readline() + l = f.readline() for x in sum: - print(x.strip()+"("+str(cnt[x])+"):\n: "+str(sum[x])) + print(x.strip() + "(" + str(cnt[x]) + "):\n: " + str(sum[x])) diff --git a/tools/scripts/svgs_2_pngs.py b/tools/scripts/svgs_2_pngs.py index 4de69a14b41..b55fd97d0aa 100644 --- a/tools/scripts/svgs_2_pngs.py +++ b/tools/scripts/svgs_2_pngs.py @@ -51,7 +51,7 @@ def export_icons(): # name without extensions name_only = file_name.replace('.svg', '') - out_icon_names = [name_only] # export to a png with the same file name + out_icon_names = [name_only] # export to a png with the same file name theme_out_icon_names = [] # special cases if special_icons.has_key(name_only): @@ -82,7 +82,7 @@ def export_theme(): # name without extensions name_only = file_name.replace('.svg', '') - out_icon_names = [name_only] # export to a png with the same file name + out_icon_names = [name_only] # export to a png with the same file name # special cases if theme_icons.has_key(name_only): special_icon = theme_icons[name_only] @@ -102,30 +102,30 @@ special_icons = { output_names=['icon_add'], theme_output_names=['icon_add', 'icon_zoom_more'] ), - 'icon_new': dict( output_names=['icon_file'] ), - 'icon_animation_tree_player': dict( output_names=['icon_animation_tree'] ), + 'icon_new': dict(output_names=['icon_file']), + 'icon_animation_tree_player': dict(output_names=['icon_animation_tree']), 'icon_tool_rotate': dict( output_names=['icon_reload'], - theme_output_names= ['icon_reload'] + theme_output_names=['icon_reload'] ), - 'icon_multi_edit': dict( output_names=['icon_multi_node_edit'] ), + 'icon_multi_edit': dict(output_names=['icon_multi_node_edit']), 'icon_folder': dict( output_names=['icon_load', 'icon_open'], - theme_output_names= ['icon_folder'] + theme_output_names=['icon_folder'] ), - 'icon_file_list': dict( output_names=['icon_enum'] ), - 'icon_collision_2d': dict( output_names=['icon_collision_polygon_2d', 'icon_polygon_2d'] ), - 'icon_class_list': dict( output_names=['icon_filesystem'] ), - 'icon_color_ramp': dict( output_names=['icon_graph_color_ramp'] ), - 'icon_translation': dict( output_names=['icon_p_hash_translation'] ), - 'icon_shader': dict( output_names=['icon_shader_material', 'icon_material_shader'] ), - 'icon_canvas_item_shader_graph': dict( output_names=['icon_material_shader_graph'] ), + 'icon_file_list': dict(output_names=['icon_enum']), + 'icon_collision_2d': dict(output_names=['icon_collision_polygon_2d', 'icon_polygon_2d']), + 'icon_class_list': dict(output_names=['icon_filesystem']), + 'icon_color_ramp': dict(output_names=['icon_graph_color_ramp']), + 'icon_translation': dict(output_names=['icon_p_hash_translation']), + 'icon_shader': dict(output_names=['icon_shader_material', 'icon_material_shader']), + 'icon_canvas_item_shader_graph': dict(output_names=['icon_material_shader_graph']), - 'icon_color_pick': dict( theme_output_names= ['icon_color_pick'], avoid_self=True ), - 'icon_play': dict( theme_output_names= ['icon_play'] ), - 'icon_stop': dict( theme_output_names= ['icon_stop'] ), - 'icon_zoom_less': dict( theme_output_names= ['icon_zoom_less'], avoid_self=True ), - 'icon_zoom_reset': dict( theme_output_names= ['icon_zoom_reset'], avoid_self=True ), + 'icon_color_pick': dict(theme_output_names=['icon_color_pick'], avoid_self=True), + 'icon_play': dict(theme_output_names=['icon_play']), + 'icon_stop': dict(theme_output_names=['icon_stop']), + 'icon_zoom_less': dict(theme_output_names=['icon_zoom_less'], avoid_self=True), + 'icon_zoom_reset': dict(theme_output_names=['icon_zoom_reset'], avoid_self=True), 'icon_snap': dict(theme_output_names=['icon_snap']) } diff --git a/tools/translations/extract.py b/tools/translations/extract.py index f2ee1ecb198..bd6f03237ba 100755 --- a/tools/translations/extract.py +++ b/tools/translations/extract.py @@ -78,7 +78,7 @@ for fname in matches: msg += l[pos] pos += 1 - location = os.path.relpath(fname).replace('\\','/') + location = os.path.relpath(fname).replace('\\', '/') if (line_nb): location += ":" + str(lc) @@ -115,7 +115,7 @@ shutil.move("tools.pot", "tools/translations/tools.pot") # TODO: Make that in a portable way, if we care; if not, kudos to Unix users if (os.name == "posix"): - added = subprocess.check_output("git diff tools/translations/tools.pot | grep \+msgid | wc -l", shell = True) - removed = subprocess.check_output("git diff tools/translations/tools.pot | grep \\\-msgid | wc -l", shell = True) + added = subprocess.check_output("git diff tools/translations/tools.pot | grep \+msgid | wc -l", shell=True) + removed = subprocess.check_output("git diff tools/translations/tools.pot | grep \\\-msgid | wc -l", shell=True) print("\n# Template changes compared to the staged status:") print("# Additions: %s msgids.\n# Deletions: %s msgids." % (int(added), int(removed))) diff --git a/version.py b/version.py index bf1d33c1cf1..d014a538c97 100644 --- a/version.py +++ b/version.py @@ -1,5 +1,5 @@ -short_name="godot" -name="Godot Engine" -major=2 -minor=2 -status="alpha" +short_name = "godot" +name = "Godot Engine" +major = 2 +minor = 2 +status = "alpha" From 817dd7ccbb166b27c93706dffc5c0c0d59fd87f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sun, 30 Oct 2016 19:05:14 +0100 Subject: [PATCH 043/223] style: Fix PEP8 blank lines issues in Python files Done with `autopep8 --select=E3,W3`, fixes: - E301 - Add missing blank line. - E302 - Add missing 2 blank lines. - E303 - Remove extra blank lines. - E304 - Remove blank line following function decorator. - E309 - Add missing blank line. - W391 - Remove trailing blank lines. --- SConstruct | 5 -- core/make_binders.py | 1 - doc/tools/doc_merge.py | 12 ++-- doc/tools/doc_status.py | 13 ++-- doc/tools/makedoku.py | 15 ----- doc/tools/makehtml.py | 39 ++---------- doc/tools/makemd.py | 1 - doc/tools/makerst.py | 6 +- methods.py | 63 ++++++++------------ modules/chibi/config.py | 1 + modules/cscript/config.py | 3 - modules/dds/config.py | 1 + modules/enet/config.py | 1 + modules/etc1/config.py | 1 + modules/freetype/config.py | 1 + modules/gdscript/config.py | 3 - modules/gridmap/config.py | 3 - modules/ik/config.py | 3 - modules/jpg/config.py | 1 + modules/mpc/config.py | 1 + modules/ogg/config.py | 1 + modules/openssl/config.py | 1 + modules/opus/config.py | 1 + modules/pbm/config.py | 1 + modules/pvr/config.py | 1 + modules/regex/config.py | 3 +- modules/squish/config.py | 1 + modules/theora/config.py | 1 + modules/visual_script/config.py | 3 - modules/vorbis/config.py | 1 + modules/webm/config.py | 1 + modules/webp/config.py | 1 + platform/android/detect.py | 7 ++- platform/bb10/detect.py | 6 +- platform/haiku/SCsub | 1 + platform/haiku/detect.py | 6 ++ platform/iphone/detect.py | 5 +- platform/javascript/detect.py | 6 +- platform/osx/detect.py | 10 ++-- platform/server/detect.py | 7 +-- platform/windows/detect.py | 17 +++--- platform/winrt/detect.py | 6 +- platform/x11/detect.py | 10 +--- scene/resources/default_theme/make_header.py | 1 - tools/SCsub | 4 -- tools/editor/SCsub | 8 --- tools/editor/icons/SCsub | 4 +- tools/scripts/addheader.py | 2 - tools/scripts/file-hex-array.py | 3 + tools/scripts/make_bmfhdr.py | 3 - tools/scripts/make_glwrapper.py | 5 -- tools/scripts/makeargs.py | 6 -- tools/scripts/svgs_2_pngs.py | 1 - 53 files changed, 115 insertions(+), 193 deletions(-) diff --git a/SConstruct b/SConstruct index ee3a54b5ab5..a1c013422e3 100644 --- a/SConstruct +++ b/SConstruct @@ -85,7 +85,6 @@ env_base.use_ptrcall = False env_base.split_drivers = False - env_base.__class__.android_add_maven_repository = methods.android_add_maven_repository env_base.__class__.android_add_dependency = methods.android_add_dependency env_base.__class__.android_add_java_dir = methods.android_add_java_dir @@ -199,7 +198,6 @@ elif env_base['p'] != "": env_base["platform"] = selected_platform - if selected_platform in platform_list: sys.path.append("./platform/" + selected_platform) @@ -298,7 +296,6 @@ if selected_platform in platform_list: sys.path.remove("./platform/" + selected_platform) sys.modules.pop('detect') - env.module_list = [] for x in module_list: @@ -314,7 +311,6 @@ if selected_platform in platform_list: sys.path.remove(tmppath) sys.modules.pop('config') - if (env.use_ptrcall): env.Append(CPPFLAGS=['-DPTRCALL_ENABLED']); @@ -371,7 +367,6 @@ if selected_platform in platform_list: # has a purpose. # env['MSVS_VERSION']='9.0' - # Calls a CMD with /C(lose) and /V(delayed environment variable expansion) options. # And runs vcvarsall bat for the propper arhitecture and scons for propper configuration env['MSVSBUILDCOM'] = 'cmd /V /C set "plat=$(PlatformTarget)" ^& (if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64")) ^& set "tools=yes" ^& (if "$(Configuration)"=="release" (set "tools=no")) ^& call "$(VCInstallDir)vcvarsall.bat" !plat! ^& scons platform=windows target=$(Configuration) tools=!tools! -j2' diff --git a/core/make_binders.py b/core/make_binders.py index 5debf1345ce..ef71c4264b8 100644 --- a/core/make_binders.py +++ b/core/make_binders.py @@ -251,7 +251,6 @@ def run(target, source, env): else: text += t - f = open(target[0].path, "w") f.write(text) f.close() diff --git a/doc/tools/doc_merge.py b/doc/tools/doc_merge.py index 747a8703784..d9f7cc9daac 100644 --- a/doc/tools/doc_merge.py +++ b/doc/tools/doc_merge.py @@ -16,6 +16,7 @@ tab = 0 old_classes = {} + def write_string(_f, text, newline=True): for t in range(tab): _f.write("\t") @@ -23,6 +24,7 @@ def write_string(_f, text, newline=True): if (newline): _f.write("\n") + def escape(ret): ret = ret.replace("&", "&"); ret = ret.replace("<", ">"); @@ -36,6 +38,7 @@ def inc_tab(): global tab tab += 1 + def dec_tab(): global tab tab -= 1 @@ -43,12 +46,14 @@ def dec_tab(): write_string(f, '') write_string(f, '') + def get_tag(node, name): tag = "" if (name in node.attrib): tag = ' ' + name + '="' + escape(node.attrib[name]) + '" ' return tag + def find_method_descr(old_class, name): methods = old_class.find("methods") @@ -61,6 +66,7 @@ def find_method_descr(old_class, name): return None + def find_signal_descr(old_class, name): signals = old_class.find("signals") @@ -73,6 +79,7 @@ def find_signal_descr(old_class, name): return None + def find_constant_descr(old_class, name): if (old_class == None): @@ -85,6 +92,7 @@ def find_constant_descr(old_class, name): return m.text return None + def write_class(c): class_name = c.attrib["name"] print("Parsing Class: " + class_name) @@ -93,7 +101,6 @@ def write_class(c): else: old_class = None - category = get_tag(c, "category") inherits = get_tag(c, "inherits") write_string(f, '') @@ -106,7 +113,6 @@ def write_class(c): if (old_brief_descr != None): write_string(f, escape(old_brief_descr.text.strip())) - write_string(f, "") write_string(f, "") @@ -207,5 +213,3 @@ for c in list(old_doc): for c in list(new_doc): write_class(c) write_string(f, '\n') - - diff --git a/doc/tools/doc_status.py b/doc/tools/doc_status.py index 4e108f352ea..e05bcd63e78 100755 --- a/doc/tools/doc_status.py +++ b/doc/tools/doc_status.py @@ -75,7 +75,6 @@ colors = { overall_progress_description_weigth = 10 - ################################################################################ # Utils # ################################################################################ @@ -85,6 +84,7 @@ def validate_tag(elem, tag): print('Tag mismatch, expected "' + tag + '", got ' + elem.tag) sys.exit(255) + def color(color, string): if flags['c']: color_format = '' @@ -95,16 +95,18 @@ def color(color, string): return string ansi_escape = re.compile(r'\x1b[^m]*m') + + def nonescape_len(s): return len(ansi_escape.sub('', s)) - ################################################################################ # Classes # ################################################################################ class ClassStatusProgress: + def __init__(self, described=0, total=0): self.described = described self.total = total @@ -146,6 +148,7 @@ class ClassStatusProgress: class ClassStatus: + def __init__(self, name=''): self.name = name self.has_brief_description = True @@ -239,7 +242,6 @@ class ClassStatus: return status - ################################################################################ # Arguments # ################################################################################ @@ -304,7 +306,6 @@ if len(input_file_list) < 1 or flags['h']: sys.exit(0) - ################################################################################ # Parse class list # ################################################################################ @@ -334,7 +335,6 @@ if len(input_class_list) < 1: input_class_list = class_names - ################################################################################ # Make output table # ################################################################################ @@ -374,8 +374,6 @@ for cn in input_class_list: table.append(row) - - ################################################################################ # Print output table # ################################################################################ @@ -427,4 +425,3 @@ print(divider_string) if total_status.is_ok() and not flags['g']: print('All listed classes are ' + color('part_good', 'OK') + '!') - diff --git a/doc/tools/makedoku.py b/doc/tools/makedoku.py index 32a52a584af..49a960c43a8 100644 --- a/doc/tools/makedoku.py +++ b/doc/tools/makedoku.py @@ -53,14 +53,12 @@ def make_class_list(class_list, columns): indexers += [n] last_initial = n[:1] - row_max = 0 for n in range(0, columns): if (len(fit_columns[n]) > row_max): row_max = len(fit_columns[n]) - for r in range(0, row_max): s = "|" for c in range(0, columns): @@ -139,7 +137,6 @@ def dokuize_text(text): else: tag_text = "[" + tag_text + "]" - text = pre_text + tag_text + post_text pos = len(pre_text) + len(tag_text) @@ -173,8 +170,6 @@ def make_method(f, name, m, declare, event=False): mdata["argidx"].append(idx) mdata[idx] = a - - if (not event): if (-1 in mdata["argidx"]): s += make_type(mdata[-1].attrib["type"]) @@ -212,7 +207,6 @@ def make_method(f, name, m, declare, event=False): if ("default" in arg.attrib): s += "=" + arg.attrib["default"] - argfound = True if (argfound): @@ -270,8 +264,6 @@ def make_doku_class(node): s += " - " + c.text.strip() f.write(s + "\n") - - constants = node.find("constants") if(constants != None and len(list(constants)) > 0): f.write("===== Numeric Constants ======\n") @@ -284,7 +276,6 @@ def make_doku_class(node): s += " - " + c.text.strip() f.write(s + "\n") - descr = node.find("description") if (descr != None and descr.text.strip() != ""): f.write("===== Description ======\n") @@ -305,10 +296,6 @@ def make_doku_class(node): f.write(dokuize_text(d.text.strip())) f.write("\n") - - - - """ div=ET.Element("div") div.attrib["class"]="class"; @@ -510,5 +497,3 @@ make_class_list(class_names, 4) for cn in class_names: c = classes[cn] make_doku_class(c) - - diff --git a/doc/tools/makehtml.py b/doc/tools/makehtml.py index 95a35960b74..369f559f25b 100644 --- a/doc/tools/makehtml.py +++ b/doc/tools/makehtml.py @@ -12,9 +12,11 @@ html_escape_table = { html_unescape_table = {v: k for k, v in html_escape_table.items()} + def html_escape(text): return escape(text, html_escape_table) + def html_unescape(text): return unescape(text, html_unescape_table) @@ -41,12 +43,14 @@ def validate_tag(elem, tag): print("Tag mismatch, expected '" + tag + "', got " + elem.tag); sys.exit(255) + def make_html_bottom(body): # make_html_top(body,True) ET.SubElement(body, "hr") copyright = ET.SubElement(body, "span") copyright.text = "Copyright 2008-2010 Codenix SRL" + def make_html_top(body, bottom=False): if (bottom): @@ -83,8 +87,6 @@ def make_html_top(body, bottom=False): ET.SubElement(body, "hr") - - def make_html_class_list(class_list, columns): div = ET.Element("div") @@ -128,7 +130,6 @@ def make_html_class_list(class_list, columns): if (len(fit_columns[n]) > row_max): row_max = len(fit_columns[n]) - for r in range(0, row_max): tr = ET.SubElement(table, "tr") for c in range(0, columns): @@ -155,7 +156,6 @@ def make_html_class_list(class_list, columns): a.attrib["href"] = link a.text = classname - if (not single_page): cat_class_list = ET.Element("html") csscc = ET.SubElement(cat_class_list, "link") @@ -169,9 +169,6 @@ def make_html_class_list(class_list, columns): else: cat_class_parent = div - - - h1 = ET.SubElement(cat_class_parent, "h2") h1.text = "Class List By Category" @@ -227,7 +224,6 @@ def make_html_class_list(class_list, columns): catet_out = ET.ElementTree(cat_class_list) catet_out.write("category.html") - if (not single_page): inh_class_list = ET.Element("html") cssic = ET.SubElement(inh_class_list, "link") @@ -240,9 +236,6 @@ def make_html_class_list(class_list, columns): else: inh_class_parent = div - - - h1 = ET.SubElement(inh_class_parent, "h2") h1.text = "Class List By Inheritance" @@ -268,7 +261,6 @@ def make_html_class_list(class_list, columns): else: parent_list = parent_list.find("list") - else: parent_list = itemlist @@ -277,7 +269,6 @@ def make_html_class_list(class_list, columns): class_inh_table[clss.attrib["name"]] = item make_type(clss.attrib["name"], item) - for c in class_list: add_class(classes[c]) @@ -286,10 +277,6 @@ def make_html_class_list(class_list, columns): catet_out = ET.ElementTree(inh_class_list) catet_out.write("inheritance.html") - - - - # h1=ET.SubElement(div,"h2") #h1.text="Class List By Inheritance" @@ -314,7 +301,6 @@ def make_type(p_type, p_parent): span.text = p_type + " " - def make_text_def(class_name, parent, text): text = html_escape(text) pos = 0 @@ -370,7 +356,6 @@ def make_text_def(class_name, parent, text): else: tag_text = "[" + tag_text + "]" - text = pre_text + tag_text + post_text pos = len(pre_text) + len(tag_text) @@ -384,17 +369,13 @@ def make_text_def(class_name, parent, text): print("Error parsing description text: '" + text + "'") sys.exit(255) - return tnode - - def make_method_def(name, m, declare, event=False): mdata = {} - if (not declare): div = ET.Element("tr") div.attrib["class"] = "method" @@ -457,7 +438,6 @@ def make_method_def(name, m, declare, event=False): span = ET.SubElement(func_parent, "span") span.text = " " - make_type(arg.attrib["type"], func_parent) span = ET.SubElement(func_parent, "span") @@ -465,7 +445,6 @@ def make_method_def(name, m, declare, event=False): if ("default" in arg.attrib): span.text = span.text + "=" + arg.attrib["default"] - span = ET.SubElement(func_parent, "span") span.attrib["class"] = "symbol" if (len(mdata["argidx"])): @@ -533,7 +512,6 @@ def make_html_class(node): else: a.attrib["href"] = "category.html#" + catname - methods = node.find("methods") if(methods != None and len(list(methods)) > 0): @@ -561,7 +539,6 @@ def make_html_class(node): #li = ET.SubElement(div2, "li") event_table.append(make_method_def(node.attrib["name"], m, False, True)) - members = node.find("members") if(members != None and len(list(members)) > 0): @@ -583,7 +560,6 @@ def make_html_class(node): span.attrib["class"] = "member_description" span.text = c.text - constants = node.find("constants") if(constants != None and len(list(constants)) > 0): @@ -613,7 +589,6 @@ def make_html_class(node): # ET.SubElement(div,"br") - descr = node.find("description") if (descr != None and descr.text.strip() != ""): h4 = ET.SubElement(div, "h4") @@ -624,8 +599,6 @@ def make_html_class(node): # div2.attrib["class"]="description"; # div2.text=descr.text - - if(methods != None or events != None): h4 = ET.SubElement(div, "h4") @@ -646,7 +619,6 @@ def make_html_class(node): div2 = ET.SubElement(div, "div") div2.attrib["class"] = "method_doc"; - div2.append(make_method_def(node.attrib["name"], m, True)) #anchor = ET.SubElement(div2, "a") # anchor.attrib["name"] = @@ -655,7 +627,6 @@ def make_html_class(node): # div3.attrib["class"]="description"; # div3.text=descr.text - return div class_names = [] @@ -688,7 +659,6 @@ if (not single_page): make_html_top(body) - class_names.sort() body.append(make_html_class_list(class_names, 5)) @@ -717,4 +687,3 @@ if (single_page): else: make_html_bottom(body) et_out.write("alphabetical.html") - diff --git a/doc/tools/makemd.py b/doc/tools/makemd.py index 0693291f35f..bd0d4c6819f 100644 --- a/doc/tools/makemd.py +++ b/doc/tools/makemd.py @@ -343,4 +343,3 @@ make_class_list(class_names, 2) for cn in class_names: c = classes[cn] make_doku_class(c) - diff --git a/doc/tools/makerst.py b/doc/tools/makerst.py index 2f3b9c269d1..6b6780ce1e4 100644 --- a/doc/tools/makerst.py +++ b/doc/tools/makerst.py @@ -24,6 +24,7 @@ def validate_tag(elem, tag): class_names = [] classes = {} + def ul_string(str, ul): str += "\n" for i in range(len(str) - 1): @@ -31,6 +32,7 @@ def ul_string(str, ul): str += "\n" return str + def make_class_list(class_list, columns): f = codecs.open('class_list.rst', 'wb', 'utf-8') @@ -343,7 +345,6 @@ def make_heading(title, underline): return title + '\n' + underline * len(title) + "\n\n" - def make_rst_class(node): name = node.attrib['name'] @@ -375,7 +376,6 @@ def make_rst_class(node): else: inh = None - f.write("\n\n") inherited = [] @@ -436,7 +436,6 @@ def make_rst_class(node): f.write(sep) f.write('\n') - events = node.find('signals') if events != None and len(list(events)) > 0: f.write(make_heading('Signals', '-')) @@ -516,4 +515,3 @@ class_names.sort() for cn in class_names: c = classes[cn] make_rst_class(c) - diff --git a/methods.py b/methods.py index 38dee11a353..269e076409a 100755 --- a/methods.py +++ b/methods.py @@ -1,5 +1,6 @@ import os + def add_source_files(self, sources, filetype, lib_env=None, shared=False): import glob; import string; @@ -27,7 +28,6 @@ def build_shader_header(target, source, env): name = name[name.rfind("\\") + 1:] name = name.replace(".", "_") - fs = open(str(x), "r") fd = open(str(x) + ".h", "w") fd.write("/* this file has been generated by SCons, do not edit! */\n") @@ -94,7 +94,6 @@ def build_glsl_header(filename): if (not ifdefline in conditionals): conditionals += [ifdefline] - import re if re.search(r"^\s*uniform", line): @@ -137,8 +136,6 @@ def build_glsl_header(filename): ubos += [(x, uboidx)] ubo_names += [x] - - else: uline = line.replace("uniform", ""); uline = uline.replace(";", ""); @@ -160,7 +157,6 @@ def build_glsl_header(filename): uline = uline.replace(";", ""); uline = uline[uline.find(" "):].strip() - if (uline.find("//") != -1): name, bind = uline.split("//") if (bind.find("attrib:") != -1): @@ -173,7 +169,6 @@ def build_glsl_header(filename): uline = uline.replace(";", ""); uline = uline[uline.find(" "):].strip() - if (uline.find("//") != -1): name, bind = uline.split("//") if (bind.find("drawbuffer:") != -1): @@ -330,7 +325,6 @@ def build_glsl_header(filename): fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init() {\n\n"); if (len(conditionals)): @@ -361,8 +355,6 @@ def build_glsl_header(filename): else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") - - if (len(fbos)): fd.write("\t\tstatic FBOPair _fbo_pairs[]={\n") for x in fbos: @@ -394,7 +386,6 @@ def build_glsl_header(filename): fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") - fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: fd.write("\t\t\t" + x + "\n"); @@ -416,11 +407,9 @@ def build_glsl_headers(target, source, env): build_glsl_header(str(x)); - return 0 - def build_hlsl_dx9_header(filename): fs = open(filename, "r") @@ -527,7 +516,6 @@ def build_hlsl_dx9_header(filename): fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); - if (len(conditionals)): fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); @@ -597,7 +585,6 @@ def build_hlsl_dx9_header(filename): fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init(IDirect3DDevice9 *p_device,ShaderSupport p_version) {\n\n"); if (len(conditionals)): @@ -630,7 +617,6 @@ def build_hlsl_dx9_header(filename): fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") fd.write("\t\tstatic const bool *_fragment_uniforms=NULL;\n") - fd.write("\t\tstatic const char* _vertex_code=\"\\\n") for x in vertex_lines: fd.write("\t\t\t" + x + "\n"); @@ -638,7 +624,6 @@ def build_hlsl_dx9_header(filename): fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") - fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: fd.write("\t\t\t" + x + "\n"); @@ -660,11 +645,11 @@ def build_hlsl_dx9_headers(target, source, env): build_hlsl_dx9_header(str(x)); - return 0 class LegacyGLHeaderStruct: + def __init__(self): self.vertex_lines = [] self.fragment_lines = [] @@ -686,6 +671,7 @@ class LegacyGLHeaderStruct: self.vertex_offset = 0 self.fragment_offset = 0 + def include_file_in_legacygl_header(filename, header_data, depth): fs = open(filename, "r") line = fs.readline() @@ -767,8 +753,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): header_data.texunits += [(x, texunit)] header_data.texunit_names += [x] - - elif (line.find("uniform") != -1): uline = line.replace("uniform", ""); uline = uline.replace(";", ""); @@ -784,7 +768,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): if (not x in header_data.uniforms): header_data.uniforms += [x] - if ((line.strip().find("in ") == 0 or line.strip().find("attribute ") == 0) and line.find("attrib:") != -1): uline = line.replace("in ", ""); uline = uline.replace("attribute ", ""); @@ -792,7 +775,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): uline = uline.replace(";", ""); uline = uline[uline.find(" "):].strip() - if (uline.find("//") != -1): name, bind = uline.split("//") if (bind.find("attrib:") != -1): @@ -800,7 +782,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): bind = bind.replace("attrib:", "").strip() header_data.attributes += [(name, bind)] - line = line.replace("\r", "") line = line.replace("\n", "") # line=line.replace("\\","\\\\") @@ -820,7 +801,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): return header_data - def build_legacygl_header(filename, include, class_suffix, output_attribs): header_data = LegacyGLHeaderStruct() @@ -847,7 +827,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("class " + out_file_class + " : public Shader" + class_suffix + " {\n\n"); fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); - fd.write("public:\n\n"); if (len(header_data.conditionals)): @@ -856,7 +835,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\t" + x.upper() + ",\n"); fd.write("\t};\n\n"); - if (len(header_data.uniforms)): fd.write("\tenum Uniforms {\n"); for x in header_data.uniforms: @@ -965,10 +943,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\n\n#undef _FU\n\n\n"); - fd.write("\tvirtual void init() {\n\n"); - enum_value_count = 0; if (len(header_data.enums)): @@ -1002,7 +978,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\t\t{(uint64_t(1<<" + str(bits) + ")-1)<<" + str(bitofs) + "," + str(bitofs) + "," + strs + "},\n"); bitofs += bits - fd.write("\t\t};\n\n"); fd.write("\t\tstatic const EnumValue _enum_values[]={\n") @@ -1046,7 +1021,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") - if (len(header_data.texunits)): fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") for x in header_data.texunits: @@ -1065,7 +1039,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\tstatic const int _vertex_code_start=" + str(header_data.vertex_offset) + ";\n") - fd.write("\t\tstatic const char _fragment_code[]={\n") for x in header_data.fragment_lines: for i in range(len(x)): @@ -1083,7 +1056,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t};\n\n") - if (len(enum_constants)): fd.write("\tenum EnumConditionals {\n") @@ -1092,7 +1064,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t};\n\n"); fd.write("\tvoid set_enum_conditional(EnumConditionals p_cond) { _set_enum_conditional(p_cond); }\n") - fd.write("};\n\n"); fd.write("#endif\n\n"); fd.close(); @@ -1104,14 +1075,15 @@ def build_legacygl_headers(target, source, env): build_legacygl_header(str(x), include="drivers/legacygl/shader_lgl.h", class_suffix="LGL", output_attribs=False); - return 0 + def build_gles2_headers(target, source, env): for x in source: build_legacygl_header(str(x), include="drivers/gles2/shader_gles2.h", class_suffix="GLES2", output_attribs=True) + def update_version(): rev = "custom_build" @@ -1121,7 +1093,6 @@ def update_version(): print("Using custom revision: " + rev) import version - f = open("core/version.h", "wb") f.write("#define VERSION_SHORT_NAME " + str(version.short_name) + "\n") f.write("#define VERSION_NAME " + str(version.name) + "\n") @@ -1134,6 +1105,7 @@ def update_version(): import datetime f.write("#define VERSION_YEAR " + str(datetime.datetime.now().year) + "\n") + def parse_cg_file(fname, uniforms, sizes, conditionals): import re @@ -1204,8 +1176,9 @@ def build_cg_shader(sname): fd.write("\t};\n"); - import glob + + def detect_modules(): module_list = [] @@ -1260,7 +1233,6 @@ void unregister_module_types() { return module_list - def win32_spawn(sh, escape, cmd, args, env): import subprocess newargs = ' '.join(args[1:]) @@ -1309,44 +1281,61 @@ def win32_spawn(sh, escape, cmd, args, spawnenv): return exit_code """ + def android_add_maven_repository(self, url): self.android_maven_repos.append(url) + def android_add_dependency(self, depline): self.android_dependencies.append(depline) + def android_add_java_dir(self, subpath): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_java_dirs.append(base_path) + def android_add_res_dir(self, subpath): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_res_dirs.append(base_path) + + def android_add_aidl_dir(self, subpath): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_aidl_dirs.append(base_path) + + def android_add_jni_dir(self, subpath): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + subpath self.android_jni_dirs.append(base_path) + + def android_add_default_config(self, config): self.android_default_config.append(config) + def android_add_to_manifest(self, file): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file f = open(base_path, "rb") self.android_manifest_chunk += f.read() + + def android_add_to_permissions(self, file): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file f = open(base_path, "rb") self.android_permission_chunk += f.read() + + def android_add_to_attributes(self, file): base_path = self.Dir(".").abspath + "/modules/" + self.current_module + "/" + file f = open(base_path, "rb") self.android_appattributes_chunk += f.read() + def disable_module(self): self.disabled_modules.append(self.current_module) + def use_windows_spawn_fix(self, platform=None): if (os.name != "nt"): @@ -1503,6 +1492,7 @@ def no_verbose(sys, env): env.Append(JARCOMSTR=[java_library_message]) env.Append(JAVACCOMSTR=[java_compile_source_message]) + def detect_visual_c_compiler_version(tools_env): # tools_env is the variable scons uses to call tools that execute tasks, SCons's env['ENV'] that executes tasks... # (see the SCons documentation for more information on what it does)... @@ -1543,7 +1533,6 @@ def detect_visual_c_compiler_version(tools_env): vc_chosen_compiler_index = vc_amd64_x86_compiler_detection_index vc_chosen_compiler_str = "amd64_x86" - # Now check the 32 bit compilers vc_x86_compiler_detection_index = tools_env["PATH"].find(tools_env["VCINSTALLDIR"] + "BIN;") if(vc_x86_compiler_detection_index > -1 diff --git a/modules/chibi/config.py b/modules/chibi/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/chibi/config.py +++ b/modules/chibi/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/cscript/config.py b/modules/cscript/config.py index ae0a3d75eed..5698a372954 100644 --- a/modules/cscript/config.py +++ b/modules/cscript/config.py @@ -6,6 +6,3 @@ def can_build(platform): def configure(env): pass - - - diff --git a/modules/dds/config.py b/modules/dds/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/dds/config.py +++ b/modules/dds/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/enet/config.py b/modules/enet/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/enet/config.py +++ b/modules/enet/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/etc1/config.py b/modules/etc1/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/etc1/config.py +++ b/modules/etc1/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/freetype/config.py b/modules/freetype/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/freetype/config.py +++ b/modules/freetype/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/gdscript/config.py b/modules/gdscript/config.py index ae0a3d75eed..5698a372954 100644 --- a/modules/gdscript/config.py +++ b/modules/gdscript/config.py @@ -6,6 +6,3 @@ def can_build(platform): def configure(env): pass - - - diff --git a/modules/gridmap/config.py b/modules/gridmap/config.py index ae0a3d75eed..5698a372954 100644 --- a/modules/gridmap/config.py +++ b/modules/gridmap/config.py @@ -6,6 +6,3 @@ def can_build(platform): def configure(env): pass - - - diff --git a/modules/ik/config.py b/modules/ik/config.py index ae0a3d75eed..5698a372954 100644 --- a/modules/ik/config.py +++ b/modules/ik/config.py @@ -6,6 +6,3 @@ def can_build(platform): def configure(env): pass - - - diff --git a/modules/jpg/config.py b/modules/jpg/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/jpg/config.py +++ b/modules/jpg/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/mpc/config.py b/modules/mpc/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/mpc/config.py +++ b/modules/mpc/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/ogg/config.py b/modules/ogg/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/ogg/config.py +++ b/modules/ogg/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/openssl/config.py b/modules/openssl/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/openssl/config.py +++ b/modules/openssl/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/opus/config.py b/modules/opus/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/opus/config.py +++ b/modules/opus/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/pbm/config.py b/modules/pbm/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/pbm/config.py +++ b/modules/pbm/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/pvr/config.py b/modules/pvr/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/pvr/config.py +++ b/modules/pvr/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/regex/config.py b/modules/regex/config.py index 667b5d8ba65..5347cfd2438 100644 --- a/modules/regex/config.py +++ b/modules/regex/config.py @@ -1,8 +1,9 @@ #!/usr/bin/env python + def can_build(platform): return True + def configure(env): pass - diff --git a/modules/squish/config.py b/modules/squish/config.py index 81e6163a9b2..cc8f0980103 100644 --- a/modules/squish/config.py +++ b/modules/squish/config.py @@ -2,6 +2,7 @@ def can_build(platform): return True + def configure(env): # Tools only, disabled for non-tools # TODO: Find a cleaner way to achieve that diff --git a/modules/theora/config.py b/modules/theora/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/theora/config.py +++ b/modules/theora/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/visual_script/config.py b/modules/visual_script/config.py index ae0a3d75eed..5698a372954 100644 --- a/modules/visual_script/config.py +++ b/modules/visual_script/config.py @@ -6,6 +6,3 @@ def can_build(platform): def configure(env): pass - - - diff --git a/modules/vorbis/config.py b/modules/vorbis/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/vorbis/config.py +++ b/modules/vorbis/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/webm/config.py b/modules/webm/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/webm/config.py +++ b/modules/webm/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/modules/webp/config.py b/modules/webp/config.py index 3de0425119c..fb920482f5f 100644 --- a/modules/webp/config.py +++ b/modules/webp/config.py @@ -2,5 +2,6 @@ def can_build(platform): return True + def configure(env): pass diff --git a/platform/android/detect.py b/platform/android/detect.py index f1331d24d5f..5de4f114451 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -3,12 +3,15 @@ import sys import string import platform + def is_active(): return True + def get_name(): return "Android" + def can_build(): import os @@ -17,6 +20,7 @@ def can_build(): return True + def get_opts(): return [ @@ -47,6 +51,7 @@ def create(env): env.Tool('gcc') return env.Clone(tools=tools); + def configure(env): # Workaround for MinGW. See: @@ -102,7 +107,6 @@ def configure(env): env.Tool('gcc') env['SHLIBSUFFIX'] = '.so' - neon_text = "" if env["android_arch"] == "armv7" and env['android_neon'] == 'yes': neon_text = " (with neon)" @@ -241,7 +245,6 @@ def configure(env): env.Append(LIBS=['gnustl_static']) env.Append(CCFLAGS=["-fno-exceptions", '-DNO_SAFE_CAST']) - import methods env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) env.Append(BUILDERS={'GLSL': env.Builder(action=methods.build_glsl_headers, suffix='glsl.h', src_suffix='.glsl')}) diff --git a/platform/bb10/detect.py b/platform/bb10/detect.py index ed320bebc17..95720794b2b 100644 --- a/platform/bb10/detect.py +++ b/platform/bb10/detect.py @@ -7,9 +7,11 @@ import methods def is_active(): return True + def get_name(): return "BlackBerry 10" + def can_build(): import os @@ -17,6 +19,7 @@ def can_build(): return False return True + def get_opts(): return [ @@ -29,6 +32,7 @@ def get_opts(): ('bb10_exceptions', 'Use exceptions when compiling on bb10', 'no'), ] + def get_flags(): return [ @@ -37,6 +41,7 @@ def get_flags(): ('module_theora_enabled', 'no'), ] + def configure(env): if env['PLATFORM'] == 'win32': @@ -86,4 +91,3 @@ def configure(env): env.Append(LINKFLAGS=['-g']) env.Append(LIBS=['bps', 'pps', 'screen', 'socket', 'EGL', 'GLESv2', 'GLESv1_CM', 'm', 'asound']) - diff --git a/platform/haiku/SCsub b/platform/haiku/SCsub index 23e5e2e7119..d0c244a1945 100644 --- a/platform/haiku/SCsub +++ b/platform/haiku/SCsub @@ -20,6 +20,7 @@ target = env.Program( command = env.Command('#bin/godot.rsrc', '#platform/haiku/godot.rdef', ['rc -o $TARGET $SOURCE']) + def addResourcesAction(target=None, source=None, env=None): return env.Execute('xres -o ' + File(target)[0].path + ' bin/godot.rsrc') diff --git a/platform/haiku/detect.py b/platform/haiku/detect.py index e64be0a188f..71202a9a495 100644 --- a/platform/haiku/detect.py +++ b/platform/haiku/detect.py @@ -1,12 +1,15 @@ import os import sys + def is_active(): return True + def get_name(): return "Haiku" + def can_build(): if (os.name != "posix"): return False @@ -16,15 +19,18 @@ def can_build(): return True + def get_opts(): return [ ('debug_release', 'Add debug symbols to release version', 'no') ] + def get_flags(): return [ ] + def configure(env): is64 = sys.maxsize > 2**32 diff --git a/platform/iphone/detect.py b/platform/iphone/detect.py index 541487d5868..229038697c2 100644 --- a/platform/iphone/detect.py +++ b/platform/iphone/detect.py @@ -5,9 +5,11 @@ import sys def is_active(): return True + def get_name(): return "iOS" + def can_build(): import sys @@ -17,6 +19,7 @@ def can_build(): return False + def get_opts(): return [ @@ -33,6 +36,7 @@ def get_opts(): ('ios_sim', 'Build simulator binary', 'no'), ] + def get_flags(): return [ @@ -43,7 +47,6 @@ def get_flags(): ] - def configure(env): env.Append(CPPPATH=['#platform/iphone']) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index 56b41a49aca..04a6bc9cc3c 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -2,12 +2,15 @@ import os import sys import string + def is_active(): return True + def get_name(): return "JavaScript" + def can_build(): import os @@ -15,6 +18,7 @@ def can_build(): return False return True + def get_opts(): return [ @@ -22,6 +26,7 @@ def get_opts(): ['javascript_eval', 'Enable JavaScript eval interface', 'yes'], ] + def get_flags(): return [ @@ -33,7 +38,6 @@ def get_flags(): ] - def configure(env): env['ENV'] = os.environ; env.use_windows_spawn_fix('javascript') diff --git a/platform/osx/detect.py b/platform/osx/detect.py index ddb27742a0a..9191f1aabca 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -6,17 +6,19 @@ import sys def is_active(): return True + def get_name(): return "OSX" + def can_build(): if (sys.platform == "darwin" or os.environ.has_key("OSXCROSS_ROOT")): return True - return False + def get_opts(): return [ @@ -25,13 +27,13 @@ def get_opts(): ] + def get_flags(): return [ ] - def configure(env): env.Append(CPPPATH=['#platform/osx']) @@ -51,8 +53,6 @@ def configure(env): env.Append(CCFLAGS=['-g3', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) - - if (not os.environ.has_key("OSXCROSS_ROOT")): # regular native build if (env["bits"] == "64"): @@ -72,14 +72,12 @@ def configure(env): else: basecmd = root + "/target/bin/i386-apple-" + env["osxcross_sdk"] + "-" - env['CC'] = basecmd + "cc" env['CXX'] = basecmd + "c++" env['AR'] = basecmd + "ar" env['RANLIB'] = basecmd + "ranlib" env['AS'] = basecmd + "as" - env.Append(CPPFLAGS=["-DAPPLE_STYLE_KEYS"]) env.Append(CPPFLAGS=['-DUNIX_ENABLED', '-DGLES2_ENABLED', '-DOSX_ENABLED']) env.Append(LIBS=['pthread']) diff --git a/platform/server/detect.py b/platform/server/detect.py index 9d23ceda706..b367e1f2c31 100644 --- a/platform/server/detect.py +++ b/platform/server/detect.py @@ -6,6 +6,7 @@ import sys def is_active(): return True + def get_name(): return "Server" @@ -17,6 +18,7 @@ def can_build(): return True # enabled + def get_opts(): return [ @@ -24,13 +26,13 @@ def get_opts(): ('force_32_bits', 'Force 32 bits binary', 'no') ] + def get_flags(): return [ ] - def configure(env): env.Append(CPPPATH=['#platform/server']) @@ -47,13 +49,11 @@ def configure(env): else: env["bits"] = "32" - # if (env["tools"]=="no"): # #no tools suffix # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"] == "release"): env.Append(CCFLAGS=['-O2', '-ffast-math', '-fomit-frame-pointer']) @@ -73,4 +73,3 @@ def configure(env): env.Append(CPPFLAGS=['-DTYPED_METHOD_BIND']) env["CC"] = "clang" env["LD"] = "clang++" - diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 4bb6dc4a966..93d29a36066 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -99,12 +99,15 @@ import sys import methods + def is_active(): return True + def get_name(): return "Windows" + def can_build(): if (os.name == "nt"): @@ -148,6 +151,7 @@ def can_build(): return False + def get_opts(): mingw = "" @@ -167,12 +171,12 @@ def get_opts(): if (os.getenv("MINGW64_PREFIX")): mingw64 = os.getenv("MINGW64_PREFIX") - return [ ('mingw_prefix', 'Mingw Prefix', mingw32), ('mingw_prefix_64', 'Mingw Prefix 64 bits', mingw64), ] + def get_flags(): return [ @@ -180,6 +184,7 @@ def get_flags(): ('openssl', 'builtin'), # use builtin openssl ] + def build_res_file(target, source, env): cmdbase = "" @@ -200,6 +205,7 @@ def build_res_file(target, source, env): return 1 return 0 + def configure(env): env.Append(CPPPATH=['#platform/windows']) @@ -210,7 +216,6 @@ def configure(env): env.Append(CPPPATH=['#platform/windows/include']) env.Append(LIBPATH=['#platform/windows/lib']) - if (env["target"] == "release"): env.Append(CCFLAGS=['/O2']) @@ -234,7 +239,6 @@ def configure(env): env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) env.Append(LINKFLAGS=['/DEBUG']) - env.Append(CCFLAGS=['/MT', '/Gd', '/GR', '/nologo']) env.Append(CXXFLAGS=['/TP']) env.Append(CPPFLAGS=['/DMSVC', '/GR', ]) @@ -331,8 +335,6 @@ def configure(env): else: nulstr = ">nul" - - # if os.system(mingw_prefix+"gcc --version"+nulstr)!=0: # #not really super consistent but.. # print("Can't find Windows compiler: "+mingw_prefix) @@ -357,8 +359,6 @@ def configure(env): env.Append(CCFLAGS=['-g', '-Wall', '-DDEBUG_ENABLED', '-DDEBUG_MEMORY_ENABLED']) - - env["CC"] = mingw_prefix + "gcc" env['AS'] = mingw_prefix + "as" env['CXX'] = mingw_prefix + "g++" @@ -381,9 +381,6 @@ def configure(env): # env.Append(CPPFLAGS=['-march=i686']) # env.Append(LINKFLAGS=['-march=i686']) - - - #'d3dx9d' env.Append(CPPFLAGS=['-DMINGW_ENABLED']) # env.Append(LINKFLAGS=['-g']) diff --git a/platform/winrt/detect.py b/platform/winrt/detect.py index 19a6324e1e8..15ac429687d 100644 --- a/platform/winrt/detect.py +++ b/platform/winrt/detect.py @@ -8,9 +8,11 @@ import methods def is_active(): return True + def get_name(): return "WinRT" + def can_build(): if (os.name == "nt"): # building natively on windows! @@ -22,9 +24,11 @@ def can_build(): return True return False + def get_opts(): return [] + def get_flags(): return [ @@ -129,12 +133,10 @@ def configure(env): env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE']) env.Append(LINKFLAGS=['/DEBUG']) - env.Append(CCFLAGS=string.split('/FS /MP /GS /wd"4453" /wd"28204" /wd"4291" /Zc:wchar_t /Gm- /fp:precise /D "_UNICODE" /D "UNICODE" /D "WINAPI_FAMILY=WINAPI_FAMILY_APP" /errorReport:prompt /WX- /Zc:forScope /Gd /EHsc /nologo')) env.Append(CXXFLAGS=string.split('/ZW /FS')) env.Append(CCFLAGS=['/AI', os.environ['VCINSTALLDIR'] + '\\vcpackages', '/AI', os.environ['WINDOWSSDKDIR'] + '\\References\\CommonConfiguration\\Neutral']) - env["PROGSUFFIX"] = "." + arch + env["PROGSUFFIX"] env["OBJSUFFIX"] = "." + arch + env["OBJSUFFIX"] env["LIBSUFFIX"] = "." + arch + env["LIBSUFFIX"] diff --git a/platform/x11/detect.py b/platform/x11/detect.py index 37c787179fe..935a9fcc26c 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -7,6 +7,7 @@ import platform def is_active(): return True + def get_name(): return "X11" @@ -50,9 +51,9 @@ def can_build(): print("xrandr not found.. x11 disabled.") return False - return True # X11 enabled + def get_opts(): return [ @@ -65,6 +66,7 @@ def get_opts(): ('debug_release', 'Add debug symbols to release version', 'no'), ] + def get_flags(): return [ @@ -74,7 +76,6 @@ def get_flags(): ] - def configure(env): is64 = sys.maxsize > 2**32 @@ -104,13 +105,11 @@ def configure(env): env.Append(LINKFLAGS=['-fsanitize=address']) env.extra_suffix += "s" - # if (env["tools"]=="no"): # #no tools suffix # env['OBJSUFFIX'] = ".nt"+env['OBJSUFFIX'] # env['LIBSUFFIX'] = ".nt"+env['LIBSUFFIX'] - if (env["target"] == "release"): if (env["debug_release"] == "yes"): @@ -174,7 +173,6 @@ def configure(env): if (env["libogg"] == "system"): env.ParseConfig('pkg-config ogg --cflags --libs') - env.Append(CPPFLAGS=['-DOPENGL_ENABLED']) if (env["glew"] == "system"): @@ -223,7 +221,6 @@ def configure(env): env.Append(CPPFLAGS=['-m64']) env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu']) - import methods env.Append(BUILDERS={'GLSL120': env.Builder(action=methods.build_legacygl_headers, suffix='glsl.h', src_suffix='.glsl')}) @@ -237,4 +234,3 @@ def configure(env): list_of_x86 = ['x86_64', 'x86', 'i386', 'i586'] if any(platform.machine() in s for s in list_of_x86): env["x86_libtheora_opt_gcc"] = True - diff --git a/scene/resources/default_theme/make_header.py b/scene/resources/default_theme/make_header.py index d8799fd7da7..41d5e2e259b 100644 --- a/scene/resources/default_theme/make_header.py +++ b/scene/resources/default_theme/make_header.py @@ -54,7 +54,6 @@ for x in shaders: sf = open(x, "rb"); - b = sf.readline(); while(b != ""): if (b.endswith("\r\n")): diff --git a/tools/SCsub b/tools/SCsub index 87ed836ae05..f11aa34ee69 100644 --- a/tools/SCsub +++ b/tools/SCsub @@ -10,12 +10,10 @@ Export('env') def make_translations_header(target, source, env): - dst = target[0].srcnode().abspath g = open(dst, "wb") - """" """ @@ -65,12 +63,10 @@ def make_translations_header(target, source, env): def make_fonts_header(target, source, env): - dst = target[0].srcnode().abspath g = open(dst, "wb") - """" """ diff --git a/tools/editor/SCsub b/tools/editor/SCsub index 0dbb7d3f206..625103eaeea 100644 --- a/tools/editor/SCsub +++ b/tools/editor/SCsub @@ -14,7 +14,6 @@ def make_doc_header(target, source, env): import zlib buf = zlib.compress(buf) - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") g.write("#ifndef _DOC_DATA_RAW_H\n") g.write("#define _DOC_DATA_RAW_H\n") @@ -27,7 +26,6 @@ def make_doc_header(target, source, env): g.write("#endif") - def make_certs_header(target, source, env): src = source[0].srcnode().abspath @@ -39,7 +37,6 @@ def make_certs_header(target, source, env): import zlib buf = zlib.compress(buf) - g.write("/* THIS FILE IS GENERATED DO NOT EDIT */\n") g.write("#ifndef _CERTS_RAW_H\n") g.write("#define _CERTS_RAW_H\n") @@ -52,11 +49,6 @@ def make_certs_header(target, source, env): g.write("#endif") - - - - - if (env["tools"] == "yes"): reg_exporters_inc = '#include "register_exporters.h"\n' diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub index ac942b20e36..f27350a1841 100644 --- a/tools/editor/icons/SCsub +++ b/tools/editor/icons/SCsub @@ -2,6 +2,7 @@ Import('env') + def make_editor_icons_action(target, source, env): import os @@ -58,9 +59,6 @@ def make_editor_icons_action(target, source, env): except: s.write("static const unsigned char* " + var_str + "=NULL;\n\n\n"); - - - s.write("static Ref make_icon(const uint8_t* p_png,const uint8_t* p_hidpi_png) {\n") s.write("\tRef texture( memnew( ImageTexture ) );\n") s.write("\tbool use_hidpi_image=(editor_get_scale()>1.0&&p_hidpi_png);\n") diff --git a/tools/scripts/addheader.py b/tools/scripts/addheader.py index 573d5182f58..7838e16ae0c 100644 --- a/tools/scripts/addheader.py +++ b/tools/scripts/addheader.py @@ -56,7 +56,6 @@ while (fname != ""): else: text = header.replace("$filename", fsingle) - while (l != ""): if ((l.find("//") != 0 and l.find("/*") != 0 and l.strip() != "") or bc): text += l @@ -69,4 +68,3 @@ while (fname != ""): fr.close() # print(text) fname = f.readline() - diff --git a/tools/scripts/file-hex-array.py b/tools/scripts/file-hex-array.py index 8b7c3edd7fb..da95c9505f8 100755 --- a/tools/scripts/file-hex-array.py +++ b/tools/scripts/file-hex-array.py @@ -2,6 +2,7 @@ import binascii import os.path import sys + def tof(filepath): with open(filepath, 'r') as f: content = f.read() @@ -15,6 +16,7 @@ def tof(filepath): print(os.path.basename(filepath) + ".file created.") exit(0) + def toa(filepath): with open(filepath, 'rb') as f: content = f.read() @@ -28,6 +30,7 @@ def toa(filepath): print(os.path.basename(filepath) + ".array created.") exit(0) + def usage(): print("========================================================\n\ #\n\ diff --git a/tools/scripts/make_bmfhdr.py b/tools/scripts/make_bmfhdr.py index e6aa43f6bbb..1d3c40f9c6d 100644 --- a/tools/scripts/make_bmfhdr.py +++ b/tools/scripts/make_bmfhdr.py @@ -33,7 +33,6 @@ while(l != ""): s = x.split("=") d[s[0]] = s[1] - if (t == "common"): font_height = d["lineHeight"] font_ascent = d["base"] @@ -49,8 +48,6 @@ while(l != ""): font_chars.append(d["xadvance"]) font_cc += 1 - - l = f.readline() diff --git a/tools/scripts/make_glwrapper.py b/tools/scripts/make_glwrapper.py index 9f26f8430e5..bae0eeeadc9 100644 --- a/tools/scripts/make_glwrapper.py +++ b/tools/scripts/make_glwrapper.py @@ -87,7 +87,6 @@ for x in (range(len(sys.argv) - 1)): print(funcdata) - # print(types) # print(constants) # print(functions) @@ -177,7 +176,3 @@ for x in functions: f.write("\n\n") f.write("}\n") f.write("\n\n") - - - - diff --git a/tools/scripts/makeargs.py b/tools/scripts/makeargs.py index a52812eaff2..2cd47ae087d 100644 --- a/tools/scripts/makeargs.py +++ b/tools/scripts/makeargs.py @@ -64,7 +64,6 @@ text = """ """ - for i in range(1, 8): tp = "" @@ -81,8 +80,3 @@ for i in range(1, 8): t = text.replace("$argtp", tp).replace("$argp", p).replace("$argt", t).replace("$num", str(i)) print(t) - - - - - diff --git a/tools/scripts/svgs_2_pngs.py b/tools/scripts/svgs_2_pngs.py index b55fd97d0aa..b24324dcd7e 100644 --- a/tools/scripts/svgs_2_pngs.py +++ b/tools/scripts/svgs_2_pngs.py @@ -73,7 +73,6 @@ def export_icons(): svg_to_png(source_path, theme_dir_base + theme_out_icon_name, 90) - def export_theme(): svgs_path = theme_dir_source file_names = [f for f in listdir(svgs_path) if isfile(join(svgs_path, f))] From f34151ff0f91e8f0df8eaf829334b2205eb7da3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Tue, 1 Nov 2016 00:24:30 +0100 Subject: [PATCH 044/223] style: Various other PEP8 fixes in Python files Done with `autopep8 --select=E7`, fixes: - E701 - Put colon-separated compound statement on separate lines. - E702 - Put semicolon-separated compound statement on separate lines. - E703 - Put semicolon-separated compound statement on separate lines. - E711 - Fix comparison with None. - E712 - Fix (trivial case of) comparison with boolean. - E713 - Fix (trivial case of) non-membership check. - E721 - Fix various deprecated code (via lib2to3). --- SConstruct | 22 +- bin/SCsub | 2 +- core/SCsub | 8 +- doc/tools/doc_merge.py | 30 +- doc/tools/makedoku.py | 2 +- doc/tools/makehtml.py | 28 +- drivers/SCsub | 22 +- drivers/gles2/shaders/SCsub | 10 +- drivers/png/SCsub | 2 +- methods.py | 444 +++++++++---------- modules/openssl/SCsub | 2 +- platform/android/detect.py | 4 +- platform/iphone/SCsub | 2 +- platform/javascript/detect.py | 2 +- platform/windows/detect.py | 6 +- platform/winrt/detect.py | 2 +- scene/SCsub | 16 +- scene/resources/SCsub | 2 +- scene/resources/default_theme/make_header.py | 50 +-- servers/SCsub | 12 +- tools/SCsub | 14 +- tools/editor/SCsub | 8 +- tools/editor/icons/SCsub | 36 +- tools/scripts/file-hex-array.py | 3 +- tools/scripts/make_glwrapper.py | 10 +- 25 files changed, 370 insertions(+), 369 deletions(-) diff --git a/SConstruct b/SConstruct index a1c013422e3..0d8a84b6e81 100644 --- a/SConstruct +++ b/SConstruct @@ -1,6 +1,6 @@ #!/usr/bin/env python -EnsureSConsVersion(0, 14); +EnsureSConsVersion(0, 14) import string @@ -37,7 +37,7 @@ for x in glob.glob("platform/*"): global_defaults.append(x[9:]) if (detect.is_active()): active_platforms.append(detect.get_name()) - active_platform_ids.append(x); + active_platform_ids.append(x) if (detect.can_build()): x = x.replace("platform/", "") # rest of world x = x.replace("platform\\", "") # win32 @@ -64,7 +64,7 @@ elif (os.name == "nt"): if (os.getenv("VCINSTALLDIR") == None or platform_arg == "android"): custom_tools = ['mingw'] -env_base = Environment(tools=custom_tools); +env_base = Environment(tools=custom_tools) if 'TERM' in os.environ: env_base['ENV']['TERM'] = os.environ['TERM'] env_base.AppendENVPath('PATH', os.getenv('PATH')) @@ -140,9 +140,9 @@ opts.Add('glew', 'GLEW library for the gl_context (system/builtin)', 'builtin') opts.Add('xaudio2', 'XAudio2 audio driver (yes/no)', 'no') opts.Add("CXX", "C++ Compiler") opts.Add("CC", "C Compiler") -opts.Add("CCFLAGS", "Custom flags for the C/C++ compiler"); -opts.Add("CFLAGS", "Custom flags for the C compiler"); -opts.Add("LINKFLAGS", "Custom flags for the linker"); +opts.Add("CCFLAGS", "Custom flags for the C/C++ compiler") +opts.Add("CFLAGS", "Custom flags for the C compiler") +opts.Add("LINKFLAGS", "Custom flags for the linker") opts.Add('unix_global_settings_path', 'unix-specific path to system-wide settings. Currently only used by templates.', '') opts.Add('disable_3d', 'Disable 3D nodes for smaller executable (yes/no)', "no") opts.Add('disable_advanced_gui', 'Disable advance 3D gui nodes and behaviors (yes/no)', "no") @@ -180,11 +180,11 @@ sys.modules.pop('detect') """ if (env_base['target'] == 'debug'): - env_base.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']); + env_base.Append(CPPFLAGS=['-DDEBUG_MEMORY_ALLOC']) env_base.Append(CPPFLAGS=['-DSCI_NAMESPACE']) if (env_base['deprecated'] != 'no'): - env_base.Append(CPPFLAGS=['-DENABLE_DEPRECATED']); + env_base.Append(CPPFLAGS=['-DENABLE_DEPRECATED']) env_base.platforms = {} @@ -264,7 +264,7 @@ if selected_platform in platform_list: sys.exit(255) suffix += ".opt" - env.Append(CCFLAGS=['-DNDEBUG']); + env.Append(CCFLAGS=['-DNDEBUG']) elif (env["target"] == "release_debug"): if (env["tools"] == "yes"): @@ -312,7 +312,7 @@ if selected_platform in platform_list: sys.modules.pop('config') if (env.use_ptrcall): - env.Append(CPPFLAGS=['-DPTRCALL_ENABLED']); + env.Append(CPPFLAGS=['-DPTRCALL_ENABLED']) # to test 64 bits compiltion # env.Append(CPPFLAGS=['-m64']) @@ -349,7 +349,7 @@ if selected_platform in platform_list: SConscript("modules/SCsub") SConscript("main/SCsub") - SConscript("platform/" + selected_platform + "/SCsub"); # build selected platform + SConscript("platform/" + selected_platform + "/SCsub") # build selected platform # Microsoft Visual Studio Project Generation if (env['vsproj']) == "yes": diff --git a/bin/SCsub b/bin/SCsub index 6b29a44fd91..52f7e3bb399 100644 --- a/bin/SCsub +++ b/bin/SCsub @@ -3,4 +3,4 @@ Import('env') Export('env') -SConscript('tests/SCsub'); +SConscript('tests/SCsub') diff --git a/core/SCsub b/core/SCsub index 558df83d05a..caae3a1c9be 100644 --- a/core/SCsub +++ b/core/SCsub @@ -57,10 +57,10 @@ Export('env') import make_binders env.Command(['method_bind.inc', 'method_bind_ext.inc'], 'make_binders.py', make_binders.run) -SConscript('os/SCsub'); -SConscript('math/SCsub'); -SConscript('io/SCsub'); -SConscript('bind/SCsub'); +SConscript('os/SCsub') +SConscript('math/SCsub') +SConscript('io/SCsub') +SConscript('bind/SCsub') lib = env.Library("core", env.core_sources) diff --git a/doc/tools/doc_merge.py b/doc/tools/doc_merge.py index d9f7cc9daac..57ac4bdcddf 100644 --- a/doc/tools/doc_merge.py +++ b/doc/tools/doc_merge.py @@ -26,11 +26,11 @@ def write_string(_f, text, newline=True): def escape(ret): - ret = ret.replace("&", "&"); - ret = ret.replace("<", ">"); - ret = ret.replace(">", "<"); - ret = ret.replace("'", "'"); - ret = ret.replace("\"", """); + ret = ret.replace("&", "&") + ret = ret.replace("<", ">") + ret = ret.replace(">", "<") + ret = ret.replace("'", "'") + ret = ret.replace("\"", """) return ret @@ -138,22 +138,22 @@ def write_class(c): for a in list(m): if (a.tag == "return"): typ = get_tag(a, "type") - write_string(f, ''); - write_string(f, ''); + write_string(f, '') + write_string(f, '') elif (a.tag == "argument"): default = get_tag(a, "default") - write_string(f, ''); - write_string(f, ''); + write_string(f, '') + write_string(f, '') - write_string(f, ''); + write_string(f, '') if (old_class != None): old_method_descr = find_method_descr(old_class, m.attrib["name"]) if (old_method_descr): write_string(f, escape(escape(old_method_descr.strip()))) - write_string(f, ''); + write_string(f, '') dec_tab() write_string(f, "") dec_tab() @@ -173,15 +173,15 @@ def write_class(c): for a in list(m): if (a.tag == "argument"): - write_string(f, ''); - write_string(f, ''); + write_string(f, '') + write_string(f, '') - write_string(f, ''); + write_string(f, '') if (old_class != None): old_signal_descr = find_signal_descr(old_class, m.attrib["name"]) if (old_signal_descr): write_string(f, escape(old_signal_descr.strip())) - write_string(f, ''); + write_string(f, '') dec_tab() write_string(f, "") dec_tab() diff --git a/doc/tools/makedoku.py b/doc/tools/makedoku.py index 49a960c43a8..ad3bfd791dc 100644 --- a/doc/tools/makedoku.py +++ b/doc/tools/makedoku.py @@ -17,7 +17,7 @@ if len(input_list) < 1: def validate_tag(elem, tag): if (elem.tag != tag): - print("Tag mismatch, expected '" + tag + "', got " + elem.tag); + print("Tag mismatch, expected '" + tag + "', got " + elem.tag) sys.exit(255) diff --git a/doc/tools/makehtml.py b/doc/tools/makehtml.py index 369f559f25b..3ecb8220cbb 100644 --- a/doc/tools/makehtml.py +++ b/doc/tools/makehtml.py @@ -40,7 +40,7 @@ if len(input_list) < 1: def validate_tag(elem, tag): if (elem.tag != tag): - print("Tag mismatch, expected '" + tag + "', got " + elem.tag); + print("Tag mismatch, expected '" + tag + "', got " + elem.tag) sys.exit(255) @@ -90,7 +90,7 @@ def make_html_top(body, bottom=False): def make_html_class_list(class_list, columns): div = ET.Element("div") - div.attrib["class"] = "ClassList"; + div.attrib["class"] = "ClassList" h1 = ET.SubElement(div, "h2") h1.text = "Alphabetical Class List" @@ -463,7 +463,7 @@ def make_method_def(name, m, declare, event=False): def make_html_class(node): div = ET.Element("div") - div.attrib["class"] = "class"; + div.attrib["class"] = "class" a = ET.SubElement(div, "a") a.attrib["name"] = node.attrib["name"] @@ -482,7 +482,7 @@ def make_html_class(node): ET.SubElement(div, "br") div2 = ET.SubElement(div, "div") - div2.attrib["class"] = "inheritance"; + div2.attrib["class"] = "inheritance" span = ET.SubElement(div2, "span") span.text = "Inherits: " @@ -493,7 +493,7 @@ def make_html_class(node): ET.SubElement(div, "br") div3 = ET.SubElement(div, "div") - div3.attrib["class"] = "category"; + div3.attrib["class"] = "category" span = ET.SubElement(div3, "span") span.attrib["class"] = "category" @@ -520,7 +520,7 @@ def make_html_class(node): h4.text = "Public Methods:" method_table = ET.SubElement(div, "table") - method_table.attrib["class"] = "method_list"; + method_table.attrib["class"] = "method_list" for m in list(methods): #li = ET.SubElement(div2, "li") @@ -533,7 +533,7 @@ def make_html_class(node): h4.text = "Events:" event_table = ET.SubElement(div, "table") - event_table.attrib["class"] = "method_list"; + event_table.attrib["class"] = "method_list" for m in list(events): #li = ET.SubElement(div2, "li") @@ -545,13 +545,13 @@ def make_html_class(node): h4 = ET.SubElement(div, "h4") h4.text = "Public Variables:" div2 = ET.SubElement(div, "div") - div2.attrib["class"] = "member_list"; + div2.attrib["class"] = "member_list" for c in list(members): li = ET.SubElement(div2, "li") div3 = ET.SubElement(li, "div") - div3.attrib["class"] = "member"; + div3.attrib["class"] = "member" make_type(c.attrib["type"], div3) span = ET.SubElement(div3, "span") span.attrib["class"] = "identifier member_name" @@ -566,12 +566,12 @@ def make_html_class(node): h4 = ET.SubElement(div, "h4") h4.text = "Constants:" div2 = ET.SubElement(div, "div") - div2.attrib["class"] = "constant_list"; + div2.attrib["class"] = "constant_list" for c in list(constants): li = ET.SubElement(div2, "li") div3 = ET.SubElement(li, "div") - div3.attrib["class"] = "constant"; + div3.attrib["class"] = "constant" span = ET.SubElement(div3, "span") span.attrib["class"] = "identifier constant_name" @@ -614,10 +614,10 @@ def make_html_class(node): descr = m.find("description") if (descr == None or descr.text.strip() == ""): - continue; + continue div2 = ET.SubElement(div, "div") - div2.attrib["class"] = "method_doc"; + div2.attrib["class"] = "method_doc" div2.append(make_method_def(node.attrib["name"], m, True)) #anchor = ET.SubElement(div2, "a") @@ -675,7 +675,7 @@ for cn in class_names: css.attrib["type"] = "text/css" body2 = ET.SubElement(html2, "body") make_html_top(body2) - body2.append(make_html_class(c)); + body2.append(make_html_class(c)) make_html_bottom(body2) et_out = ET.ElementTree(html2) et_out.write(c.attrib["name"] + ".html") diff --git a/drivers/SCsub b/drivers/SCsub index 4ff770c7d09..08a189272fd 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -5,31 +5,31 @@ Import('env') env.drivers_sources = [] if ("builtin_zlib" in env and env["builtin_zlib"] == "yes"): - SConscript("zlib/SCsub"); + SConscript("zlib/SCsub") # OS drivers -SConscript('unix/SCsub'); -SConscript('windows/SCsub'); +SConscript('unix/SCsub') +SConscript('windows/SCsub') # Sounds drivers -SConscript('alsa/SCsub'); -SConscript('pulseaudio/SCsub'); +SConscript('alsa/SCsub') +SConscript('pulseaudio/SCsub') if (env["platform"] == "windows"): - SConscript("rtaudio/SCsub"); + SConscript("rtaudio/SCsub") if (env["xaudio2"] == "yes"): - SConscript("xaudio2/SCsub"); + SConscript("xaudio2/SCsub") # Graphics drivers -SConscript('gles2/SCsub'); -SConscript('gl_context/SCsub'); +SConscript('gles2/SCsub') +SConscript('gl_context/SCsub') # Core dependencies -SConscript("png/SCsub"); +SConscript("png/SCsub") # Tools override # FIXME: Should likely be integrated in the tools/ codebase if (env["tools"] == "yes"): - SConscript("convex_decomp/SCsub"); + SConscript("convex_decomp/SCsub") if env['vsproj'] == "yes": env.AddToVSProject(env.drivers_sources) diff --git a/drivers/gles2/shaders/SCsub b/drivers/gles2/shaders/SCsub index 26b6f7f06a0..bf4ec9485d0 100644 --- a/drivers/gles2/shaders/SCsub +++ b/drivers/gles2/shaders/SCsub @@ -3,10 +3,10 @@ Import('env') if env['BUILDERS'].has_key('GLSL120GLES'): - env.GLSL120GLES('material.glsl'); - env.GLSL120GLES('canvas.glsl'); - env.GLSL120GLES('canvas_shadow.glsl'); - env.GLSL120GLES('blur.glsl'); - env.GLSL120GLES('copy.glsl'); + env.GLSL120GLES('material.glsl') + env.GLSL120GLES('canvas.glsl') + env.GLSL120GLES('canvas_shadow.glsl') + env.GLSL120GLES('blur.glsl') + env.GLSL120GLES('copy.glsl') Export('env') diff --git a/drivers/png/SCsub b/drivers/png/SCsub index d3ece651961..aedde3e2b74 100644 --- a/drivers/png/SCsub +++ b/drivers/png/SCsub @@ -33,7 +33,7 @@ if (env["libpng"] == "builtin"): import os if ("neon_enabled" in env and env["neon_enabled"]) and os.name != "nt": env_png.Append(CPPFLAGS=["-DPNG_ARM_NEON_OPT=2"]) - env_neon = env_png.Clone(); + env_neon = env_png.Clone() if "S_compiler" in env: env_neon['CC'] = env['S_compiler'] neon_sources = [] diff --git a/methods.py b/methods.py index 269e076409a..91afd9e70bb 100755 --- a/methods.py +++ b/methods.py @@ -2,8 +2,8 @@ import os def add_source_files(self, sources, filetype, lib_env=None, shared=False): - import glob; - import string; + import glob + import string # if not lib_objects: if not lib_env: lib_env = self @@ -101,8 +101,8 @@ def build_glsl_header(filename): # texture unit texunit = str(int(line[line.find(":") + 1:].strip())) uline = line[:line.lower().find("//")] - uline = uline.replace("uniform", ""); - uline = uline.replace(";", ""); + uline = uline.replace("uniform", "") + uline = uline.replace(";", "") lines = uline.split(",") for x in lines: @@ -120,9 +120,9 @@ def build_glsl_header(filename): # ubo uboidx = str(int(line[line.find(":") + 1:].strip())) uline = line[:line.lower().find("//")] - uline = uline[uline.find("uniform") + len("uniform"):]; - uline = uline.replace(";", ""); - uline = uline.replace("{", ""); + uline = uline[uline.find("uniform") + len("uniform"):] + uline = uline.replace(";", "") + uline = uline.replace("{", "") lines = uline.split(",") for x in lines: @@ -137,8 +137,8 @@ def build_glsl_header(filename): ubo_names += [x] else: - uline = line.replace("uniform", ""); - uline = uline.replace(";", ""); + uline = line.replace("uniform", "") + uline = uline.replace(";", "") lines = uline.split(",") for x in lines: @@ -152,9 +152,9 @@ def build_glsl_header(filename): uniforms += [x] if ((line.strip().find("in ") == 0 or line.strip().find("attribute ") == 0) and line.find("attrib:") != -1): - uline = line.replace("in ", ""); - uline = uline.replace("attribute ", ""); - uline = uline.replace(";", ""); + uline = line.replace("in ", "") + uline = uline.replace("attribute ", "") + uline = uline.replace(";", "") uline = uline[uline.find(" "):].strip() if (uline.find("//") != -1): @@ -165,8 +165,8 @@ def build_glsl_header(filename): attributes += [(name, bind)] if (line.strip().find("out ") == 0): - uline = line.replace("out", "").strip(); - uline = uline.replace(";", ""); + uline = line.replace("out", "").strip() + uline = uline.replace(";", "") uline = uline[uline.find(" "):].strip() if (uline.find("//") != -1): @@ -190,12 +190,12 @@ def build_glsl_header(filename): line = fs.readline() line_offset += 1 - fs.close(); + fs.close() out_file = filename + ".h" fd = open(out_file, "w") - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n") out_file_base = out_file out_file_base = out_file_base[out_file_base.rfind("/") + 1:] @@ -205,47 +205,47 @@ def build_glsl_header(filename): fd.write("#ifndef " + out_file_ifdef + "\n") fd.write("#define " + out_file_ifdef + "\n") - out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "ShaderGL"; - fd.write("\n\n"); - fd.write("#include \"drivers/opengl/shader_gl.h\"\n\n\n"); - fd.write("class " + out_file_class + " : public ShaderGL {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); - fd.write("public:\n\n"); + out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "ShaderGL" + fd.write("\n\n") + fd.write("#include \"drivers/opengl/shader_gl.h\"\n\n\n") + fd.write("class " + out_file_class + " : public ShaderGL {\n\n") + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n") + fd.write("public:\n\n") if (len(conditionals)): - fd.write("\tenum Conditionals {\n"); + fd.write("\tenum Conditionals {\n") for x in conditionals: - fd.write("\t\t" + x + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x + ",\n") + fd.write("\t};\n\n") if (len(uniforms)): - fd.write("\tenum Uniforms {\n"); + fd.write("\tenum Uniforms {\n") for x in uniforms: - fd.write("\t\t" + x.upper() + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x.upper() + ",\n") + fd.write("\t};\n\n") - fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); + fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n") if (len(conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); - fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n") + fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n ") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n") fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU @@ -276,7 +276,7 @@ def build_glsl_header(filename): } - """); + """) fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU @@ -307,7 +307,7 @@ def build_glsl_header(filename): } - """); + """) fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU @@ -321,18 +321,18 @@ def build_glsl_header(filename): } glUniformMatrix4fv(get_uniform(p_uniform),1,false,matrix); - }; """); + }; """) - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n") - fd.write("\tvirtual void init() {\n\n"); + fd.write("\tvirtual void init() {\n\n") if (len(conditionals)): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(conditionals)): for x in conditionals: - fd.write("\t\t\t\"#define " + x + "\\n\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"#define " + x + "\\n\",\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -341,8 +341,8 @@ def build_glsl_header(filename): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(uniforms)): for x in uniforms: - fd.write("\t\t\t\"" + x + "\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"" + x + "\",\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") @@ -350,62 +350,62 @@ def build_glsl_header(filename): fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") for x in attributes: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") if (len(fbos)): fd.write("\t\tstatic FBOPair _fbo_pairs[]={\n") for x in fbos: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic FBOPair *_fbo_pairs=NULL;\n") if (len(ubos)): fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") for x in ubos: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic UBOPair *_ubo_pairs=NULL;\n") if (len(texunits)): fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") for x in texunits: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") fd.write("\t\tstatic const char* _vertex_code=\"\\\n") for x in vertex_lines: - fd.write("\t\t\t" + x + "\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\t\t" + x + "\n") + fd.write("\t\t\";\n\n") fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: - fd.write("\t\t\t" + x + "\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\t\t" + x + "\n") + fd.write("\t\t\";\n\n") fd.write("\t\tstatic const int _fragment_code_start=" + str(fragment_offset) + ";\n") fd.write("\t\tsetup(_conditional_strings," + str(len(conditionals)) + ",_uniform_strings," + str(len(uniforms)) + ",_attribute_pairs," + str(len(attributes)) + ",_fbo_pairs," + str(len(fbos)) + ",_ubo_pairs," + str(len(ubos)) + ",_texunit_pairs," + str(len(texunits)) + ",_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n") + fd.write("#endif\n\n") + fd.close() def build_glsl_headers(target, source, env): for x in source: - build_glsl_header(str(x)); + build_glsl_header(str(x)) return 0 @@ -455,8 +455,8 @@ def build_hlsl_dx9_header(filename): if (not ifdefline in conditionals): conditionals += [ifdefline] if (line.find("uniform") != -1): - uline = line.replace("uniform", ""); - uline = uline.replace(";", ""); + uline = line.replace("uniform", "") + uline = uline.replace(";", "") lines = uline.split(",") for x in lines: @@ -483,12 +483,12 @@ def build_hlsl_dx9_header(filename): line = fs.readline() line_offset += 1 - fs.close(); + fs.close() out_file = filename + ".h" fd = open(out_file, "w") - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n") out_file_base = out_file out_file_base = out_file_base[out_file_base.rfind("/") + 1:] @@ -498,47 +498,47 @@ def build_hlsl_dx9_header(filename): fd.write("#ifndef " + out_file_ifdef + "\n") fd.write("#define " + out_file_ifdef + "\n") - out_file_class = out_file_base.replace(".hlsl.h", "").title().replace("_", "").replace(".", "") + "ShaderDX9"; - fd.write("\n\n"); - fd.write("#include \"drivers/directx9/shader_dx9.h\"\n\n\n"); - fd.write("class " + out_file_class + " : public ShaderDX9 {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); - fd.write("public:\n\n"); + out_file_class = out_file_base.replace(".hlsl.h", "").title().replace("_", "").replace(".", "") + "ShaderDX9" + fd.write("\n\n") + fd.write("#include \"drivers/directx9/shader_dx9.h\"\n\n\n") + fd.write("class " + out_file_class + " : public ShaderDX9 {\n\n") + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n") + fd.write("public:\n\n") if (len(conditionals)): - fd.write("\tenum Conditionals {\n"); + fd.write("\tenum Conditionals {\n") for x in conditionals: - fd.write("\t\t" + x + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x + ",\n") + fd.write("\t};\n\n") if (len(uniforms)): - fd.write("\tenum Uniforms {\n"); + fd.write("\tenum Uniforms {\n") for x in uniforms: - fd.write("\t\t" + x.upper() + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x.upper() + ",\n") + fd.write("\t};\n\n") if (len(conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n") - fd.write("\t#define _FU if (!_uniform_valid(p_uniform)) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, bool p_value) { _FU set_uniformb(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); + fd.write("\t#define _FU if (!_uniform_valid(p_uniform)) return; ERR_FAIL_COND( get_active()!=this );\n\n ") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, bool p_value) { _FU set_uniformb(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU set_uniformf(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU float col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; set_uniformfv(p_uniform,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU float vec2[4]={p_vec2.x,p_vec2.y,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU float vec3[4]={p_vec3.x,p_vec3.y,p_vec3.z,0}; set_uniformfv(p_uniform,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU float vec2[4]={p_a,p_b,0,0}; set_uniformfv(p_uniform,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU float vec3[4]={p_a,p_b,p_c,0}; set_uniformfv(p_uniform,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU float vec4[4]={p_a,p_b,p_c,p_d}; set_uniformfv(p_uniform,vec4); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU set_uniformi(p_uniform,p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU float col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; set_uniformfv(p_uniform,col); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU float vec2[4]={p_vec2.x,p_vec2.y,0,0}; set_uniformfv(p_uniform,vec2); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU float vec3[4]={p_vec3.x,p_vec3.y,p_vec3.z,0}; set_uniformfv(p_uniform,vec3); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU float vec2[4]={p_a,p_b,0,0}; set_uniformfv(p_uniform,vec2); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU float vec3[4]={p_a,p_b,p_c,0}; set_uniformfv(p_uniform,vec3); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU float vec4[4]={p_a,p_b,p_c,p_d}; set_uniformfv(p_uniform,vec4); }\n\n") fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU @@ -567,7 +567,7 @@ def build_hlsl_dx9_header(filename): } - """); + """) fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU @@ -581,18 +581,18 @@ def build_hlsl_dx9_header(filename): } set_uniformfv(p_uniform,&matrix[0],4); - }; """); + }; """) - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n") - fd.write("\tvirtual void init(IDirect3DDevice9 *p_device,ShaderSupport p_version) {\n\n"); + fd.write("\tvirtual void init(IDirect3DDevice9 *p_device,ShaderSupport p_version) {\n\n") if (len(conditionals)): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(conditionals)): for x in conditionals: - fd.write("\t\t\t\"" + x + "\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"" + x + "\",\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -601,49 +601,49 @@ def build_hlsl_dx9_header(filename): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(uniforms)): for x in uniforms: - fd.write("\t\t\t\"" + x + "\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"" + x + "\",\n") + fd.write("\t\t};\n\n") fd.write("\t\tstatic const bool _fragment_uniforms[]={\n") if (len(uniforms)): for x in fragment_uniforms: if (x): - fd.write("\t\t\ttrue,\n"); + fd.write("\t\t\ttrue,\n") else: - fd.write("\t\t\tfalse,\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\tfalse,\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") fd.write("\t\tstatic const bool *_fragment_uniforms=NULL;\n") fd.write("\t\tstatic const char* _vertex_code=\"\\\n") for x in vertex_lines: - fd.write("\t\t\t" + x + "\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\t\t" + x + "\n") + fd.write("\t\t\";\n\n") fd.write("\t\tstatic const int _vertex_code_start=" + str(vertex_offset) + ";\n") fd.write("\t\tstatic const char* _fragment_code=\"\\\n") for x in fragment_lines: - fd.write("\t\t\t" + x + "\n"); - fd.write("\t\t\";\n\n"); + fd.write("\t\t\t" + x + "\n") + fd.write("\t\t\";\n\n") fd.write("\t\tstatic const int _fragment_code_start=" + str(fragment_offset) + ";\n") fd.write("\t\tsetup(p_device,p_version,_conditional_strings," + str(len(conditionals)) + ",_uniform_strings," + str(len(uniforms)) + ",_fragment_uniforms,_vertex_code,_fragment_code,_vertex_code_start,_fragment_code_start);\n") fd.write("\t};\n\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n") + fd.write("#endif\n\n") + fd.close() def build_hlsl_dx9_headers(target, source, env): for x in source: - build_hlsl_dx9_header(str(x)); + build_hlsl_dx9_header(str(x)) return 0 @@ -717,14 +717,14 @@ def include_file_in_legacygl_header(filename, header_data, depth): ifdefline = ifdefline.replace(")", "").strip() if (line.find("_EN_") != -1): - enumbase = ifdefline[:ifdefline.find("_EN_")]; + enumbase = ifdefline[:ifdefline.find("_EN_")] ifdefline = ifdefline.replace("_EN_", "_") line = line.replace("_EN_", "_") # print(enumbase+":"+ifdefline); if (enumbase not in header_data.enums): header_data.enums[enumbase] = [] if (ifdefline not in header_data.enums[enumbase]): - header_data.enums[enumbase].append(ifdefline); + header_data.enums[enumbase].append(ifdefline) elif (not ifdefline in header_data.conditionals): header_data.conditionals += [ifdefline] @@ -737,9 +737,9 @@ def include_file_in_legacygl_header(filename, header_data, depth): else: texunit = str(int(texunitstr)) uline = line[:line.lower().find("//")] - uline = uline.replace("uniform", ""); - uline = uline.replace("highp", ""); - uline = uline.replace(";", ""); + uline = uline.replace("uniform", "") + uline = uline.replace("highp", "") + uline = uline.replace(";", "") lines = uline.split(",") for x in lines: @@ -754,8 +754,8 @@ def include_file_in_legacygl_header(filename, header_data, depth): header_data.texunit_names += [x] elif (line.find("uniform") != -1): - uline = line.replace("uniform", ""); - uline = uline.replace(";", ""); + uline = line.replace("uniform", "") + uline = uline.replace(";", "") lines = uline.split(",") for x in lines: @@ -769,10 +769,10 @@ def include_file_in_legacygl_header(filename, header_data, depth): header_data.uniforms += [x] if ((line.strip().find("in ") == 0 or line.strip().find("attribute ") == 0) and line.find("attrib:") != -1): - uline = line.replace("in ", ""); - uline = uline.replace("attribute ", ""); - uline = uline.replace("highp ", ""); - uline = uline.replace(";", ""); + uline = line.replace("in ", "") + uline = uline.replace("attribute ", "") + uline = uline.replace("highp ", "") + uline = uline.replace(";", "") uline = uline[uline.find(" "):].strip() if (uline.find("//") != -1): @@ -796,7 +796,7 @@ def include_file_in_legacygl_header(filename, header_data, depth): line = fs.readline() header_data.line_offset += 1 - fs.close(); + fs.close() return header_data @@ -811,7 +811,7 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): enum_constants = [] - fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n"); + fd.write("/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */\n") out_file_base = out_file out_file_base = out_file_base[out_file_base.rfind("/") + 1:] @@ -821,49 +821,49 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("#ifndef " + out_file_ifdef + class_suffix + "_120\n") fd.write("#define " + out_file_ifdef + class_suffix + "_120\n") - out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "Shader" + class_suffix; - fd.write("\n\n"); - fd.write("#include \"" + include + "\"\n\n\n"); - fd.write("class " + out_file_class + " : public Shader" + class_suffix + " {\n\n"); - fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n"); + out_file_class = out_file_base.replace(".glsl.h", "").title().replace("_", "").replace(".", "") + "Shader" + class_suffix + fd.write("\n\n") + fd.write("#include \"" + include + "\"\n\n\n") + fd.write("class " + out_file_class + " : public Shader" + class_suffix + " {\n\n") + fd.write("\t virtual String get_shader_name() const { return \"" + out_file_class + "\"; }\n") - fd.write("public:\n\n"); + fd.write("public:\n\n") if (len(header_data.conditionals)): - fd.write("\tenum Conditionals {\n"); + fd.write("\tenum Conditionals {\n") for x in header_data.conditionals: - fd.write("\t\t" + x.upper() + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x.upper() + ",\n") + fd.write("\t};\n\n") if (len(header_data.uniforms)): - fd.write("\tenum Uniforms {\n"); + fd.write("\tenum Uniforms {\n") for x in header_data.uniforms: - fd.write("\t\t" + x.upper() + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x.upper() + ",\n") + fd.write("\t};\n\n") - fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n"); + fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n") if (len(header_data.conditionals)): - fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n"); - fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n "); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_conditional(Conditionals p_conditional,bool p_enable) { _set_conditional(p_conditional,p_enable); }\n\n") + fd.write("\t#define _FU if (get_uniform(p_uniform)<0) return; ERR_FAIL_COND( get_active()!=this );\n\n ") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, double p_value) { _FU glUniform1f(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int8_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int16_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int32_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n") #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, uint64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, int64_t p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, unsigned long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); #fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, long p_value) { _FU glUniform1i(get_uniform(p_uniform),p_value); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n"); - fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n"); + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Color& p_color) { _FU GLfloat col[4]={p_color.r,p_color.g,p_color.b,p_color.a}; glUniform4fv(get_uniform(p_uniform),1,col); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector2& p_vec2) { _FU GLfloat vec2[2]={p_vec2.x,p_vec2.y}; glUniform2fv(get_uniform(p_uniform),1,vec2); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Vector3& p_vec3) { _FU GLfloat vec3[3]={p_vec3.x,p_vec3.y,p_vec3.z}; glUniform3fv(get_uniform(p_uniform),1,vec3); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b) { _FU glUniform2f(get_uniform(p_uniform),p_a,p_b); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c) { _FU glUniform3f(get_uniform(p_uniform),p_a,p_b,p_c); }\n\n") + fd.write("\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, float p_a, float p_b, float p_c, float p_d) { _FU glUniform4f(get_uniform(p_uniform),p_a,p_b,p_c,p_d); }\n\n") fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Transform& p_transform) { _FU @@ -894,7 +894,7 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): } - """); + """) fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const Matrix32& p_transform) { _FU @@ -925,7 +925,7 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): } - """); + """) fd.write("""\t_FORCE_INLINE_ void set_uniform(Uniforms p_uniform, const CameraMatrix& p_matrix) { _FU @@ -939,17 +939,17 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): } glUniformMatrix4fv(get_uniform(p_uniform),1,false,matrix); - }; """); + }; """) - fd.write("\n\n#undef _FU\n\n\n"); + fd.write("\n\n#undef _FU\n\n\n") - fd.write("\tvirtual void init() {\n\n"); + fd.write("\tvirtual void init() {\n\n") - enum_value_count = 0; + enum_value_count = 0 if (len(header_data.enums)): - fd.write("\t\t//Written using math, given nonstandarity of 64 bits integer constants..\n"); + fd.write("\t\t//Written using math, given nonstandarity of 64 bits integer constants..\n") fd.write("\t\tstatic const Enum _enums[]={\n") bitofs = len(header_data.conditionals) @@ -958,7 +958,7 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): for xv in header_data.enums: x = header_data.enums[xv] bits = 1 - amt = len(x); + amt = len(x) # print(x) while(2**bits < amt): bits += 1 @@ -975,18 +975,18 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): strs += "NULL}" - fd.write("\t\t\t{(uint64_t(1<<" + str(bits) + ")-1)<<" + str(bitofs) + "," + str(bitofs) + "," + strs + "},\n"); + fd.write("\t\t\t{(uint64_t(1<<" + str(bits) + ")-1)<<" + str(bitofs) + "," + str(bitofs) + "," + strs + "},\n") bitofs += bits - fd.write("\t\t};\n\n"); + fd.write("\t\t};\n\n") fd.write("\t\tstatic const EnumValue _enum_values[]={\n") - enum_value_count = len(enum_vals); + enum_value_count = len(enum_vals) for x in enum_vals: - fd.write("\t\t\t{" + x["set_mask"] + "," + x["clear_mask"] + "},\n"); + fd.write("\t\t\t{" + x["set_mask"] + "," + x["clear_mask"] + "},\n") - fd.write("\t\t};\n\n"); + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const Enum *_enums=NULL;\n") fd.write("\t\tstatic const EnumValue *_enum_values=NULL;\n") @@ -996,8 +996,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\tstatic const char* _conditional_strings[]={\n") if (len(header_data.conditionals)): for x in header_data.conditionals: - fd.write("\t\t\t\"#define " + x + "\\n\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"#define " + x + "\\n\",\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_conditional_strings=NULL;\n") @@ -1006,8 +1006,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\tstatic const char* _uniform_strings[]={\n") if (len(header_data.uniforms)): for x in header_data.uniforms: - fd.write("\t\t\t\"" + x + "\",\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t\"" + x + "\",\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic const char **_uniform_strings=NULL;\n") @@ -1016,36 +1016,36 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\t\tstatic AttributePair _attribute_pairs[]={\n") for x in header_data.attributes: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic AttributePair *_attribute_pairs=NULL;\n") if (len(header_data.texunits)): fd.write("\t\tstatic TexUnitPair _texunit_pairs[]={\n") for x in header_data.texunits: - fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n"); - fd.write("\t\t};\n\n"); + fd.write("\t\t\t{\"" + x[0] + "\"," + x[1] + "},\n") + fd.write("\t\t};\n\n") else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") fd.write("\t\tstatic const char _vertex_code[]={\n") for x in header_data.vertex_lines: for i in range(len(x)): - fd.write(str(ord(x[i])) + ","); + fd.write(str(ord(x[i])) + ",") - fd.write(str(ord('\n')) + ","); - fd.write("\t\t0};\n\n"); + fd.write(str(ord('\n')) + ",") + fd.write("\t\t0};\n\n") fd.write("\t\tstatic const int _vertex_code_start=" + str(header_data.vertex_offset) + ";\n") fd.write("\t\tstatic const char _fragment_code[]={\n") for x in header_data.fragment_lines: for i in range(len(x)): - fd.write(str(ord(x[i])) + ","); + fd.write(str(ord(x[i])) + ",") - fd.write(str(ord('\n')) + ","); - fd.write("\t\t0};\n\n"); + fd.write(str(ord('\n')) + ",") + fd.write("\t\t0};\n\n") fd.write("\t\tstatic const int _fragment_code_start=" + str(header_data.fragment_offset) + ";\n") @@ -1060,20 +1060,20 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs): fd.write("\tenum EnumConditionals {\n") for x in enum_constants: - fd.write("\t\t" + x.upper() + ",\n"); - fd.write("\t};\n\n"); + fd.write("\t\t" + x.upper() + ",\n") + fd.write("\t};\n\n") fd.write("\tvoid set_enum_conditional(EnumConditionals p_cond) { _set_enum_conditional(p_cond); }\n") - fd.write("};\n\n"); - fd.write("#endif\n\n"); - fd.close(); + fd.write("};\n\n") + fd.write("#endif\n\n") + fd.close() def build_legacygl_headers(target, source, env): for x in source: - build_legacygl_header(str(x), include="drivers/legacygl/shader_lgl.h", class_suffix="LGL", output_attribs=False); + build_legacygl_header(str(x), include="drivers/legacygl/shader_lgl.h", class_suffix="LGL", output_attribs=False) return 0 @@ -1120,22 +1120,22 @@ def parse_cg_file(fname, uniforms, sizes, conditionals): type = res.groups(1) name = res.groups(2) - uniforms.append(name); + uniforms.append(name) if (type.find("texobj") != -1): - sizes.append(1); + sizes.append(1) else: - t = re.match(r"float(\d)x(\d)", type); + t = re.match(r"float(\d)x(\d)", type) if t: sizes.append(int(t.groups(1)) * int(t.groups(2))) else: - t = re.match(r"float(\d)", type); + t = re.match(r"float(\d)", type) sizes.append(int(t.groups(1))) if line.find("[branch]") != -1: - conditionals.append(name); + conditionals.append(name) - line = fs.readline(); + line = fs.readline() def build_cg_shader(sname): @@ -1144,36 +1144,36 @@ def build_cg_shader(sname): vp_uniform_sizes = [] vp_conditionals = [] - parse_cg_file("vp_" + sname + ".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals); + parse_cg_file("vp_" + sname + ".cg", vp_uniforms, vp_uniform_sizes, vp_conditionals) fp_uniforms = [] fp_uniform_sizes = [] fp_conditionals = [] - parse_cg_file("fp_" + sname + ".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals); + parse_cg_file("fp_" + sname + ".cg", fp_uniforms, fp_uniform_sizes, fp_conditionals) - fd = open("shader_" + sname + ".cg.h", "w"); + fd = open("shader_" + sname + ".cg.h", "w") - fd.write('\n#include "shader_cell.h"\n'); - fd.write("\nclass Shader_" + sname + " : public ShaderCell {\n"); - fd.write("\n\tstatic struct VertexUniforms[] = {\n"); + fd.write('\n#include "shader_cell.h"\n') + fd.write("\nclass Shader_" + sname + " : public ShaderCell {\n") + fd.write("\n\tstatic struct VertexUniforms[] = {\n") - offset = 0; + offset = 0 for i in range(0, len(vp_uniforms)): fd.write('\t\t{ "%s", %d, %d },\n' % (vp_uniforms[i], offset, vp_uniform_sizes[i])) - offset = offset + vp_uniform_sizes[i]; - fd.write("\t};\n\n"); + offset = offset + vp_uniform_sizes[i] + fd.write("\t};\n\n") - fd.write("public:\n\n"); + fd.write("public:\n\n") - fd.write("\tenum {\n"); + fd.write("\tenum {\n") for i in range(0, len(vp_uniforms)): fd.write('\t\tVP_%s,\n' % vp_uniforms[i].upper()) - fd.write("\t};\n"); + fd.write("\t};\n") import glob @@ -1440,13 +1440,13 @@ def save_active_platforms(apnames, ap): for x in ap: pth = x + "/logo.png" # print("open path: "+pth) - pngf = open(pth, "rb"); - b = pngf.read(1); + pngf = open(pth, "rb") + b = pngf.read(1) str = " /* AUTOGENERATED FILE, DO NOT EDIT */ \n" str += " static const unsigned char _" + x[9:] + "_logo[]={" while(len(b) == 1): str += hex(ord(b)) - b = pngf.read(1); + b = pngf.read(1) if (len(b) == 1): str += "," diff --git a/modules/openssl/SCsub b/modules/openssl/SCsub index 75ea9b6bdee..bac80fb76e0 100644 --- a/modules/openssl/SCsub +++ b/modules/openssl/SCsub @@ -682,6 +682,6 @@ env_openssl.add_source_files(env.modules_sources, "*.c") # platform/winrt need to know openssl is available, pass to main env if "platform" in env and env["platform"] == "winrt": env.Append(CPPPATH=[thirdparty_dir]) - env.Append(CPPFLAGS=['-DOPENSSL_ENABLED']); + env.Append(CPPFLAGS=['-DOPENSSL_ENABLED']) Export('env') diff --git a/platform/android/detect.py b/platform/android/detect.py index 5de4f114451..d52bb57c0fa 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -49,7 +49,7 @@ def create(env): if "applelink" in tools: tools.remove("applelink") env.Tool('gcc') - return env.Clone(tools=tools); + return env.Clone(tools=tools) def configure(env): @@ -124,7 +124,7 @@ def configure(env): else: env.extra_suffix = ".armv7" + env.extra_suffix - gcc_path = env["ANDROID_NDK_ROOT"] + "/toolchains/" + env["NDK_TARGET"] + "/prebuilt/"; + gcc_path = env["ANDROID_NDK_ROOT"] + "/toolchains/" + env["NDK_TARGET"] + "/prebuilt/" if (sys.platform.startswith("linux")): if (platform.machine().endswith('64')): diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 7e71faa377a..38f789259c6 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -23,7 +23,7 @@ iphone_lib = [ #iphone_lib = env.Library('iphone', iphone_lib) -env_ios = env.Clone(); +env_ios = env.Clone() if env['ios_gles22_override'] == "yes": diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index 04a6bc9cc3c..ec99392987e 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -39,7 +39,7 @@ def get_flags(): def configure(env): - env['ENV'] = os.environ; + env['ENV'] = os.environ env.use_windows_spawn_fix('javascript') env.Append(CPPPATH=['#platform/javascript']) diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 93d29a36066..0576b0bfa9c 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -267,7 +267,7 @@ def configure(env): env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")]) env.Append(CCFLAGS=["/I" + DIRECTX_PATH + "/Include"]) env.Append(LIBPATH=[DIRECTX_PATH + "/Lib/x86"]) - env['ENV'] = os.environ; + env['ENV'] = os.environ # This detection function needs the tools env (that is env['ENV'], not SCons's env), and that is why it's this far bellow in the code compiler_version_str = methods.detect_visual_c_compiler_version(env['ENV']) @@ -323,10 +323,10 @@ def configure(env): env.Append(LINKFLAGS=['-static']) env.Append(LINKFLAGS=['-static-libgcc']) env.Append(LINKFLAGS=['-static-libstdc++']) - mingw_prefix = env["mingw_prefix"]; + mingw_prefix = env["mingw_prefix"] else: env.Append(LINKFLAGS=['-static']) - mingw_prefix = env["mingw_prefix_64"]; + mingw_prefix = env["mingw_prefix_64"] nulstr = "" diff --git a/platform/winrt/detect.py b/platform/winrt/detect.py index 15ac429687d..2156e593e84 100644 --- a/platform/winrt/detect.py +++ b/platform/winrt/detect.py @@ -49,7 +49,7 @@ def configure(env): sys.exit() arch = "" - env['ENV'] = os.environ; + env['ENV'] = os.environ # ANGLE angle_root = os.getenv("ANGLE_SRC_PATH") diff --git a/scene/SCsub b/scene/SCsub index 4a89eef4698..bd2da1eab99 100644 --- a/scene/SCsub +++ b/scene/SCsub @@ -7,14 +7,14 @@ env.add_source_files(env.scene_sources, "*.cpp") Export('env') -SConscript('main/SCsub'); -SConscript('gui/SCsub'); -SConscript('3d/SCsub'); -SConscript('2d/SCsub'); -SConscript('animation/SCsub'); -SConscript('audio/SCsub'); -SConscript('resources/SCsub'); -SConscript('io/SCsub'); +SConscript('main/SCsub') +SConscript('gui/SCsub') +SConscript('3d/SCsub') +SConscript('2d/SCsub') +SConscript('animation/SCsub') +SConscript('audio/SCsub') +SConscript('resources/SCsub') +SConscript('io/SCsub') lib = env.Library("scene", env.scene_sources) diff --git a/scene/resources/SCsub b/scene/resources/SCsub index 9c1e72f04f1..60b16cd0d4d 100644 --- a/scene/resources/SCsub +++ b/scene/resources/SCsub @@ -7,4 +7,4 @@ env.add_source_files(env.scene_sources, "*.c") Export('env') -SConscript("default_theme/SCsub"); +SConscript("default_theme/SCsub") diff --git a/scene/resources/default_theme/make_header.py b/scene/resources/default_theme/make_header.py index 41d5e2e259b..68c9e925278 100644 --- a/scene/resources/default_theme/make_header.py +++ b/scene/resources/default_theme/make_header.py @@ -1,71 +1,71 @@ -import os; -import glob; -import string; +import os +import glob +import string # Generate include files f = open("theme_data.h", "wb") -f.write("// THIS FILE HAS BEEN AUTOGENERATED, DONT EDIT!!\n"); +f.write("// THIS FILE HAS BEEN AUTOGENERATED, DONT EDIT!!\n") -f.write("\n\n"); +f.write("\n\n") # Generate png image block -pixmaps = glob.glob("*.png"); +pixmaps = glob.glob("*.png") -pixmaps.sort(); +pixmaps.sort() -f.write("\n\n\n"); +f.write("\n\n\n") for x in pixmaps: - var_str = x[:-4] + "_png"; + var_str = x[:-4] + "_png" - f.write("static const unsigned char " + var_str + "[]={\n"); + f.write("static const unsigned char " + var_str + "[]={\n") - pngf = open(x, "rb"); + pngf = open(x, "rb") - b = pngf.read(1); + b = pngf.read(1) while(len(b) == 1): f.write(hex(ord(b))) - b = pngf.read(1); + b = pngf.read(1) if (len(b) == 1): f.write(",") - f.write("\n};\n\n\n"); - pngf.close(); + f.write("\n};\n\n\n") + pngf.close() # Generate shaders block shaders = glob.glob("*.gsl") -shaders.sort(); +shaders.sort() -f.write("\n\n\n"); +f.write("\n\n\n") for x in shaders: - var_str = x[:-4] + "_shader_code"; + var_str = x[:-4] + "_shader_code" - f.write("static const char *" + var_str + "=\n"); + f.write("static const char *" + var_str + "=\n") - sf = open(x, "rb"); + sf = open(x, "rb") - b = sf.readline(); + b = sf.readline() while(b != ""): if (b.endswith("\r\n")): b = b[:-2] if (b.endswith("\n")): b = b[:-1] f.write(" \"" + b) - b = sf.readline(); + b = sf.readline() if (b != ""): f.write("\"\n") - f.write("\";\n\n\n"); - sf.close(); + f.write("\";\n\n\n") + sf.close() -f.close(); +f.close() diff --git a/servers/SCsub b/servers/SCsub index 8fab6c1dc2f..eefa6278c43 100644 --- a/servers/SCsub +++ b/servers/SCsub @@ -7,12 +7,12 @@ env.add_source_files(env.servers_sources, "*.cpp") Export('env') -SConscript('physics/SCsub'); -SConscript('physics_2d/SCsub'); -SConscript('visual/SCsub'); -SConscript('audio/SCsub'); -SConscript('spatial_sound/SCsub'); -SConscript('spatial_sound_2d/SCsub'); +SConscript('physics/SCsub') +SConscript('physics_2d/SCsub') +SConscript('visual/SCsub') +SConscript('audio/SCsub') +SConscript('spatial_sound/SCsub') +SConscript('spatial_sound_2d/SCsub') lib = env.Library("servers", env.servers_sources) diff --git a/tools/SCsub b/tools/SCsub index f11aa34ee69..43cad794d43 100644 --- a/tools/SCsub +++ b/tools/SCsub @@ -47,11 +47,11 @@ def make_translations_header(target, source, env): xl_names.append([name, len(buf), str(decomp_size)]) g.write("struct EditorTranslationList {\n") - g.write("\tconst char* lang;\n"); - g.write("\tint comp_size;\n"); - g.write("\tint uncomp_size;\n"); - g.write("\tconst unsigned char* data;\n"); - g.write("};\n\n"); + g.write("\tconst char* lang;\n") + g.write("\tint comp_size;\n") + g.write("\tint uncomp_size;\n") + g.write("\tconst unsigned char* data;\n") + g.write("};\n\n") g.write("static EditorTranslationList _editor_translations[]={\n") for x in xl_names: g.write("\t{ \"" + x[0] + "\", " + str(x[1]) + ", " + str(x[2]) + ",_translation_" + x[0] + "_compressed},\n") @@ -112,8 +112,8 @@ if (env["tools"] != "no"): env.Depends('#tools/editor/builtin_fonts.h', flist) env.Command('#tools/editor/builtin_fonts.h', flist, make_fonts_header) - SConscript('editor/SCsub'); - SConscript('collada/SCsub'); + SConscript('editor/SCsub') + SConscript('collada/SCsub') SConscript('doc/SCsub') lib = env.Library("tool", env.tool_sources) diff --git a/tools/editor/SCsub b/tools/editor/SCsub index 625103eaeea..caf45d25be5 100644 --- a/tools/editor/SCsub +++ b/tools/editor/SCsub @@ -74,7 +74,7 @@ if (env["tools"] == "yes"): env.add_source_files(env.tool_sources, "*.cpp") Export('env') - SConscript('icons/SCsub'); - SConscript('plugins/SCsub'); - SConscript('fileserver/SCsub'); - SConscript('io_plugins/SCsub'); + SConscript('icons/SCsub') + SConscript('plugins/SCsub') + SConscript('fileserver/SCsub') + SConscript('io_plugins/SCsub') diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub index f27350a1841..af6ebd67fdd 100644 --- a/tools/editor/icons/SCsub +++ b/tools/editor/icons/SCsub @@ -22,42 +22,42 @@ def make_editor_icons_action(target, source, env): for x in pixmaps: x = str(x) - var_str = os.path.basename(x)[:-4] + "_png"; + var_str = os.path.basename(x)[:-4] + "_png" # print(var_str) - s.write("static const unsigned char " + var_str + "[]={\n"); + s.write("static const unsigned char " + var_str + "[]={\n") - pngf = open(x, "rb"); + pngf = open(x, "rb") - b = pngf.read(1); + b = pngf.read(1) while(len(b) == 1): s.write(hex(ord(b))) - b = pngf.read(1); + b = pngf.read(1) if (len(b) == 1): s.write(",") - s.write("\n};\n\n"); + s.write("\n};\n\n") - pngf.close(); - var_str = os.path.basename(x)[:-4] + "_hidpi_png"; + pngf.close() + var_str = os.path.basename(x)[:-4] + "_hidpi_png" try: pngf = open(os.path.dirname(x) + "/2x/" + os.path.basename(x), "rb") - s.write("static const unsigned char " + var_str + "[]={\n"); + s.write("static const unsigned char " + var_str + "[]={\n") - b = pngf.read(1); + b = pngf.read(1) while(len(b) == 1): s.write(hex(ord(b))) - b = pngf.read(1); + b = pngf.read(1) if (len(b) == 1): s.write(",") - s.write("\n};\n\n\n"); + s.write("\n};\n\n\n") hidpi_list.append(x) except: - s.write("static const unsigned char* " + var_str + "=NULL;\n\n\n"); + s.write("static const unsigned char* " + var_str + "=NULL;\n\n\n") s.write("static Ref make_icon(const uint8_t* p_png,const uint8_t* p_hidpi_png) {\n") s.write("\tRef texture( memnew( ImageTexture ) );\n") @@ -74,12 +74,12 @@ def make_editor_icons_action(target, source, env): for x in pixmaps: x = os.path.basename(str(x)) - type = x[5:-4].title().replace("_", ""); - var_str = x[:-4] + "_png"; - var_str_hidpi = x[:-4] + "_hidpi_png"; - s.write("\tp_theme->set_icon(\"" + type + "\",\"EditorIcons\",make_icon(" + var_str + "," + var_str_hidpi + "));\n"); + type = x[5:-4].title().replace("_", "") + var_str = x[:-4] + "_png" + var_str_hidpi = x[:-4] + "_hidpi_png" + s.write("\tp_theme->set_icon(\"" + type + "\",\"EditorIcons\",make_icon(" + var_str + "," + var_str_hidpi + "));\n") - s.write("\n\n}\n\n"); + s.write("\n\n}\n\n") f = open(dst, "wb") f.write(s.getvalue()) diff --git a/tools/scripts/file-hex-array.py b/tools/scripts/file-hex-array.py index da95c9505f8..a6cdfe541f0 100755 --- a/tools/scripts/file-hex-array.py +++ b/tools/scripts/file-hex-array.py @@ -9,7 +9,8 @@ def tof(filepath): content = content.replace("0x", "") content = content.split(',') for i in range(len(content)): - if len(content[i]) == 1: content[i] = "0" + content[i] + if len(content[i]) == 1: + content[i] = "0" + content[i] content = "".join(content) with open(filepath + ".file", 'wb') as f: content = f.write(content.decode("hex")) diff --git a/tools/scripts/make_glwrapper.py b/tools/scripts/make_glwrapper.py index bae0eeeadc9..5694d2327e2 100644 --- a/tools/scripts/make_glwrapper.py +++ b/tools/scripts/make_glwrapper.py @@ -61,7 +61,7 @@ for x in (range(len(sys.argv) - 1)): if (glpos == -1): continue - ret = line[:glpos].strip(); + ret = line[:glpos].strip() line = line[glpos:].strip() namepos = line.find("(") @@ -127,14 +127,14 @@ header_code = """\ f.write("#include \n\n\n") -f.write(header_code); +f.write(header_code) f.write("#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n\n") f.write("#if defined(_WIN32) && !defined(__CYGWIN__)\n") f.write("#define GLWRP_APIENTRY __stdcall\n") -f.write("#else\n"); +f.write("#else\n") f.write("#define GLWRP_APIENTRY \n") -f.write("#endif\n\n"); +f.write("#endif\n\n") for x in types: f.write(x + "\n") @@ -150,7 +150,7 @@ for x in functions: f.write("#define " + x["name"] + " __wrapper_" + x["name"] + "\n") f.write("\n\n") -f.write("typedef void (*GLWrapperFuncPtr)(void);\n\n"); +f.write("typedef void (*GLWrapperFuncPtr)(void);\n\n") f.write("void glWrapperInit( GLWrapperFuncPtr (*wrapperFunc)(const char*) );\n") f.write("#ifdef __cplusplus\n}\n#endif\n") From 4160b3c9feb32e7762ea121bb44b30ba65676760 Mon Sep 17 00:00:00 2001 From: George Marques Date: Mon, 31 Oct 2016 21:39:45 -0200 Subject: [PATCH 045/223] Fix IPv6 linking for UWP --- drivers/unix/socket_helpers.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/unix/socket_helpers.h b/drivers/unix/socket_helpers.h index 9693911acd6..3aaae82da24 100644 --- a/drivers/unix/socket_helpers.h +++ b/drivers/unix/socket_helpers.h @@ -10,6 +10,10 @@ #endif #endif +#ifdef WINRT_ENABLED +#define in6addr_any IN6ADDR_ANY_INIT +#endif + // helpers for sockaddr -> IP_Address and back, should work for posix and winsock. All implementations should use this static size_t _set_sockaddr(struct sockaddr_storage* p_addr, const IP_Address& p_ip, int p_port) { From fd9aebd4fa37965ec307b87742f1d88aa8be37ca Mon Sep 17 00:00:00 2001 From: volzhs Date: Tue, 1 Nov 2016 22:26:29 +0900 Subject: [PATCH 046/223] Fix to focus or rename node by double click --- scene/gui/tree.cpp | 34 ++++---------------------------- scene/gui/tree.h | 6 ------ tools/editor/scene_tree_dock.cpp | 1 - 3 files changed, 4 insertions(+), 37 deletions(-) diff --git a/scene/gui/tree.cpp b/scene/gui/tree.cpp index ca9c666c01d..912371142f3 100644 --- a/scene/gui/tree.cpp +++ b/scene/gui/tree.cpp @@ -2363,19 +2363,11 @@ void Tree::_input_event(InputEvent p_event) { Input::get_singleton()->set_mouse_mode(Input::MOUSE_MODE_VISIBLE); warp_mouse(range_drag_capture_pos); } else { - - if (delayed_text_editor) { - uint64_t diff = OS::get_singleton()->get_ticks_msec() - first_selection_time; - if (diff >= 400 && diff <= 800) - edit_selected(); - // fast double click - else if (diff < 400) { - emit_signal("item_double_clicked"); - } - - first_selection_time = OS::get_singleton()->get_ticks_msec(); - } else { + Rect2 rect = get_selected()->get_meta("__focus_rect"); + if (rect.has_point(Point2(p_event.mouse_button.x,p_event.mouse_button.y))) { edit_selected(); + } else { + emit_signal("item_double_clicked"); } } pressing_for_editor=false; @@ -2921,8 +2913,6 @@ void Tree::item_selected(int p_column,TreeItem *p_item) { p_item->cells[p_column].selected=true; //emit_signal("multi_selected",p_item,p_column,true); - NO this is for TreeItem::select - if (delayed_text_editor) - first_selection_time = OS::get_singleton()->get_ticks_msec(); } else { @@ -3572,15 +3562,6 @@ bool Tree::get_allow_rmb_select() const{ } -void Tree::set_delayed_text_editor(bool enabled) { - delayed_text_editor = enabled; -} - -bool Tree::is_delayed_text_editor_enabled() const { - return delayed_text_editor; -} - - void Tree::_bind_methods() { ObjectTypeDB::bind_method(_MD("_range_click_timeout"),&Tree::_range_click_timeout); @@ -3634,10 +3615,6 @@ void Tree::_bind_methods() { ObjectTypeDB::bind_method(_MD("set_allow_rmb_select","allow"),&Tree::set_allow_rmb_select); ObjectTypeDB::bind_method(_MD("get_allow_rmb_select"),&Tree::get_allow_rmb_select); - ObjectTypeDB::bind_method(_MD("set_delayed_text_editor","enable"),&Tree::set_delayed_text_editor); - ObjectTypeDB::bind_method(_MD("is_delayed_text_editor_enabled"),&Tree::is_delayed_text_editor_enabled); - - ObjectTypeDB::bind_method(_MD("set_single_select_cell_editing_only_when_already_selected","enable"),&Tree::set_single_select_cell_editing_only_when_already_selected); ObjectTypeDB::bind_method(_MD("get_single_select_cell_editing_only_when_already_selected"),&Tree::get_single_select_cell_editing_only_when_already_selected); @@ -3751,9 +3728,6 @@ Tree::Tree() { force_select_on_already_selected=false; allow_rmb_select=false; - - first_selection_time = 0; - delayed_text_editor = false; } diff --git a/scene/gui/tree.h b/scene/gui/tree.h index 6c2f1dae40f..1936f926c81 100644 --- a/scene/gui/tree.h +++ b/scene/gui/tree.h @@ -439,9 +439,6 @@ friend class TreeItem; float last_drag_time; float time_since_motion;*/ - bool delayed_text_editor; - uint64_t first_selection_time; - float drag_speed; float drag_from; float drag_accum; @@ -537,9 +534,6 @@ public: void set_value_evaluator(ValueEvaluator *p_evaluator); - void set_delayed_text_editor(bool enabled); - bool is_delayed_text_editor_enabled() const; - Tree(); ~Tree(); diff --git a/tools/editor/scene_tree_dock.cpp b/tools/editor/scene_tree_dock.cpp index 16f06c7ac98..49db321d4d8 100644 --- a/tools/editor/scene_tree_dock.cpp +++ b/tools/editor/scene_tree_dock.cpp @@ -2004,7 +2004,6 @@ SceneTreeDock::SceneTreeDock(EditorNode *p_editor,Node *p_scene_root,EditorSelec scene_tree->connect("nodes_dragged",this,"_nodes_drag_begin"); scene_tree->get_scene_tree()->connect("item_double_clicked", this, "_focus_node"); - scene_tree->get_scene_tree()->set_delayed_text_editor(true); scene_tree->set_undo_redo(&editor_data->get_undo_redo()); scene_tree->set_editor_selection(editor_selection); From f935d7ab0ecd4757690cc29e449fec9572bbb435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Tue, 1 Nov 2016 19:16:46 +0100 Subject: [PATCH 047/223] Make regex compilable with RTTI disabled --- modules/regex/regex.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index a0f4b4934cc..0197da46fb9 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -97,6 +97,9 @@ struct RegExNode { memdelete(next); } + // For avoiding RTTI + virtual bool is_look_behind() { return false; } + virtual int test(RegExSearch& s, int pos) const { return next ? next->test(s, pos) : -1; @@ -750,6 +753,8 @@ struct RegExNodeLookBehind : public RegExNodeGroup { reset_pos = true; } + virtual bool is_look_behind() { return true; } + virtual int test(RegExSearch& s, int pos) const { if (pos < length) @@ -1089,7 +1094,7 @@ Error RegEx::compile(const String& p_pattern) { REGEX_COMPILE_FAIL("backreference not found"); for (int i = 0; i < stack.size(); ++i) - if (dynamic_cast(stack[i])) + if (stack[i]->is_look_behind()) REGEX_COMPILE_FAIL("backreferences inside lookbehind not supported"); for (int i = 0; i < group_names.size(); ++i) { @@ -1112,7 +1117,7 @@ Error RegEx::compile(const String& p_pattern) { c = d; for (int i = 0; i < stack.size(); ++i) - if (dynamic_cast(stack[i])) + if (stack[i]->is_look_behind()) REGEX_COMPILE_FAIL("backreferences inside lookbehind not supported"); int ref = -1; @@ -1238,7 +1243,7 @@ Error RegEx::compile(const String& p_pattern) { break; case '|': for (int i = 0; i < stack.size(); ++i) - if (dynamic_cast(stack[i])) + if (stack[i]->is_look_behind()) REGEX_COMPILE_FAIL("alternations inside lookbehind not supported"); stack[0]->add_childset(); break; @@ -1312,7 +1317,7 @@ Error RegEx::compile(const String& p_pattern) { if (min_val != max_val) for (int i = 0; i < stack.size(); ++i) - if (dynamic_cast(stack[i])) + if (stack[i]->is_look_behind()) REGEX_COMPILE_FAIL("variable length quantifiers inside lookbehind not supported"); RegExNodeQuantifier* quant = memnew(RegExNodeQuantifier(min_val, max_val)); From ce5200b30e6d262905912c6571d51ba6f5979bd7 Mon Sep 17 00:00:00 2001 From: Mateusz Adamczyk Date: Mon, 31 Oct 2016 15:45:20 +0100 Subject: [PATCH 048/223] Option for detaching script from node (#6934). --- tools/editor/scene_tree_dock.cpp | 40 +++++++++++++++++++++++++++++--- tools/editor/scene_tree_dock.h | 2 ++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/tools/editor/scene_tree_dock.cpp b/tools/editor/scene_tree_dock.cpp index 16f06c7ac98..b4d32eb5c2c 100644 --- a/tools/editor/scene_tree_dock.cpp +++ b/tools/editor/scene_tree_dock.cpp @@ -86,6 +86,9 @@ void SceneTreeDock::_unhandled_key_input(InputEvent p_event) { else if (ED_IS_SHORTCUT("scene_tree/load_script", p_event)) { _tool_selected(TOOL_LOAD_SCRIPT); } + else if(ED_IS_SHORTCUT("scene_tree/clear_script", p_event)) { + _tool_selected(TOOL_CLEAR_SCRIPT); + } else if (ED_IS_SHORTCUT("scene_tree/move_up", p_event)) { _tool_selected(TOOL_MOVE_UP); } @@ -414,6 +417,18 @@ void SceneTreeDock::_tool_selected(int p_tool, bool p_confirm_override) { } + } break; + case TOOL_CLEAR_SCRIPT: { + Node *selected = scene_tree->get_selected(); + if(!selected) + break; + + Ref