diff --git a/editor/export/editor_export.cpp b/editor/export/editor_export.cpp
index cd7e813dbdb..aeb49661691 100644
--- a/editor/export/editor_export.cpp
+++ b/editor/export/editor_export.cpp
@@ -270,7 +270,7 @@ void EditorExport::load_config() {
 		preset->set_include_filter(config->get_value(section, "include_filter"));
 		preset->set_exclude_filter(config->get_value(section, "exclude_filter"));
 		preset->set_export_path(config->get_value(section, "export_path", ""));
-		preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS));
+		preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED));
 
 		if (config->has_section_key(section, "encrypt_pck")) {
 			preset->set_enc_pck(config->get_value(section, "encrypt_pck"));
diff --git a/editor/export/editor_export_preset.h b/editor/export/editor_export_preset.h
index c5f2a0ee79a..effce48111a 100644
--- a/editor/export/editor_export_preset.h
+++ b/editor/export/editor_export_preset.h
@@ -57,6 +57,7 @@ public:
 	enum ScriptExportMode {
 		MODE_SCRIPT_TEXT,
 		MODE_SCRIPT_BINARY_TOKENS,
+		MODE_SCRIPT_BINARY_TOKENS_COMPRESSED,
 	};
 
 private:
@@ -89,7 +90,7 @@ private:
 	bool enc_directory = false;
 	String script_key;
 
-	int script_mode = MODE_SCRIPT_BINARY_TOKENS;
+	int script_mode = MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;
 
 protected:
 	bool _set(const StringName &p_name, const Variant &p_value);
diff --git a/editor/export/project_export.cpp b/editor/export/project_export.cpp
index dba524310ea..ff1fa3470e2 100644
--- a/editor/export/project_export.cpp
+++ b/editor/export/project_export.cpp
@@ -1398,7 +1398,9 @@ ProjectExportDialog::ProjectExportDialog() {
 	script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode);
 	script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT);
 	script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS);
+	script_mode->add_item(TTR("Compressed binary tokens (smaller files)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED);
 	script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed));
+
 	sections->add_child(script_vb);
 
 	sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed));
diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp
index 551adcb3202..7a576d52925 100644
--- a/modules/gdscript/gdscript.cpp
+++ b/modules/gdscript/gdscript.cpp
@@ -1066,7 +1066,7 @@ const Vector<uint8_t> &GDScript::get_binary_tokens_source() const {
 
 Vector<uint8_t> GDScript::get_as_binary_tokens() const {
 	GDScriptTokenizerBuffer tokenizer;
-	return tokenizer.parse_code_string(source);
+	return tokenizer.parse_code_string(source, GDScriptTokenizerBuffer::COMPRESS_NONE);
 }
 
 const HashMap<StringName, GDScriptFunction *> &GDScript::debug_get_member_functions() const {
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index a0036d38d6d..3ba6e4d1607 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -365,6 +365,7 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_path, bool p_for_completion) {
 
 	pop_multiline();
 	memdelete(text_tokenizer);
+	tokenizer = nullptr;
 
 #ifdef DEBUG_ENABLED
 	if (multiline_stack.size() > 0) {
@@ -384,6 +385,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String &p_script_path) {
 	Error err = buffer_tokenizer->set_code_buffer(p_binary);
 
 	if (err) {
+		memdelete(buffer_tokenizer);
 		return err;
 	}
 
@@ -404,6 +406,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String &p_script_path) {
 
 	pop_multiline();
 	memdelete(buffer_tokenizer);
+	tokenizer = nullptr;
 
 	if (errors.is_empty()) {
 		return OK;
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index a4425a2bf00..2940af585dc 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -284,7 +284,7 @@ void GDScriptTokenizerText::push_expression_indented_block() {
 }
 
 void GDScriptTokenizerText::pop_expression_indented_block() {
-	ERR_FAIL_COND(indent_stack_stack.size() == 0);
+	ERR_FAIL_COND(indent_stack_stack.is_empty());
 	indent_stack = indent_stack_stack.back()->get();
 	indent_stack_stack.pop_back();
 }
diff --git a/modules/gdscript/gdscript_tokenizer_buffer.cpp b/modules/gdscript/gdscript_tokenizer_buffer.cpp
index 5b41c411d84..db523ea9419 100644
--- a/modules/gdscript/gdscript_tokenizer_buffer.cpp
+++ b/modules/gdscript/gdscript_tokenizer_buffer.cpp
@@ -30,6 +30,7 @@
 
 #include "gdscript_tokenizer_buffer.h"
 
+#include "core/io/compression.h"
 #include "core/io/marshalls.h"
 
 #define TOKENIZER_VERSION 100
@@ -139,19 +140,31 @@ GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_token_start) {
 
 Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
 	const uint8_t *buf = p_buffer.ptr();
-	int total_len = p_buffer.size();
-	ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
+	ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
 
 	int version = decode_uint32(&buf[4]);
 	ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");
 
-	uint32_t identifier_count = decode_uint32(&buf[8]);
-	uint32_t constant_count = decode_uint32(&buf[12]);
-	uint32_t token_line_count = decode_uint32(&buf[16]);
-	uint32_t token_count = decode_uint32(&buf[20]);
+	int decompressed_size = decode_uint32(&buf[8]);
 
-	const uint8_t *b = &buf[24];
-	total_len -= 24;
+	Vector<uint8_t> contents;
+	if (decompressed_size == 0) {
+		contents = p_buffer.slice(12);
+	} else {
+		contents.resize(decompressed_size);
+		int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
+		ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
+	}
+
+	int total_len = contents.size();
+	buf = contents.ptr();
+	uint32_t identifier_count = decode_uint32(&buf[0]);
+	uint32_t constant_count = decode_uint32(&buf[4]);
+	uint32_t token_line_count = decode_uint32(&buf[8]);
+	uint32_t token_count = decode_uint32(&buf[16]);
+
+	const uint8_t *b = &buf[20];
+	total_len -= 20;
 
 	identifiers.resize(identifier_count);
 	for (uint32_t i = 0; i < identifier_count; i++) {
@@ -226,9 +239,7 @@ Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
 	return OK;
 }
 
-Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
-	Vector<uint8_t> buf;
-
+Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
 	HashMap<StringName, uint32_t> identifier_map;
 	HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
 	Vector<uint8_t> token_buffer;
@@ -280,28 +291,23 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
 		}
 	}
 
-	// Save header.
-	buf.resize(24);
-	buf.write[0] = 'G';
-	buf.write[1] = 'D';
-	buf.write[2] = 'S';
-	buf.write[3] = 'C';
-	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
-	encode_uint32(identifier_map.size(), &buf.write[8]);
-	encode_uint32(constant_map.size(), &buf.write[12]);
-	encode_uint32(token_lines.size(), &buf.write[16]);
-	encode_uint32(token_counter, &buf.write[20]);
+	Vector<uint8_t> contents;
+	contents.resize(20);
+	encode_uint32(identifier_map.size(), &contents.write[0]);
+	encode_uint32(constant_map.size(), &contents.write[4]);
+	encode_uint32(token_lines.size(), &contents.write[8]);
+	encode_uint32(token_counter, &contents.write[16]);
 
-	int buf_pos = 24;
+	int buf_pos = 20;
 
 	// Save identifiers.
 	for (const StringName &id : rev_identifier_map) {
 		String s = id.operator String();
 		int len = s.length();
-		buf.resize(buf_pos + (len + 1) * 4);
+		contents.resize(buf_pos + (len + 1) * 4);
 
-		encode_uint32(len, &buf.write[buf_pos]);
+		encode_uint32(len, &contents.write[buf_pos]);
 		buf_pos += 4;
 
 		for (int i = 0; i < len; i++) {
@@ -309,7 +315,7 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
 			encode_uint32(s[i], tmp);
 
 			for (int b = 0; b < 4; b++) {
-				buf.write[buf_pos + b] = tmp[b] ^ 0xb6;
+				contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
 			}
 
 			buf_pos += 4;
@@ -322,28 +328,58 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
 		// Objects cannot be constant, never encode objects.
 		Error err = encode_variant(v, nullptr, len, false);
 		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
-		buf.resize(buf_pos + len);
-		encode_variant(v, &buf.write[buf_pos], len, false);
+		contents.resize(buf_pos + len);
+		encode_variant(v, &contents.write[buf_pos], len, false);
 		buf_pos += len;
 	}
 
 	// Save lines and columns.
-	buf.resize(buf_pos + token_lines.size() * 16);
+	contents.resize(buf_pos + token_lines.size() * 16);
 	for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
-		encode_uint32(e.key, &buf.write[buf_pos]);
+		encode_uint32(e.key, &contents.write[buf_pos]);
 		buf_pos += 4;
-		encode_uint32(e.value, &buf.write[buf_pos]);
+		encode_uint32(e.value, &contents.write[buf_pos]);
 		buf_pos += 4;
 	}
 	for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
-		encode_uint32(e.key, &buf.write[buf_pos]);
+		encode_uint32(e.key, &contents.write[buf_pos]);
 		buf_pos += 4;
-		encode_uint32(e.value, &buf.write[buf_pos]);
+		encode_uint32(e.value, &contents.write[buf_pos]);
 		buf_pos += 4;
 	}
 
 	// Store tokens.
-	buf.append_array(token_buffer);
+	contents.append_array(token_buffer);
+
+	Vector<uint8_t> buf;
+
+	// Save header.
+	buf.resize(12);
+	buf.write[0] = 'G';
+	buf.write[1] = 'D';
+	buf.write[2] = 'S';
+	buf.write[3] = 'C';
+	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
+
+	switch (p_compress_mode) {
+		case COMPRESS_NONE:
+			encode_uint32(0u, &buf.write[8]);
+			buf.append_array(contents);
+			break;
+
+		case COMPRESS_ZSTD: {
+			encode_uint32(contents.size(), &buf.write[8]);
+			Vector<uint8_t> compressed;
+			int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
+			compressed.resize(max_size);
+
+			int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
+			ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
+			compressed.resize(compressed_size);
+
+			buf.append_array(compressed);
+		} break;
+	}
 
 	return buf;
 }
@@ -372,7 +408,7 @@ void GDScriptTokenizerBuffer::push_expression_indented_block() {
 }
 
 void GDScriptTokenizerBuffer::pop_expression_indented_block() {
-	ERR_FAIL_COND(indent_stack_stack.size() == 0);
+	ERR_FAIL_COND(indent_stack_stack.is_empty());
 	indent_stack = indent_stack_stack.back()->get();
 	indent_stack_stack.pop_back();
 }
diff --git a/modules/gdscript/gdscript_tokenizer_buffer.h b/modules/gdscript/gdscript_tokenizer_buffer.h
index 192a7b3f159..55df66e50f7 100644
--- a/modules/gdscript/gdscript_tokenizer_buffer.h
+++ b/modules/gdscript/gdscript_tokenizer_buffer.h
@@ -34,6 +34,12 @@
 #include "gdscript_tokenizer.h"
 
 class GDScriptTokenizerBuffer : public GDScriptTokenizer {
+public:
+	enum CompressMode {
+		COMPRESS_NONE,
+		COMPRESS_ZSTD,
+	};
+
 	enum {
 		TOKEN_BYTE_MASK = 0x80,
 		TOKEN_BITS = 8,
@@ -64,7 +70,7 @@ class GDScriptTokenizerBuffer : public GDScriptTokenizer {
 
 public:
 	Error set_code_buffer(const Vector<uint8_t> &p_buffer);
-	static Vector<uint8_t> parse_code_string(const String &p_code);
+	static Vector<uint8_t> parse_code_string(const String &p_code, CompressMode p_compress_mode);
 
 	virtual int get_cursor_line() const override;
 	virtual int get_cursor_column() const override;
diff --git a/modules/gdscript/register_types.cpp b/modules/gdscript/register_types.cpp
index e835c93b7c0..5ff1c78ac97 100644
--- a/modules/gdscript/register_types.cpp
+++ b/modules/gdscript/register_types.cpp
@@ -84,7 +84,7 @@ class EditorExportGDScript : public EditorExportPlugin {
 
 public:
 	virtual void _export_file(const String &p_path, const String &p_type, const HashSet<String> &p_features) override {
-		int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS;
+		int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;
 
 		const Ref<EditorExportPreset> &preset = get_export_preset();
 
@@ -103,7 +103,8 @@ public:
 
 		String source;
 		source.parse_utf8(reinterpret_cast<const char *>(file.ptr()), file.size());
-		file = GDScriptTokenizerBuffer::parse_code_string(source);
+		GDScriptTokenizerBuffer::CompressMode compress_mode = script_mode == EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED ? GDScriptTokenizerBuffer::COMPRESS_ZSTD : GDScriptTokenizerBuffer::COMPRESS_NONE;
+		file = GDScriptTokenizerBuffer::parse_code_string(source, compress_mode);
 		if (file.is_empty()) {
 			return;
 		}
diff --git a/modules/gdscript/tests/gdscript_test_runner.cpp b/modules/gdscript/tests/gdscript_test_runner.cpp
index 880289d2a80..a0329eb8d23 100644
--- a/modules/gdscript/tests/gdscript_test_runner.cpp
+++ b/modules/gdscript/tests/gdscript_test_runner.cpp
@@ -538,7 +538,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
 	} else {
 		String code = FileAccess::get_file_as_string(source_file, &err);
 		if (!err) {
-			Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code);
+			Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code, GDScriptTokenizerBuffer::COMPRESS_ZSTD);
 			script->set_binary_tokens_source(buffer);
 		}
 	}
diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp
index e4fab68e06c..f6965cf7cfb 100644
--- a/modules/gdscript/tests/test_gdscript.cpp
+++ b/modules/gdscript/tests/test_gdscript.cpp
@@ -111,7 +111,7 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines) {
 static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);
 
 static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
-	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code);
+	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code, GDScriptTokenizerBuffer::COMPRESS_NONE);
 	test_tokenizer_buffer(binary, p_lines);
 }
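
Note: a minimal sketch of how the two halves of the new API fit together, using only the declarations added in this diff. The helper function and its name are hypothetical and are not part of the change.

	#include "modules/gdscript/gdscript_tokenizer_buffer.h"

	// Hypothetical helper: round-trips GDScript source through the compressed
	// binary-token format. parse_code_string() writes the "GDSC" header
	// (magic, version, decompressed size) followed by the Zstandard-compressed
	// contents; set_code_buffer() detects compression from the stored
	// decompressed size (0 means the payload is stored uncompressed).
	static Error round_trip_tokens(const String &p_source) {
		Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(p_source, GDScriptTokenizerBuffer::COMPRESS_ZSTD);
		if (buffer.is_empty()) {
			return ERR_INVALID_DATA; // Encoding failed.
		}

		GDScriptTokenizerBuffer tokenizer;
		return tokenizer.set_code_buffer(buffer);
	}

On export, register_types.cpp picks COMPRESS_ZSTD or COMPRESS_NONE from the preset's script export mode, so presets set to the plain "Binary tokens" mode keep producing uncompressed buffers.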