GDScript: Enable compression on export

In addition to the existing option to export GDScript as binary tokens,
this adds an option to compress that binary output. The binary format has
to store extra data alongside the tokens (for example, identifiers encoded
at four bytes per character and per-token line/column tables), which
generally makes it bigger than the source text. The new option reduces
that difference by applying Zstandard compression to the buffer.
This commit is contained in:
George Marques 2024-01-26 14:49:31 -03:00
parent b4d0a09f15
commit 72e5f8c31e
No known key found for this signature in database
GPG key ID: 046BD46A3201E43D
11 changed files with 93 additions and 44 deletions

View file

@ -270,7 +270,7 @@ void EditorExport::load_config() {
preset->set_include_filter(config->get_value(section, "include_filter")); preset->set_include_filter(config->get_value(section, "include_filter"));
preset->set_exclude_filter(config->get_value(section, "exclude_filter")); preset->set_exclude_filter(config->get_value(section, "exclude_filter"));
preset->set_export_path(config->get_value(section, "export_path", "")); preset->set_export_path(config->get_value(section, "export_path", ""));
preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS)); preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED));
if (config->has_section_key(section, "encrypt_pck")) { if (config->has_section_key(section, "encrypt_pck")) {
preset->set_enc_pck(config->get_value(section, "encrypt_pck")); preset->set_enc_pck(config->get_value(section, "encrypt_pck"));

View file

@ -57,6 +57,7 @@ public:
enum ScriptExportMode { enum ScriptExportMode {
MODE_SCRIPT_TEXT, MODE_SCRIPT_TEXT,
MODE_SCRIPT_BINARY_TOKENS, MODE_SCRIPT_BINARY_TOKENS,
MODE_SCRIPT_BINARY_TOKENS_COMPRESSED,
}; };
private: private:
@ -89,7 +90,7 @@ private:
bool enc_directory = false; bool enc_directory = false;
String script_key; String script_key;
int script_mode = MODE_SCRIPT_BINARY_TOKENS; int script_mode = MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;
protected: protected:
bool _set(const StringName &p_name, const Variant &p_value); bool _set(const StringName &p_name, const Variant &p_value);

View file

@ -1398,7 +1398,9 @@ ProjectExportDialog::ProjectExportDialog() {
script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode); script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode);
script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT); script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT);
script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS); script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS);
script_mode->add_item(TTR("Compressed binary tokens (smaller files)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED);
script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed)); script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed));
sections->add_child(script_vb); sections->add_child(script_vb);
sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed)); sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed));

View file

@ -1066,7 +1066,7 @@ const Vector<uint8_t> &GDScript::get_binary_tokens_source() const {
Vector<uint8_t> GDScript::get_as_binary_tokens() const { Vector<uint8_t> GDScript::get_as_binary_tokens() const {
GDScriptTokenizerBuffer tokenizer; GDScriptTokenizerBuffer tokenizer;
return tokenizer.parse_code_string(source); return tokenizer.parse_code_string(source, GDScriptTokenizerBuffer::COMPRESS_NONE);
} }
const HashMap<StringName, GDScriptFunction *> &GDScript::debug_get_member_functions() const { const HashMap<StringName, GDScriptFunction *> &GDScript::debug_get_member_functions() const {

View file

@ -365,6 +365,7 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_
pop_multiline(); pop_multiline();
memdelete(text_tokenizer); memdelete(text_tokenizer);
tokenizer = nullptr;
#ifdef DEBUG_ENABLED #ifdef DEBUG_ENABLED
if (multiline_stack.size() > 0) { if (multiline_stack.size() > 0) {
@ -384,6 +385,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String
Error err = buffer_tokenizer->set_code_buffer(p_binary); Error err = buffer_tokenizer->set_code_buffer(p_binary);
if (err) { if (err) {
memdelete(buffer_tokenizer);
return err; return err;
} }
@ -404,6 +406,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String
pop_multiline(); pop_multiline();
memdelete(buffer_tokenizer); memdelete(buffer_tokenizer);
tokenizer = nullptr;
if (errors.is_empty()) { if (errors.is_empty()) {
return OK; return OK;

View file

@ -284,7 +284,7 @@ void GDScriptTokenizerText::push_expression_indented_block() {
} }
void GDScriptTokenizerText::pop_expression_indented_block() { void GDScriptTokenizerText::pop_expression_indented_block() {
ERR_FAIL_COND(indent_stack_stack.size() == 0); ERR_FAIL_COND(indent_stack_stack.is_empty());
indent_stack = indent_stack_stack.back()->get(); indent_stack = indent_stack_stack.back()->get();
indent_stack_stack.pop_back(); indent_stack_stack.pop_back();
} }

View file

@ -30,6 +30,7 @@
#include "gdscript_tokenizer_buffer.h" #include "gdscript_tokenizer_buffer.h"
#include "core/io/compression.h"
#include "core/io/marshalls.h" #include "core/io/marshalls.h"
#define TOKENIZER_VERSION 100 #define TOKENIZER_VERSION 100
@ -139,19 +140,31 @@ GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t
Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) { Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
const uint8_t *buf = p_buffer.ptr(); const uint8_t *buf = p_buffer.ptr();
int total_len = p_buffer.size(); ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
int version = decode_uint32(&buf[4]); int version = decode_uint32(&buf[4]);
ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version."); ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");
uint32_t identifier_count = decode_uint32(&buf[8]); int decompressed_size = decode_uint32(&buf[8]);
uint32_t constant_count = decode_uint32(&buf[12]);
uint32_t token_line_count = decode_uint32(&buf[16]);
uint32_t token_count = decode_uint32(&buf[20]);
const uint8_t *b = &buf[24]; Vector<uint8_t> contents;
total_len -= 24; if (decompressed_size == 0) {
contents = p_buffer.slice(12);
} else {
contents.resize(decompressed_size);
int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
}
int total_len = contents.size();
buf = contents.ptr();
uint32_t identifier_count = decode_uint32(&buf[0]);
uint32_t constant_count = decode_uint32(&buf[4]);
uint32_t token_line_count = decode_uint32(&buf[8]);
uint32_t token_count = decode_uint32(&buf[16]);
const uint8_t *b = &buf[20];
total_len -= 20;
identifiers.resize(identifier_count); identifiers.resize(identifier_count);
for (uint32_t i = 0; i < identifier_count; i++) { for (uint32_t i = 0; i < identifier_count; i++) {
@ -226,9 +239,7 @@ Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer)
return OK; return OK;
} }
Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) { Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
Vector<uint8_t> buf;
HashMap<StringName, uint32_t> identifier_map; HashMap<StringName, uint32_t> identifier_map;
HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map; HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
Vector<uint8_t> token_buffer; Vector<uint8_t> token_buffer;
@ -280,28 +291,23 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code)
} }
} }
// Save header. Vector<uint8_t> contents;
buf.resize(24); contents.resize(20);
buf.write[0] = 'G'; encode_uint32(identifier_map.size(), &contents.write[0]);
buf.write[1] = 'D'; encode_uint32(constant_map.size(), &contents.write[4]);
buf.write[2] = 'S'; encode_uint32(token_lines.size(), &contents.write[8]);
buf.write[3] = 'C'; encode_uint32(token_counter, &contents.write[16]);
encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
encode_uint32(identifier_map.size(), &buf.write[8]);
encode_uint32(constant_map.size(), &buf.write[12]);
encode_uint32(token_lines.size(), &buf.write[16]);
encode_uint32(token_counter, &buf.write[20]);
int buf_pos = 24; int buf_pos = 20;
// Save identifiers. // Save identifiers.
for (const StringName &id : rev_identifier_map) { for (const StringName &id : rev_identifier_map) {
String s = id.operator String(); String s = id.operator String();
int len = s.length(); int len = s.length();
buf.resize(buf_pos + (len + 1) * 4); contents.resize(buf_pos + (len + 1) * 4);
encode_uint32(len, &buf.write[buf_pos]); encode_uint32(len, &contents.write[buf_pos]);
buf_pos += 4; buf_pos += 4;
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
@ -309,7 +315,7 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code)
encode_uint32(s[i], tmp); encode_uint32(s[i], tmp);
for (int b = 0; b < 4; b++) { for (int b = 0; b < 4; b++) {
buf.write[buf_pos + b] = tmp[b] ^ 0xb6; contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
} }
buf_pos += 4; buf_pos += 4;
@ -322,28 +328,58 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code)
// Objects cannot be constant, never encode objects. // Objects cannot be constant, never encode objects.
Error err = encode_variant(v, nullptr, len, false); Error err = encode_variant(v, nullptr, len, false);
ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant."); ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
buf.resize(buf_pos + len); contents.resize(buf_pos + len);
encode_variant(v, &buf.write[buf_pos], len, false); encode_variant(v, &contents.write[buf_pos], len, false);
buf_pos += len; buf_pos += len;
} }
// Save lines and columns. // Save lines and columns.
buf.resize(buf_pos + token_lines.size() * 16); contents.resize(buf_pos + token_lines.size() * 16);
for (const KeyValue<uint32_t, uint32_t> &e : token_lines) { for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
encode_uint32(e.key, &buf.write[buf_pos]); encode_uint32(e.key, &contents.write[buf_pos]);
buf_pos += 4; buf_pos += 4;
encode_uint32(e.value, &buf.write[buf_pos]); encode_uint32(e.value, &contents.write[buf_pos]);
buf_pos += 4; buf_pos += 4;
} }
for (const KeyValue<uint32_t, uint32_t> &e : token_columns) { for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
encode_uint32(e.key, &buf.write[buf_pos]); encode_uint32(e.key, &contents.write[buf_pos]);
buf_pos += 4; buf_pos += 4;
encode_uint32(e.value, &buf.write[buf_pos]); encode_uint32(e.value, &contents.write[buf_pos]);
buf_pos += 4; buf_pos += 4;
} }
// Store tokens. // Store tokens.
buf.append_array(token_buffer); contents.append_array(token_buffer);
Vector<uint8_t> buf;
// Save header.
buf.resize(12);
buf.write[0] = 'G';
buf.write[1] = 'D';
buf.write[2] = 'S';
buf.write[3] = 'C';
encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
switch (p_compress_mode) {
case COMPRESS_NONE:
encode_uint32(0u, &buf.write[8]);
buf.append_array(contents);
break;
case COMPRESS_ZSTD: {
encode_uint32(contents.size(), &buf.write[8]);
Vector<uint8_t> compressed;
int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
compressed.resize(max_size);
int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
compressed.resize(compressed_size);
buf.append_array(compressed);
} break;
}
return buf; return buf;
} }
@ -372,7 +408,7 @@ void GDScriptTokenizerBuffer::push_expression_indented_block() {
} }
void GDScriptTokenizerBuffer::pop_expression_indented_block() { void GDScriptTokenizerBuffer::pop_expression_indented_block() {
ERR_FAIL_COND(indent_stack_stack.size() == 0); ERR_FAIL_COND(indent_stack_stack.is_empty());
indent_stack = indent_stack_stack.back()->get(); indent_stack = indent_stack_stack.back()->get();
indent_stack_stack.pop_back(); indent_stack_stack.pop_back();
} }

View file

@ -34,6 +34,12 @@
#include "gdscript_tokenizer.h" #include "gdscript_tokenizer.h"
class GDScriptTokenizerBuffer : public GDScriptTokenizer { class GDScriptTokenizerBuffer : public GDScriptTokenizer {
public:
// Selects how parse_code_string() stores the serialized contents that follow
// the 12-byte "GDSC" header (magic, tokenizer version, size field).
enum CompressMode {
// Contents are appended as-is and the header's size field is written as 0,
// which set_code_buffer() treats as "not compressed".
COMPRESS_NONE,
// Contents are compressed with Zstandard; the header's size field holds the
// decompressed size so set_code_buffer() can size the output buffer.
COMPRESS_ZSTD,
};
enum { enum {
TOKEN_BYTE_MASK = 0x80, TOKEN_BYTE_MASK = 0x80,
TOKEN_BITS = 8, TOKEN_BITS = 8,
@ -64,7 +70,7 @@ class GDScriptTokenizerBuffer : public GDScriptTokenizer {
public: public:
Error set_code_buffer(const Vector<uint8_t> &p_buffer); Error set_code_buffer(const Vector<uint8_t> &p_buffer);
static Vector<uint8_t> parse_code_string(const String &p_code); static Vector<uint8_t> parse_code_string(const String &p_code, CompressMode p_compress_mode);
virtual int get_cursor_line() const override; virtual int get_cursor_line() const override;
virtual int get_cursor_column() const override; virtual int get_cursor_column() const override;

View file

@ -84,7 +84,7 @@ class EditorExportGDScript : public EditorExportPlugin {
public: public:
virtual void _export_file(const String &p_path, const String &p_type, const HashSet<String> &p_features) override { virtual void _export_file(const String &p_path, const String &p_type, const HashSet<String> &p_features) override {
int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS; int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;
const Ref<EditorExportPreset> &preset = get_export_preset(); const Ref<EditorExportPreset> &preset = get_export_preset();
@ -103,7 +103,8 @@ public:
String source; String source;
source.parse_utf8(reinterpret_cast<const char *>(file.ptr()), file.size()); source.parse_utf8(reinterpret_cast<const char *>(file.ptr()), file.size());
file = GDScriptTokenizerBuffer::parse_code_string(source); GDScriptTokenizerBuffer::CompressMode compress_mode = script_mode == EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED ? GDScriptTokenizerBuffer::COMPRESS_ZSTD : GDScriptTokenizerBuffer::COMPRESS_NONE;
file = GDScriptTokenizerBuffer::parse_code_string(source, compress_mode);
if (file.is_empty()) { if (file.is_empty()) {
return; return;
} }

View file

@ -538,7 +538,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
} else { } else {
String code = FileAccess::get_file_as_string(source_file, &err); String code = FileAccess::get_file_as_string(source_file, &err);
if (!err) { if (!err) {
Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code); Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code, GDScriptTokenizerBuffer::COMPRESS_ZSTD);
script->set_binary_tokens_source(buffer); script->set_binary_tokens_source(buffer);
} }
} }

View file

@ -111,7 +111,7 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines)
static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines); static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);
static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) { static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code); Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code, GDScriptTokenizerBuffer::COMPRESS_NONE);
test_tokenizer_buffer(binary, p_lines); test_tokenizer_buffer(binary, p_lines);
} }