From 19f2006ec073f42d1b2dc480ac4103ac24b0b59a Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Sat, 11 Mar 2023 20:23:05 +0000 Subject: [PATCH] Use hash table for GDScript parsing GDScript now uses hash table for lookup of type lists / functions / keywords, instead of linear String comparisons. --- modules/gdscript/gdscript.cpp | 4 + modules/gdscript/gdscript_tokenizer.cpp | 155 +++++++++++++++--------- modules/gdscript/gdscript_tokenizer.h | 15 ++- 3 files changed, 114 insertions(+), 60 deletions(-) diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index 14a3a756f4b..bde5cb1deaa 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -2095,6 +2095,8 @@ GDScriptWarning::Code GDScriptWarning::get_code_from_name(const String &p_name) #endif // DEBUG_ENABLED GDScriptLanguage::GDScriptLanguage() { + GDScriptTokenizer::initialize(); + calls = 0; ERR_FAIL_COND(singleton); singleton = this; @@ -2139,6 +2141,8 @@ GDScriptLanguage::GDScriptLanguage() { } GDScriptLanguage::~GDScriptLanguage() { + GDScriptTokenizer::terminate(); + if (_call_stack) { memdelete_arr(_call_stack); } diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index ed9b2dd971c..c82dfa76e9d 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -35,6 +35,8 @@ #include "core/print_string.h" #include "gdscript_functions.h" +OAHashMap *GDScriptTokenizer::token_hashtable = nullptr; + const char *GDScriptTokenizer::token_names[TK_MAX] = { "Empty", "Identifier", @@ -235,6 +237,96 @@ static const _kws _keyword_list[] = { { GDScriptTokenizer::TK_ERROR, nullptr } }; +// Prepare the hash table for parsing as a one off at startup. 
+void GDScriptTokenizer::initialize() {
+	// Idempotent: a repeated call must not leak the table built by the first one.
+	if (token_hashtable) {
+		return;
+	}
+	token_hashtable = memnew((OAHashMap<String, int>));
+
+	// Constant literals take the ids below TOKEN_HASH_TABLE_TYPE_START.
+	token_hashtable->insert("null", 0);
+	token_hashtable->insert("true", 1);
+	token_hashtable->insert("false", 2);
+
+	// Type names: id is TOKEN_HASH_TABLE_TYPE_START + index into _type_list.
+	int id = TOKEN_HASH_TABLE_TYPE_START;
+	for (int i = 0; _type_list[i].text; i++) {
+		token_hashtable->insert(_type_list[i].text, id++);
+	}
+	// Ids are decoded by range comparison, so the type range must not spill over.
+	DEV_ASSERT(id <= TOKEN_HASH_TABLE_BUILTIN_START);
+
+	// Built-in functions: id is TOKEN_HASH_TABLE_BUILTIN_START + Function value.
+	id = TOKEN_HASH_TABLE_BUILTIN_START;
+	for (int i = 0; i < GDScriptFunctions::FUNC_MAX; i++) {
+		token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(i)), id++);
+	}
+
+	// Keywords: id is TOKEN_HASH_TABLE_KEYWORD_START + index into _keyword_list.
+	id = TOKEN_HASH_TABLE_KEYWORD_START;
+	for (int i = 0; _keyword_list[i].text; i++) {
+		token_hashtable->insert(_keyword_list[i].text, id++);
+	}
+}
+
+void GDScriptTokenizer::terminate() {
+	// Does nothing if the table was never built or has already been released.
+	if (token_hashtable) {
+		memdelete(token_hashtable);
+		token_hashtable = nullptr;
+	}
+}
+
+// Emits the token for p_str if it is a constant, type, built-in function or keyword; returns whether it was.
+bool GDScriptTokenizerText::_parse_identifier(const String &p_str) {
+	// N.B. GDScriptTokenizer::initialize() must have been called before using this function,
+	// else token_hashtable will be NULL.
+	DEV_ASSERT(token_hashtable);
+	const int *found = token_hashtable->lookup_ptr(p_str);
+	if (!found) {
+		return false;
+	}
+
+	const int id = *found;
+
+	// Keywords occupy the highest id range.
+	if (id >= TOKEN_HASH_TABLE_KEYWORD_START) {
+		_make_token(_keyword_list[id - TOKEN_HASH_TABLE_KEYWORD_START].token);
+		return true;
+	}
+
+	// Built-in functions.
+	if (id >= TOKEN_HASH_TABLE_BUILTIN_START) {
+		_make_built_in_func(GDScriptFunctions::Function(id - TOKEN_HASH_TABLE_BUILTIN_START));
+		return true;
+	}
+
+	// Built-in types.
+	if (id >= TOKEN_HASH_TABLE_TYPE_START) {
+		_make_type(_type_list[id - TOKEN_HASH_TABLE_TYPE_START].type);
+		return true;
+	}
+
+	// Constant literals, using the ids assigned in initialize().
+	switch (id) {
+		case 0: {
+			_make_constant(Variant());
+		} break;
+		case 1: {
+			_make_constant(true);
+		} break;
+		case 2: {
+			_make_constant(false);
+		} break;
+		default: {
+			DEV_ASSERT(0);
+		} break;
+	}
+	return true;
+}
+
 const char *GDScriptTokenizer::get_token_name(Token p_token) {
 	ERR_FAIL_INDEX_V(p_token, TK_MAX, "");
 	return token_names[p_token];
@@ -977,68 +1069,13 @@ void GDScriptTokenizerText::_advance() {
 						i++;
 					}
 
-					bool identifier = false;
+					// Detect preset keywords / functions using hashtable.
+					bool found = _parse_identifier(str);
 
-					if (str == "null") {
-						_make_constant(Variant());
-
-					} else if (str == "true") {
-						_make_constant(true);
-
-					} else if (str == "false") {
-						_make_constant(false);
-					} else {
-						bool found = false;
-
-						{
-							int idx = 0;
-
-							while (_type_list[idx].text) {
-								if (str == _type_list[idx].text) {
-									_make_type(_type_list[idx].type);
-									found = true;
-									break;
-								}
-								idx++;
-							}
-						}
-
-						if (!found) {
-							//built in func?
- - for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) { - if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) { - _make_built_in_func(GDScriptFunctions::Function(j)); - found = true; - break; - } - } - } - - if (!found) { - //keyword - - int idx = 0; - found = false; - - while (_keyword_list[idx].text) { - if (str == _keyword_list[idx].text) { - _make_token(_keyword_list[idx].token); - found = true; - break; - } - idx++; - } - } - - if (!found) { - identifier = true; - } - } - - if (identifier) { + if (!found) { _make_identifier(str); } + INCPOS(str.length()); return; } diff --git a/modules/gdscript/gdscript_tokenizer.h b/modules/gdscript/gdscript_tokenizer.h index 0ec3c16065f..536362a2252 100644 --- a/modules/gdscript/gdscript_tokenizer.h +++ b/modules/gdscript/gdscript_tokenizer.h @@ -31,6 +31,7 @@ #ifndef GDSCRIPT_TOKENIZER_H #define GDSCRIPT_TOKENIZER_H +#include "core/oa_hash_map.h" #include "core/pair.h" #include "core/string_name.h" #include "core/ustring.h" @@ -154,9 +155,20 @@ protected: static const char *token_names[TK_MAX]; + enum { + TOKEN_HASH_TABLE_TYPE_START = 3, + TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX, + TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + GDScriptFunctions::FUNC_MAX, + }; + + static OAHashMap *token_hashtable; + public: static const char *get_token_name(Token p_token); + static void initialize(); + static void terminate(); + bool is_token_literal(int p_offset = 0, bool variable_safe = false) const; StringName get_token_literal(int p_offset = 0) const; @@ -177,7 +189,7 @@ public: virtual bool is_ignoring_warnings() const = 0; #endif // DEBUG_ENABLED - virtual ~GDScriptTokenizer(){}; + virtual ~GDScriptTokenizer() {} }; class GDScriptTokenizerText : public GDScriptTokenizer { @@ -230,6 +242,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer { #endif // DEBUG_ENABLED void _advance(); + bool _parse_identifier(const String &p_str); public: 
void set_code(const String &p_code);