Merge pull request #72975 from vnen/gdscript-no-ascii-spoof-check

GDScript: Be more lenient with identifiers
This commit is contained in:
Rémi Verschelde 2023-02-09 16:12:09 +01:00
commit 6fca54a81b
No known key found for this signature in database
GPG key ID: C3336907360768E1
8 changed files with 67 additions and 6 deletions

View file

@ -2145,7 +2145,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr
make_completion_context(COMPLETION_IDENTIFIER, nullptr); make_completion_context(COMPLETION_IDENTIFIER, nullptr);
GDScriptTokenizer::Token token = current; GDScriptTokenizer::Token token = current;
ParseFunction prefix_rule = get_rule(token.type)->prefix; GDScriptTokenizer::Token::Type token_type = token.type;
if (token.is_identifier()) {
// Allow keywords that can be treated as identifiers.
token_type = GDScriptTokenizer::Token::IDENTIFIER;
}
ParseFunction prefix_rule = get_rule(token_type)->prefix;
if (prefix_rule == nullptr) { if (prefix_rule == nullptr) {
// Expected expression. Let the caller give the proper error message. // Expected expression. Let the caller give the proper error message.
@ -3010,7 +3015,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p
path_state = PATH_STATE_NODE_NAME; path_state = PATH_STATE_NODE_NAME;
} else if (current.is_node_name()) { } else if (current.is_node_name()) {
advance(); advance();
get_node->full_path += previous.get_identifier(); String identifier = previous.get_identifier();
#ifdef DEBUG_ENABLED
// Check spoofing.
if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) {
push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier);
}
#endif
get_node->full_path += identifier;
path_state = PATH_STATE_NODE_NAME; path_state = PATH_STATE_NODE_NAME;
} else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) { } else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) {

View file

@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const {
switch (type) { switch (type) {
case IDENTIFIER: case IDENTIFIER:
case MATCH: // Used in String.match(). case MATCH: // Used in String.match().
case CONST_INF: // Used in Vector{2,3,4}.INF // Allow constants to be treated as regular identifiers.
case CONST_PI:
case CONST_INF:
case CONST_NAN:
case CONST_TAU:
return true; return true;
default: default:
return false; return false;
@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const {
case CLASS_NAME: case CLASS_NAME:
case CLASS: case CLASS:
case CONST: case CONST:
case CONST_PI:
case CONST_INF:
case CONST_NAN:
case CONST_TAU:
case CONTINUE: case CONTINUE:
case ELIF: case ELIF:
case ELSE: case ELSE:
@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() {
#endif // DEBUG_ENABLED #endif // DEBUG_ENABLED
GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
bool only_ascii = _peek(-1) < 128;
// Consume all identifier characters. // Consume all identifier characters.
while (is_unicode_identifier_continue(_peek())) { while (is_unicode_identifier_continue(_peek())) {
_advance(); char32_t c = _advance();
only_ascii = only_ascii && c < 128;
} }
int len = _current - _start; int len = _current - _start;
@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
#ifdef DEBUG_ENABLED #ifdef DEBUG_ENABLED
// Additional checks for identifiers but only in debug and if it's available in TextServer. // Additional checks for identifiers but only in debug and if it's available in TextServer.
if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
int64_t confusable = TS->is_confusable(name, keyword_list); int64_t confusable = TS->is_confusable(name, keyword_list);
if (confusable >= 0) { if (confusable >= 0) {
push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable])); push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable]));

View file

@ -0,0 +1,3 @@
# Parser fixture: an identifier made only of ASCII characters must be accepted
# even when it visually resembles a keyword.
func test():
var P1 = "ok" # "P1" looks like the keyword "PI", but it is allowed because every character is in the ASCII range.
print(P1)

View file

@ -0,0 +1,16 @@
# Parser fixture: `match` and the built-in constant names PI, INF, NAN and TAU
# can be declared as local variables and read back like ordinary identifiers.
# Each print must output the string assigned to the local of the same name.
func test():
# The following keywords are allowed as identifiers:
var match = "match"
print(match)
var PI = "PI"
print(PI)
var INF = "INF"
print(INF)
var NAN = "NAN"
print(NAN)
var TAU = "TAU"
print(TAU)

View file

@ -0,0 +1,6 @@
GDTEST_OK
match
PI
INF
NAN
TAU

View file

@ -1,5 +1,12 @@
extends Node
func test(): func test():
var port = 0 # Only latin characters. var port = 0 # Only latin characters.
var pοrt = 1 # The "ο" is Greek omicron. var pοrt = 1 # The "ο" is Greek omicron.
prints(port, pοrt) prints(port, pοrt)
# Do not call this since nodes aren't in the tree. It is just a parser check.
func nodes():
var _node1 = $port # Only Latin characters; no warning expected.
var _node2 = $pοrt # The "ο" is Greek omicron; expected to raise CONFUSABLE_IDENTIFIER.

View file

@ -1,6 +1,10 @@
GDTEST_OK GDTEST_OK
>> WARNING >> WARNING
>> Line: 3 >> Line: 5
>> CONFUSABLE_IDENTIFIER
>> The identifier "pοrt" has misleading characters and might be confused with something else.
>> WARNING
>> Line: 12
>> CONFUSABLE_IDENTIFIER >> CONFUSABLE_IDENTIFIER
>> The identifier "pοrt" has misleading characters and might be confused with something else. >> The identifier "pοrt" has misleading characters and might be confused with something else.
0 1 0 1