Merge pull request #72975 from vnen/gdscript-no-ascii-spoof-check

GDScript: Be more lenient with identifiers
This commit is contained in:
Rémi Verschelde 2023-02-09 16:12:09 +01:00
commit 6fca54a81b
No known key found for this signature in database
GPG key ID: C3336907360768E1
8 changed files with 67 additions and 6 deletions

View file

@ -2145,7 +2145,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr
make_completion_context(COMPLETION_IDENTIFIER, nullptr);
GDScriptTokenizer::Token token = current;
ParseFunction prefix_rule = get_rule(token.type)->prefix;
GDScriptTokenizer::Token::Type token_type = token.type;
if (token.is_identifier()) {
// Allow keywords that can be treated as identifiers.
token_type = GDScriptTokenizer::Token::IDENTIFIER;
}
ParseFunction prefix_rule = get_rule(token_type)->prefix;
if (prefix_rule == nullptr) {
// Expected expression. Let the caller give the proper error message.
@ -3010,7 +3015,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p
path_state = PATH_STATE_NODE_NAME;
} else if (current.is_node_name()) {
advance();
get_node->full_path += previous.get_identifier();
String identifier = previous.get_identifier();
#ifdef DEBUG_ENABLED
// Check spoofing.
if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) {
push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier);
}
#endif
get_node->full_path += identifier;
path_state = PATH_STATE_NODE_NAME;
} else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) {

View file

@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const {
switch (type) {
case IDENTIFIER:
case MATCH: // Used in String.match().
case CONST_INF: // Used in Vector{2,3,4}.INF
// Allow constants to be treated as regular identifiers.
case CONST_PI:
case CONST_INF:
case CONST_NAN:
case CONST_TAU:
return true;
default:
return false;
@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const {
case CLASS_NAME:
case CLASS:
case CONST:
case CONST_PI:
case CONST_INF:
case CONST_NAN:
case CONST_TAU:
case CONTINUE:
case ELIF:
case ELSE:
@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() {
#endif // DEBUG_ENABLED
GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
bool only_ascii = _peek(-1) < 128;
// Consume all identifier characters.
while (is_unicode_identifier_continue(_peek())) {
_advance();
char32_t c = _advance();
only_ascii = only_ascii && c < 128;
}
int len = _current - _start;
@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
#ifdef DEBUG_ENABLED
// Additional checks for identifiers but only in debug and if it's available in TextServer.
if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
int64_t confusable = TS->is_confusable(name, keyword_list);
if (confusable >= 0) {
push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable]));

View file

@ -0,0 +1,3 @@
func test():
var P1 = "ok" # Visually similar to the keyword "PI", but allowed because every character is in the ASCII range.
print(P1)

View file

@ -0,0 +1,16 @@
func test():
# The following keywords are allowed as identifiers. Each one is declared as a
# local variable holding its own name and then printed.
var match = "match"
print(match)
var PI = "PI"
print(PI)
var INF = "INF"
print(INF)
var NAN = "NAN"
print(NAN)
var TAU = "TAU"
print(TAU)

View file

@ -0,0 +1,6 @@
GDTEST_OK
match
PI
INF
NAN
TAU

View file

@ -1,5 +1,12 @@
extends Node
func test():
var port = 0 # Only Latin characters.
var pοrt = 1 # The "ο" is a Greek omicron, not a Latin "o".
prints(port, pοrt)
# Do not call this: the nodes aren't in the tree. It exists only as a parser check.
func nodes():
var _node1 = $port # Only Latin characters.
var _node2 = $pοrt # The "ο" is a Greek omicron, not a Latin "o".

View file

@ -1,6 +1,10 @@
GDTEST_OK
>> WARNING
>> Line: 3
>> Line: 5
>> CONFUSABLE_IDENTIFIER
>> The identifier "pοrt" has misleading characters and might be confused with something else.
>> WARNING
>> Line: 12
>> CONFUSABLE_IDENTIFIER
>> The identifier "pοrt" has misleading characters and might be confused with something else.
0 1