From 108ae1e1c5aa4a2db9925cf3c9305b8d4407285d Mon Sep 17 00:00:00 2001 From: Tomas Mejia <74030992+BigDomas@users.noreply.github.com> Date: Wed, 9 Oct 2024 12:31:08 -0400 Subject: [PATCH] Fix Expression bugs, add some features Fix several bugs related to returning OK on parsing invalid expressions. (ternary operators, missing operators, etc.) Add support for underscores in numeric literals. --- core/math/expression.cpp | 64 ++++++++++++--- core/math/expression.h | 2 + tests/core/math/test_expression.h | 126 +++++++++++++++++++++--------- 3 files changed, 145 insertions(+), 47 deletions(-) diff --git a/core/math/expression.cpp b/core/math/expression.cpp index 0692ece1e6f..81ad088c0e2 100644 --- a/core/math/expression.cpp +++ b/core/math/expression.cpp @@ -362,13 +362,15 @@ Error Expression::_get_token(Token &r_token) { } else if (c == 'e') { reading = READING_EXP; is_float = true; + } else if (c == '_') { + /*Ignore underscores*/ } else { reading = READING_DONE; } } break; case READING_BIN: { - if (bin_beg && !is_binary_digit(c)) { + if (bin_beg && !is_binary_digit(c) && c != '_') { reading = READING_DONE; } else if (c == 'b') { bin_beg = true; @@ -376,7 +378,7 @@ Error Expression::_get_token(Token &r_token) { } break; case READING_HEX: { - if (hex_beg && !is_hex_digit(c)) { + if (hex_beg && !is_hex_digit(c) && c != '_') { reading = READING_DONE; } else if (c == 'x') { hex_beg = true; @@ -387,6 +389,8 @@ Error Expression::_get_token(Token &r_token) { if (is_digit(c)) { } else if (c == 'e') { reading = READING_EXP; + } else if (c == '_') { + /*Ignore underscores*/ } else { reading = READING_DONE; } @@ -399,6 +403,8 @@ Error Expression::_get_token(Token &r_token) { } else if ((c == '-' || c == '+') && !exp_sign && !exp_beg) { exp_sign = true; + } else if (c == '_') { + /*Ignore underscores*/ } else { reading = READING_DONE; } @@ -408,7 +414,10 @@ Error Expression::_get_token(Token &r_token) { if (reading == READING_DONE) { break; } - num += String::chr(c); + + if (c != '_') { //Ignore underscores within a number + num += String::chr(c); + } c = GET_CHAR(); is_first_char = false; } @@ -439,7 +448,8 @@ Error Expression::_get_token(Token &r_token) { cchar = GET_CHAR(); } - str_ofs--; //go back one + if (str_ofs != expression.length()) + str_ofs--; //go back one if current identifier isn't at the end of the expression if (id == "in") { r_token.type = TK_OP_IN; @@ -472,6 +482,14 @@ Error Expression::_get_token(Token &r_token) { r_token.type = TK_OP_AND; } else if (id == "self") { r_token.type = TK_SELF; + } else if (id == "if") { + _set_error("Invalid identifier \"if\""); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } else if (id == "else") { + _set_error("Invalid identifier \"else\""); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; } else { for (int i = 0; i < Variant::VARIANT_MAX; i++) { if (id == Variant::get_type_name(Variant::Type(i))) { @@ -571,6 +589,8 @@ Expression::ENode *Expression::_parse_expression() { case TK_CURLY_BRACKET_OPEN: { //a dictionary DictionaryNode *dn = alloc_node(); + previous_expect_commas = expect_commas; + expect_commas = true; while (true) { int cofs = str_ofs; @@ -611,11 +631,13 @@ Expression::ENode *Expression::_parse_expression() { } expr = dn; + expect_commas = previous_expect_commas; } break; case TK_BRACKET_OPEN: { //an array - ArrayNode *an = alloc_node(); + previous_expect_commas = expect_commas; + expect_commas = true; while (true) { int cofs = str_ofs; @@ -643,6 +665,7 @@ Expression::ENode *Expression::_parse_expression() { } expr = an; + expect_commas = previous_expect_commas; } break; case TK_PARENTHESIS_OPEN: { //a suexpression @@ -670,6 +693,8 @@ Expression::ENode *Expression::_parse_expression() { func_call->method = identifier; SelfNode *self_node = alloc_node(); func_call->base = self_node; + previous_expect_commas = expect_commas; + expect_commas = true; while (true) { int cofs2 = str_ofs; @@ -698,6 +723,7 @@ Expression::ENode *Expression::_parse_expression() { } expr = func_call; + expect_commas = previous_expect_commas; } else { //named indexing str_ofs = cofs; @@ -739,8 +765,10 @@ Expression::ENode *Expression::_parse_expression() { } break; case TK_BASIC_TYPE: { //constructor.. - Variant::Type bt = Variant::Type(int(tk.value)); + previous_expect_commas = expect_commas; + expect_commas = true; + _get_token(tk); if (tk.type != TK_PARENTHESIS_OPEN) { _set_error("Expected '('"); @@ -777,12 +805,13 @@ Expression::ENode *Expression::_parse_expression() { } expr = constructor; - + expect_commas = previous_expect_commas; } break; case TK_BUILTIN_FUNC: { //builtin function - StringName func = tk.value; + previous_expect_commas = expect_commas; + expect_commas = true; _get_token(tk); if (tk.type != TK_PARENTHESIS_OPEN) { @@ -827,7 +856,7 @@ Expression::ENode *Expression::_parse_expression() { } expr = bifunc; - + expect_commas = previous_expect_commas; } break; case TK_OP_SUB: { ExpressionNode e; @@ -885,6 +914,9 @@ Expression::ENode *Expression::_parse_expression() { } break; case TK_PERIOD: { //named indexing or function call + previous_expect_commas = expect_commas; + expect_commas = true; + _get_token(tk); if (tk.type != TK_IDENTIFIER && tk.type != TK_BUILTIN_FUNC) { _set_error("Expected identifier after '.'"); @@ -928,6 +960,7 @@ Expression::ENode *Expression::_parse_expression() { } expr = func_call; + expect_commas = previous_expect_commas; } else { //named indexing str_ofs = cofs; @@ -1035,6 +1068,17 @@ Expression::ENode *Expression::_parse_expression() { case TK_OP_BIT_INVERT: op = Variant::OP_BIT_NEGATE; break; + case TK_IDENTIFIER: + case TK_CONSTANT: + // There's 2 consecutive constants/identifiers + _set_error("Expected operator."); + return nullptr; + case TK_COMMA: + if (!expect_commas) { + _set_error("Unexpected ','"); + return nullptr; + } + break; default: { } } @@ -1473,6 +1517,8 @@ Error Expression::parse(const String &p_expression, const Vector &p_inpu error_str = String(); error_set = false; str_ofs = 0; + expect_commas = false; + previous_expect_commas = false; input_names = p_input_names; expression = p_expression; diff --git a/core/math/expression.h b/core/math/expression.h index 46bc3618df0..970e68285c3 100644 --- a/core/math/expression.h +++ b/core/math/expression.h @@ -51,6 +51,8 @@ private: bool sequenced = false; int str_ofs = 0; + bool expect_commas = false; + bool previous_expect_commas = false; bool expression_dirty = false; bool _compile_expression(); diff --git a/tests/core/math/test_expression.h b/tests/core/math/test_expression.h index c3e42804919..a7216cad901 100644 --- a/tests/core/math/test_expression.h +++ b/tests/core/math/test_expression.h @@ -179,25 +179,49 @@ TEST_CASE("[Expression] Scientific notation") { Expression expression; CHECK_MESSAGE( - expression.parse("2.e5") == OK, - "The expression should parse successfully."); - CHECK_MESSAGE( - double(expression.execute()) == doctest::Approx(200'000), - "The expression should return the expected result."); - - // The middle "e" is ignored here. - CHECK_MESSAGE( - expression.parse("2e5") == OK, + expression.parse("2e+5") == OK, "The expression should parse successfully."); CHECK_MESSAGE( double(expression.execute()) == doctest::Approx(2e5), "The expression should return the expected result."); CHECK_MESSAGE( - expression.parse("2e.5") == OK, + expression.parse("2.e5") == OK, "The expression should parse successfully."); CHECK_MESSAGE( - double(expression.execute()) == doctest::Approx(2), + double(expression.execute()) == doctest::Approx(2e5), + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("2.e-5") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + double(expression.execute()) == doctest::Approx(2e-5), + "The expression should return the expected result."); +} + +TEST_CASE("[Expression] Other bases") { + Expression expression; + + CHECK_MESSAGE( + expression.parse("0b101") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 5, + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("0x20") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 32, + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("0x2a") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 42, "The expression should return the expected result."); } @@ -208,11 +232,36 @@ TEST_CASE("[Expression] Underscored numeric literals") { expression.parse("1_000_000") == OK, "The expression should parse successfully."); CHECK_MESSAGE( - expression.parse("1_000.000") == OK, + int(expression.execute()) == 1'000'000, + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("1.000_1") == OK, "The expression should parse successfully."); CHECK_MESSAGE( - expression.parse("0xff_99_00") == OK, + double(expression.execute()) == doctest::Approx(1.0001), + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("0b1001_0110") == OK, "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 0b10010110, + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("0xff_00") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 0xff00, + "The expression should return the expected result."); + + CHECK_MESSAGE( + expression.parse("0.01e1_0") == OK, + "The expression should parse successfully."); + CHECK_MESSAGE( + double(expression.execute()) == doctest::Approx(0.01e10), + "The expression should return the expected result."); } TEST_CASE("[Expression] Built-in functions") { @@ -412,6 +461,29 @@ TEST_CASE("[Expression] Invalid expressions") { CHECK_MESSAGE( expression.parse("123\"456") == ERR_INVALID_PARAMETER, "The expression shouldn't parse successfully."); + + CHECK_MESSAGE( + expression.parse("$1.00") == ERR_INVALID_PARAMETER, + "The expression shouldn't parse successfully."); + + CHECK_MESSAGE( + expression.parse("???5") == ERR_INVALID_PARAMETER, + "The expression shouldn't parse successfully."); + + // Ternary operator is unsupported + CHECK_MESSAGE( + expression.parse("0 if true else 1") == ERR_INVALID_PARAMETER, + "The expression shouldn't parse successfully."); + + // Commas can't be used as a decimal point. + CHECK_MESSAGE( + expression.parse("123,456") == ERR_INVALID_PARAMETER, + "The expression shouldn't parse successfully."); + + // Spaces can't be used as a separator for large numbers. + CHECK_MESSAGE( + expression.parse("123 456") == ERR_INVALID_PARAMETER, + "The expression shouldn't parse successfully."); } TEST_CASE("[Expression] Unusual expressions") { @@ -421,6 +493,9 @@ TEST_CASE("[Expression] Unusual expressions") { CHECK_MESSAGE( expression.parse("(((((((((((((((666)))))))))))))))") == OK, "The expression should parse successfully."); + CHECK_MESSAGE( + int(expression.execute()) == 666, + "The expression should return the expected result."); // Using invalid identifiers doesn't cause a parse error. ERR_PRINT_OFF; @@ -432,31 +507,6 @@ TEST_CASE("[Expression] Unusual expressions") { "The expression should return the expected result."); ERR_PRINT_ON; - ERR_PRINT_OFF; - CHECK_MESSAGE( - expression.parse("$1.00 + ???5") == OK, - "The expression should parse successfully."); - CHECK_MESSAGE( - int(expression.execute()) == 0, - "The expression should return the expected result."); - ERR_PRINT_ON; - - // Commas can't be used as a decimal parameter. - CHECK_MESSAGE( - expression.parse("123,456") == OK, - "The expression should parse successfully."); - CHECK_MESSAGE( - int(expression.execute()) == 123, - "The expression should return the expected result."); - - // Spaces can't be used as a separator for large numbers. - CHECK_MESSAGE( - expression.parse("123 456") == OK, - "The expression should parse successfully."); - CHECK_MESSAGE( - int(expression.execute()) == 123, - "The expression should return the expected result."); - // Division by zero is accepted, even though it prints an error message normally. CHECK_MESSAGE( expression.parse("-25.4 / 0") == OK,