2023-01-05 13:25:55 +01:00
|
|
|
/**************************************************************************/
/*  gdscript_tokenizer.h                                                  */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/
|
2018-01-05 00:50:27 +01:00
|
|
|
|
2017-11-16 18:38:18 +01:00
|
|
|
#ifndef GDSCRIPT_TOKENIZER_H
|
|
|
|
#define GDSCRIPT_TOKENIZER_H
|
2014-02-10 02:10:30 +01:00
|
|
|
|
2022-05-13 15:04:37 +02:00
|
|
|
#include "core/templates/hash_map.h"
|
2022-05-19 17:00:06 +02:00
|
|
|
#include "core/templates/hash_set.h"
|
2020-11-07 23:33:38 +01:00
|
|
|
#include "core/templates/list.h"
|
|
|
|
#include "core/templates/vector.h"
|
|
|
|
#include "core/variant/variant.h"
|
2014-02-25 13:31:47 +01:00
|
|
|
|
2023-11-17 20:44:38 +01:00
|
|
|
#ifdef MINGW_ENABLED
|
|
|
|
#undef CONST
|
|
|
|
#undef IN
|
|
|
|
#undef VOID
|
|
|
|
#endif
|
|
|
|
|
2017-11-16 18:38:18 +01:00
|
|
|
class GDScriptTokenizer {
|
2014-02-10 02:10:30 +01:00
|
|
|
public:
|
2020-07-06 17:24:24 +02:00
|
|
|
enum CursorPlace {
|
|
|
|
CURSOR_NONE,
|
|
|
|
CURSOR_BEGINNING,
|
|
|
|
CURSOR_MIDDLE,
|
|
|
|
CURSOR_END,
|
|
|
|
};
|
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
struct Token {
|
|
|
|
enum Type {
|
|
|
|
EMPTY,
|
|
|
|
// Basic
|
|
|
|
ANNOTATION,
|
|
|
|
IDENTIFIER,
|
|
|
|
LITERAL,
|
|
|
|
// Comparison
|
|
|
|
LESS,
|
|
|
|
LESS_EQUAL,
|
|
|
|
GREATER,
|
|
|
|
GREATER_EQUAL,
|
|
|
|
EQUAL_EQUAL,
|
|
|
|
BANG_EQUAL,
|
|
|
|
// Logical
|
|
|
|
AND,
|
|
|
|
OR,
|
|
|
|
NOT,
|
|
|
|
AMPERSAND_AMPERSAND,
|
|
|
|
PIPE_PIPE,
|
|
|
|
BANG,
|
|
|
|
// Bitwise
|
|
|
|
AMPERSAND,
|
|
|
|
PIPE,
|
|
|
|
TILDE,
|
|
|
|
CARET,
|
|
|
|
LESS_LESS,
|
|
|
|
GREATER_GREATER,
|
|
|
|
// Math
|
|
|
|
PLUS,
|
|
|
|
MINUS,
|
|
|
|
STAR,
|
2022-03-07 18:25:21 +01:00
|
|
|
STAR_STAR,
|
2020-05-02 00:14:56 +02:00
|
|
|
SLASH,
|
|
|
|
PERCENT,
|
|
|
|
// Assignment
|
|
|
|
EQUAL,
|
|
|
|
PLUS_EQUAL,
|
|
|
|
MINUS_EQUAL,
|
|
|
|
STAR_EQUAL,
|
2022-03-07 18:25:21 +01:00
|
|
|
STAR_STAR_EQUAL,
|
2020-05-02 00:14:56 +02:00
|
|
|
SLASH_EQUAL,
|
|
|
|
PERCENT_EQUAL,
|
|
|
|
LESS_LESS_EQUAL,
|
|
|
|
GREATER_GREATER_EQUAL,
|
|
|
|
AMPERSAND_EQUAL,
|
|
|
|
PIPE_EQUAL,
|
|
|
|
CARET_EQUAL,
|
|
|
|
// Control flow
|
|
|
|
IF,
|
|
|
|
ELIF,
|
|
|
|
ELSE,
|
|
|
|
FOR,
|
|
|
|
WHILE,
|
|
|
|
BREAK,
|
|
|
|
CONTINUE,
|
|
|
|
PASS,
|
|
|
|
RETURN,
|
|
|
|
MATCH,
|
2023-07-31 12:47:26 +02:00
|
|
|
WHEN,
|
2020-05-02 00:14:56 +02:00
|
|
|
// Keywords
|
|
|
|
AS,
|
|
|
|
ASSERT,
|
|
|
|
AWAIT,
|
|
|
|
BREAKPOINT,
|
|
|
|
CLASS,
|
|
|
|
CLASS_NAME,
|
|
|
|
CONST,
|
|
|
|
ENUM,
|
|
|
|
EXTENDS,
|
|
|
|
FUNC,
|
|
|
|
IN,
|
|
|
|
IS,
|
|
|
|
NAMESPACE,
|
|
|
|
PRELOAD,
|
|
|
|
SELF,
|
|
|
|
SIGNAL,
|
|
|
|
STATIC,
|
|
|
|
SUPER,
|
2020-07-16 03:02:44 +02:00
|
|
|
TRAIT,
|
2020-05-02 00:14:56 +02:00
|
|
|
VAR,
|
|
|
|
VOID,
|
|
|
|
YIELD,
|
|
|
|
// Punctuation
|
|
|
|
BRACKET_OPEN,
|
|
|
|
BRACKET_CLOSE,
|
|
|
|
BRACE_OPEN,
|
|
|
|
BRACE_CLOSE,
|
|
|
|
PARENTHESIS_OPEN,
|
|
|
|
PARENTHESIS_CLOSE,
|
|
|
|
COMMA,
|
|
|
|
SEMICOLON,
|
|
|
|
PERIOD,
|
|
|
|
PERIOD_PERIOD,
|
|
|
|
COLON,
|
|
|
|
DOLLAR,
|
|
|
|
FORWARD_ARROW,
|
|
|
|
UNDERSCORE,
|
|
|
|
// Whitespace
|
|
|
|
NEWLINE,
|
|
|
|
INDENT,
|
|
|
|
DEDENT,
|
|
|
|
// Constants
|
|
|
|
CONST_PI,
|
|
|
|
CONST_TAU,
|
|
|
|
CONST_INF,
|
|
|
|
CONST_NAN,
|
|
|
|
// Error message improvement
|
|
|
|
VCS_CONFLICT_MARKER,
|
|
|
|
BACKTICK,
|
|
|
|
QUESTION_MARK,
|
|
|
|
// Special
|
|
|
|
ERROR,
|
|
|
|
TK_EOF, // "EOF" is reserved
|
|
|
|
TK_MAX
|
|
|
|
};
|
2014-02-10 02:10:30 +01:00
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
Type type = EMPTY;
|
|
|
|
Variant literal;
|
|
|
|
int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
|
|
|
|
int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
|
2020-07-06 17:24:24 +02:00
|
|
|
int cursor_position = -1;
|
|
|
|
CursorPlace cursor_place = CURSOR_NONE;
|
|
|
|
String source;
|
2014-02-25 13:31:47 +01:00
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
const char *get_name() const;
|
2023-02-15 15:41:46 +01:00
|
|
|
bool can_precede_bin_op() const;
|
2020-08-18 01:14:46 +02:00
|
|
|
bool is_identifier() const;
|
2020-08-19 15:19:05 +02:00
|
|
|
bool is_node_name() const;
|
2024-01-22 15:31:55 +01:00
|
|
|
StringName get_identifier() const { return literal; }
|
2014-02-10 02:10:30 +01:00
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
Token(Type p_type) {
|
|
|
|
type = p_type;
|
|
|
|
}
|
2014-02-10 02:10:30 +01:00
|
|
|
|
2024-01-22 15:31:55 +01:00
|
|
|
Token() {}
|
2014-02-10 02:10:30 +01:00
|
|
|
};
|
|
|
|
|
2020-11-29 03:37:57 +01:00
|
|
|
#ifdef TOOLS_ENABLED
|
|
|
|
struct CommentData {
|
|
|
|
String comment;
|
2023-09-15 17:59:30 +02:00
|
|
|
// true: Comment starts at beginning of line or after indentation.
|
|
|
|
// false: Inline comment (starts after some code).
|
2020-11-29 03:37:57 +01:00
|
|
|
bool new_line = false;
|
|
|
|
CommentData() {}
|
|
|
|
CommentData(const String &p_comment, bool p_new_line) {
|
|
|
|
comment = p_comment;
|
|
|
|
new_line = p_new_line;
|
|
|
|
}
|
|
|
|
};
|
2024-01-22 15:31:55 +01:00
|
|
|
virtual const HashMap<int, CommentData> &get_comments() const = 0;
|
2020-11-29 03:37:57 +01:00
|
|
|
#endif // TOOLS_ENABLED
|
|
|
|
|
2024-01-22 15:31:55 +01:00
|
|
|
static String get_token_name(Token::Type p_token_type);
|
|
|
|
|
|
|
|
virtual int get_cursor_line() const = 0;
|
|
|
|
virtual int get_cursor_column() const = 0;
|
|
|
|
virtual void set_cursor_position(int p_line, int p_column) = 0;
|
|
|
|
virtual void set_multiline_mode(bool p_state) = 0;
|
|
|
|
virtual bool is_past_cursor() const = 0;
|
|
|
|
virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
|
|
|
|
virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
|
|
|
|
virtual bool is_text() = 0;
|
|
|
|
|
|
|
|
virtual Token scan() = 0;
|
|
|
|
|
|
|
|
virtual ~GDScriptTokenizer() {}
|
|
|
|
};
|
|
|
|
|
|
|
|
class GDScriptTokenizerText : public GDScriptTokenizer {
|
2020-05-02 00:14:56 +02:00
|
|
|
String source;
|
2020-07-27 12:43:20 +02:00
|
|
|
const char32_t *_source = nullptr;
|
|
|
|
const char32_t *_current = nullptr;
|
2020-07-06 17:24:24 +02:00
|
|
|
int line = -1, column = -1;
|
|
|
|
int cursor_line = -1, cursor_column = -1;
|
2020-05-02 00:14:56 +02:00
|
|
|
int tab_size = 4;
|
|
|
|
|
|
|
|
// Keep track of multichar tokens.
|
2020-07-27 12:43:20 +02:00
|
|
|
const char32_t *_start = nullptr;
|
2020-05-02 00:14:56 +02:00
|
|
|
int start_line = 0, start_column = 0;
|
|
|
|
int leftmost_column = 0, rightmost_column = 0;
|
|
|
|
|
|
|
|
// Info cache.
|
|
|
|
bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
|
|
|
|
bool multiline_mode = false;
|
|
|
|
List<Token> error_stack;
|
|
|
|
bool pending_newline = false;
|
2023-02-15 15:41:46 +01:00
|
|
|
Token last_token;
|
2020-05-02 00:14:56 +02:00
|
|
|
Token last_newline;
|
|
|
|
int pending_indents = 0;
|
|
|
|
List<int> indent_stack;
|
2021-03-25 14:36:29 +01:00
|
|
|
List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
|
2020-07-27 12:43:20 +02:00
|
|
|
List<char32_t> paren_stack;
|
|
|
|
char32_t indent_char = '\0';
|
2020-05-02 00:14:56 +02:00
|
|
|
int position = 0;
|
|
|
|
int length = 0;
|
2024-01-22 15:31:55 +01:00
|
|
|
Vector<int> continuation_lines;
|
2023-01-19 02:56:00 +01:00
|
|
|
#ifdef DEBUG_ENABLED
|
|
|
|
Vector<String> keyword_list;
|
|
|
|
#endif // DEBUG_ENABLED
|
2020-05-02 00:14:56 +02:00
|
|
|
|
2020-11-29 03:37:57 +01:00
|
|
|
#ifdef TOOLS_ENABLED
|
2022-05-13 15:04:37 +02:00
|
|
|
HashMap<int, CommentData> comments;
|
2020-11-29 03:37:57 +01:00
|
|
|
#endif // TOOLS_ENABLED
|
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
_FORCE_INLINE_ bool _is_at_end() { return position >= length; }
|
2020-07-27 12:43:20 +02:00
|
|
|
_FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
|
2020-05-02 00:14:56 +02:00
|
|
|
int indent_level() const { return indent_stack.size(); }
|
2020-12-15 13:04:21 +01:00
|
|
|
bool has_error() const { return !error_stack.is_empty(); }
|
2020-05-02 00:14:56 +02:00
|
|
|
Token pop_error();
|
2020-07-27 12:43:20 +02:00
|
|
|
char32_t _advance();
|
2021-10-28 12:18:24 +02:00
|
|
|
String _get_indent_char_name(char32_t ch);
|
2020-05-02 00:14:56 +02:00
|
|
|
void _skip_whitespace();
|
|
|
|
void check_indent();
|
|
|
|
|
2023-01-19 02:56:00 +01:00
|
|
|
#ifdef DEBUG_ENABLED
|
|
|
|
void make_keyword_list();
|
|
|
|
#endif // DEBUG_ENABLED
|
|
|
|
|
2020-05-02 00:14:56 +02:00
|
|
|
Token make_error(const String &p_message);
|
|
|
|
void push_error(const String &p_message);
|
|
|
|
void push_error(const Token &p_error);
|
2020-07-27 12:43:20 +02:00
|
|
|
Token make_paren_error(char32_t p_paren);
|
2020-07-06 17:24:24 +02:00
|
|
|
Token make_token(Token::Type p_type);
|
|
|
|
Token make_literal(const Variant &p_literal);
|
|
|
|
Token make_identifier(const StringName &p_identifier);
|
2020-07-27 12:43:20 +02:00
|
|
|
Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
|
|
|
|
void push_paren(char32_t p_char);
|
|
|
|
bool pop_paren(char32_t p_expected);
|
2020-05-02 00:14:56 +02:00
|
|
|
|
|
|
|
void newline(bool p_make_token);
|
|
|
|
Token number();
|
|
|
|
Token potential_identifier();
|
|
|
|
Token string();
|
|
|
|
Token annotation();
|
2014-02-10 02:10:30 +01:00
|
|
|
|
2017-03-05 16:44:50 +01:00
|
|
|
public:
|
2020-05-02 00:14:56 +02:00
|
|
|
void set_source_code(const String &p_source_code);
|
2014-02-25 13:31:47 +01:00
|
|
|
|
2024-01-22 15:31:55 +01:00
|
|
|
const Vector<int> &get_continuation_lines() const { return continuation_lines; }
|
|
|
|
|
|
|
|
virtual int get_cursor_line() const override;
|
|
|
|
virtual int get_cursor_column() const override;
|
|
|
|
virtual void set_cursor_position(int p_line, int p_column) override;
|
|
|
|
virtual void set_multiline_mode(bool p_state) override;
|
|
|
|
virtual bool is_past_cursor() const override;
|
|
|
|
virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.
|
|
|
|
virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.
|
|
|
|
virtual bool is_text() override { return true; }
|
|
|
|
|
|
|
|
#ifdef TOOLS_ENABLED
|
|
|
|
virtual const HashMap<int, CommentData> &get_comments() const override {
|
|
|
|
return comments;
|
|
|
|
}
|
|
|
|
#endif // TOOLS_ENABLED
|
|
|
|
|
|
|
|
virtual Token scan() override;
|
2014-02-25 13:31:47 +01:00
|
|
|
|
2024-01-22 15:31:55 +01:00
|
|
|
GDScriptTokenizerText();
|
2014-02-10 02:10:30 +01:00
|
|
|
};
|
|
|
|
|
2022-07-23 23:41:51 +02:00
|
|
|
#endif // GDSCRIPT_TOKENIZER_H
|