Add support for numeric XML entities to XMLParser
* Add support for decimal numeric entities to String::xml_unescape * Add more error checks to String::xml_unescape * Refactor XMLParser to use String::xml_unescape instead of an internal implementation
This commit is contained in:
parent
ed28ce53bf
commit
a28beb3048
7 changed files with 255 additions and 92 deletions
|
@ -36,63 +36,6 @@
|
|||
|
||||
VARIANT_ENUM_CAST(XMLParser::NodeType);
|
||||
|
||||
static bool _equalsn(const CharType *str1, const CharType *str2, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len && str1[i] && str2[i]; ++i) {
|
||||
if (str1[i] != str2[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// if one (or both) of the strings was smaller then they
|
||||
// are only equal if they have the same length
|
||||
return (i == len) || (str1[i] == 0 && str2[i] == 0);
|
||||
}
|
||||
|
||||
String XMLParser::_replace_special_characters(const String &origstr) {
|
||||
int pos = origstr.find("&");
|
||||
int oldPos = 0;
|
||||
|
||||
if (pos == -1) {
|
||||
return origstr;
|
||||
}
|
||||
|
||||
String newstr;
|
||||
|
||||
while (pos != -1 && pos < origstr.length() - 2) {
|
||||
// check if it is one of the special characters
|
||||
|
||||
int specialChar = -1;
|
||||
for (int i = 0; i < (int)special_characters.size(); ++i) {
|
||||
const CharType *p = &origstr[pos] + 1;
|
||||
|
||||
if (_equalsn(&special_characters[i][1], p, special_characters[i].length() - 1)) {
|
||||
specialChar = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (specialChar != -1) {
|
||||
newstr += (origstr.substr(oldPos, pos - oldPos));
|
||||
newstr += (special_characters[specialChar][0]);
|
||||
pos += special_characters[specialChar].length();
|
||||
} else {
|
||||
newstr += (origstr.substr(oldPos, pos - oldPos + 1));
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
// find next &
|
||||
oldPos = pos;
|
||||
pos = origstr.find("&", pos);
|
||||
}
|
||||
|
||||
if (oldPos < origstr.length() - 1) {
|
||||
newstr += (origstr.substr(oldPos, origstr.length() - oldPos));
|
||||
}
|
||||
|
||||
return newstr;
|
||||
}
|
||||
|
||||
static inline bool _is_white_space(char c) {
|
||||
return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
|
||||
}
|
||||
|
@ -116,7 +59,7 @@ bool XMLParser::_set_text(char *start, char *end) {
|
|||
|
||||
// set current text to the parsed text, and replace xml special characters
|
||||
String s = String::utf8(start, (int)(end - start));
|
||||
node_name = _replace_special_characters(s);
|
||||
node_name = s.xml_unescape();
|
||||
|
||||
// current XML node type is text
|
||||
node_type = NODE_TEXT;
|
||||
|
@ -321,7 +264,7 @@ void XMLParser::_parse_opening_xml_element() {
|
|||
String s = String::utf8(attributeValueBegin,
|
||||
(int)(attributeValueEnd - attributeValueBegin));
|
||||
|
||||
attr.value = _replace_special_characters(s);
|
||||
attr.value = s.xml_unescape();
|
||||
attributes.push_back(attr);
|
||||
} else {
|
||||
// tag is closed directly
|
||||
|
@ -579,14 +522,8 @@ int XMLParser::get_current_line() const {
|
|||
}
|
||||
|
||||
XMLParser::XMLParser() {
|
||||
data = nullptr;
|
||||
close();
|
||||
special_characters.push_back("&");
|
||||
special_characters.push_back("<lt;");
|
||||
special_characters.push_back(">gt;");
|
||||
special_characters.push_back("\"quot;");
|
||||
special_characters.push_back("'apos;");
|
||||
}
|
||||
|
||||
XMLParser::~XMLParser() {
|
||||
if (data) {
|
||||
memdelete_arr(data);
|
||||
|
|
|
@ -65,15 +65,13 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
char *data;
|
||||
char *P;
|
||||
uint64_t length;
|
||||
void unescape(String &p_str);
|
||||
Vector<String> special_characters;
|
||||
char *data = nullptr;
|
||||
char *P = nullptr;
|
||||
uint64_t length = 0;
|
||||
String node_name;
|
||||
bool node_empty;
|
||||
NodeType node_type;
|
||||
uint64_t node_offset;
|
||||
bool node_empty = false;
|
||||
NodeType node_type = NODE_NONE;
|
||||
uint64_t node_offset = 0;
|
||||
|
||||
struct Attribute {
|
||||
String name;
|
||||
|
@ -82,7 +80,6 @@ private:
|
|||
|
||||
Vector<Attribute> attributes;
|
||||
|
||||
String _replace_special_characters(const String &origstr);
|
||||
bool _set_text(char *start, char *end);
|
||||
void _parse_closing_xml_element();
|
||||
void _ignore_definition();
|
||||
|
|
|
@ -3549,29 +3549,58 @@ static _FORCE_INLINE_ int _xml_unescape(const CharType *p_src, int p_src_len, Ch
|
|||
|
||||
if (p_src_len >= 4 && p_src[1] == '#') {
|
||||
CharType c = 0;
|
||||
|
||||
for (int i = 2; i < p_src_len; i++) {
|
||||
eat = i + 1;
|
||||
CharType ct = p_src[i];
|
||||
if (ct == ';') {
|
||||
break;
|
||||
} else if (ct >= '0' && ct <= '9') {
|
||||
ct = ct - '0';
|
||||
} else if (ct >= 'a' && ct <= 'f') {
|
||||
ct = (ct - 'a') + 10;
|
||||
} else if (ct >= 'A' && ct <= 'F') {
|
||||
ct = (ct - 'A') + 10;
|
||||
} else {
|
||||
continue;
|
||||
bool overflow = false;
|
||||
if (p_src[2] == 'x') {
|
||||
// Hex entity &#x<num>;
|
||||
for (int i = 3; i < p_src_len; i++) {
|
||||
eat = i + 1;
|
||||
CharType ct = p_src[i];
|
||||
if (ct == ';') {
|
||||
break;
|
||||
} else if (ct >= '0' && ct <= '9') {
|
||||
ct = ct - '0';
|
||||
} else if (ct >= 'a' && ct <= 'f') {
|
||||
ct = (ct - 'a') + 10;
|
||||
} else if (ct >= 'A' && ct <= 'F') {
|
||||
ct = (ct - 'A') + 10;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (c > (WCHAR_MAX >> 4)) {
|
||||
overflow = true;
|
||||
break;
|
||||
}
|
||||
c <<= 4;
|
||||
c |= ct;
|
||||
}
|
||||
} else {
|
||||
// Decimal entity &#<num>;
|
||||
for (int i = 2; i < p_src_len; i++) {
|
||||
eat = i + 1;
|
||||
CharType ct = p_src[i];
|
||||
if (ct == ';' || ct < '0' || ct > '9') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (p_src[eat - 1] == ';') {
|
||||
int64_t val = String::to_int(p_src + 2, eat - 3);
|
||||
if (val > 0 && val <= WCHAR_MAX) {
|
||||
c = (CharType)val;
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
}
|
||||
c <<= 4;
|
||||
c |= ct;
|
||||
}
|
||||
|
||||
// Value must be non-zero, in the range of char32_t,
|
||||
// actually end with ';'. If invalid, leave the entity as-is
|
||||
if (c == '\0' || overflow || p_src[eat - 1] != ';') {
|
||||
eat = 1;
|
||||
c = *p_src;
|
||||
}
|
||||
if (p_dst) {
|
||||
*p_dst = c;
|
||||
}
|
||||
|
||||
} else if (p_src_len >= 4 && p_src[1] == 'g' && p_src[2] == 't' && p_src[3] == ';') {
|
||||
if (p_dst) {
|
||||
*p_dst = '>';
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "test_render.h"
|
||||
#include "test_shader_lang.h"
|
||||
#include "test_string.h"
|
||||
#include "test_xml_parser.h"
|
||||
|
||||
const char **tests_get_names() {
|
||||
static const char *test_names[] = {
|
||||
|
@ -65,6 +66,7 @@ const char **tests_get_names() {
|
|||
"gd_bytecode",
|
||||
"ordered_hash_map",
|
||||
"astar",
|
||||
"xml_parser",
|
||||
nullptr
|
||||
};
|
||||
|
||||
|
@ -138,6 +140,10 @@ MainLoop *test_main(String p_test, const List<String> &p_args) {
|
|||
return TestAStar::test();
|
||||
}
|
||||
|
||||
if (p_test == "xml_parser") {
|
||||
return TestXMLParser::test();
|
||||
}
|
||||
|
||||
print_line("Unknown test: " + p_test);
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -1123,6 +1123,45 @@ bool test_35() {
|
|||
return state;
|
||||
}
|
||||
|
||||
bool test_36() {
|
||||
#define CHECK(X) \
|
||||
if (!(X)) { \
|
||||
OS::get_singleton()->print("\tFAIL at %s\n", #X); \
|
||||
return false; \
|
||||
} else { \
|
||||
OS::get_singleton()->print("\tPASS\n"); \
|
||||
}
|
||||
OS::get_singleton()->print("\n\nTest 36: xml unescape\n");
|
||||
// Named entities
|
||||
String input = ""&'<>";
|
||||
CHECK(input.xml_unescape() == "\"&\'<>");
|
||||
|
||||
// Numeric entities
|
||||
input = "AB";
|
||||
CHECK(input.xml_unescape() == "AB");
|
||||
|
||||
input = "�&x#0;More text";
|
||||
String result = input.xml_unescape();
|
||||
// Didn't put in a leading NUL and terminate the string
|
||||
CHECK(input.length() > 0);
|
||||
CHECK(input[0] != '\0');
|
||||
// Entity should be left as-is if invalid
|
||||
CHECK(input.xml_unescape() == input);
|
||||
|
||||
// Shouldn't consume without ending in a ';'
|
||||
input = "B";
|
||||
CHECK(input.xml_unescape() == input);
|
||||
input = "A";
|
||||
CHECK(input.xml_unescape() == input);
|
||||
|
||||
// Invalid characters should make the entity ignored
|
||||
input = "ASomeIrrelevantText;";
|
||||
CHECK(input.xml_unescape() == input);
|
||||
input = "BSomeIrrelevantText;";
|
||||
CHECK(input.xml_unescape() == input);
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef bool (*TestFunc)();
|
||||
|
||||
TestFunc test_funcs[] = {
|
||||
|
@ -1162,6 +1201,7 @@ TestFunc test_funcs[] = {
|
|||
test_33,
|
||||
test_34,
|
||||
test_35,
|
||||
test_36,
|
||||
nullptr
|
||||
|
||||
};
|
||||
|
|
109
main/tests/test_xml_parser.cpp
Normal file
109
main/tests/test_xml_parser.cpp
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*************************************************************************/
|
||||
/* test_xml_parser.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "test_xml_parser.h"
|
||||
|
||||
#include "core/os/os.h"
|
||||
|
||||
namespace TestXMLParser {
|
||||
#define CHECK(X) \
|
||||
if (!(X)) { \
|
||||
OS::get_singleton()->print("\tFAIL at %s\n", #X); \
|
||||
return false; \
|
||||
} else { \
|
||||
OS::get_singleton()->print("\tPASS\n"); \
|
||||
}
|
||||
bool test_1() {
|
||||
String source = "<?xml version = \"1.0\" encoding=\"UTF-8\" ?>\
|
||||
<top attr=\"attr value\">\
|
||||
Text<AB>\
|
||||
</top>";
|
||||
Vector<uint8_t> buff;
|
||||
for (int i = 0; i < source.length(); i++) {
|
||||
buff.push_back(source.get(i));
|
||||
}
|
||||
XMLParser parser;
|
||||
parser.open_buffer(buff);
|
||||
|
||||
// <?xml ...?> gets parsed as NODE_UNKNOWN
|
||||
CHECK(parser.read() == OK);
|
||||
CHECK(parser.get_node_type() == XMLParser::NodeType::NODE_UNKNOWN);
|
||||
|
||||
CHECK(parser.read() == OK);
|
||||
CHECK(parser.get_node_type() == XMLParser::NodeType::NODE_ELEMENT);
|
||||
CHECK(parser.get_node_name() == "top");
|
||||
CHECK(parser.has_attribute("attr"));
|
||||
CHECK(parser.get_attribute_value("attr") == "attr value");
|
||||
|
||||
CHECK(parser.read() == OK);
|
||||
CHECK(parser.get_node_type() == XMLParser::NodeType::NODE_TEXT);
|
||||
CHECK(parser.get_node_data().lstrip(" \t") == "Text<AB>");
|
||||
|
||||
CHECK(parser.read() == OK);
|
||||
CHECK(parser.get_node_type() == XMLParser::NodeType::NODE_ELEMENT_END);
|
||||
CHECK(parser.get_node_name() == "top");
|
||||
|
||||
parser.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef bool (*TestFunc)();
|
||||
TestFunc test_funcs[] = {
|
||||
test_1,
|
||||
nullptr
|
||||
};
|
||||
|
||||
MainLoop *test() {
|
||||
int count = 0;
|
||||
int passed = 0;
|
||||
|
||||
while (true) {
|
||||
if (!test_funcs[count]) {
|
||||
break;
|
||||
}
|
||||
bool pass = test_funcs[count]();
|
||||
if (pass) {
|
||||
passed++;
|
||||
}
|
||||
OS::get_singleton()->print("\t%s\n", pass ? "PASS" : "FAILED");
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
OS::get_singleton()->print("\n\n\n");
|
||||
OS::get_singleton()->print("*************\n");
|
||||
OS::get_singleton()->print("***TOTALS!***\n");
|
||||
OS::get_singleton()->print("*************\n");
|
||||
|
||||
OS::get_singleton()->print("Passed %i of %i tests\n", passed, count);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
} // namespace TestXMLParser
|
45
main/tests/test_xml_parser.h
Normal file
45
main/tests/test_xml_parser.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*************************************************************************/
|
||||
/* test_xml_parser.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef TEST_XML_PARSER_H
|
||||
#define TEST_XML_PARSER_H
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "core/io/xml_parser.h"
|
||||
#include "core/os/main_loop.h"
|
||||
#include "core/ustring.h"
|
||||
#include "core/vector.h"
|
||||
|
||||
namespace TestXMLParser {
|
||||
|
||||
MainLoop *test();
|
||||
}
|
||||
#endif // TEST_XML_PARSER_H
|
Loading…
Reference in a new issue