From 3f89287ddbe1625bb00f92415950db6096a79ce0 Mon Sep 17 00:00:00 2001 From: Haoyu Qiu Date: Tue, 18 May 2021 15:01:21 +0800 Subject: [PATCH] Fix XMLParser behavior for comments and premature endings (cherry picked from commit 549ad70760e96828dadc98ad47bd4788e14947f2) --- core/io/xml_parser.cpp | 97 ++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp index 4e63566e970..a4715a229c1 100644 --- a/core/io/xml_parser.cpp +++ b/core/io/xml_parser.cpp @@ -132,7 +132,7 @@ void XMLParser::_parse_closing_xml_element() { ++P; const char *pBeginClose = P; - while (*P != '>') { + while (*P && *P != '>') { ++P; } @@ -140,7 +140,10 @@ void XMLParser::_parse_closing_xml_element() { #ifdef DEBUG_XML print_line("XML CLOSE: " + node_name); #endif - ++P; + + if (*P) { + ++P; + } } void XMLParser::_ignore_definition() { @@ -148,11 +151,14 @@ void XMLParser::_ignore_definition() { char *F = P; // move until end marked with '>' reached - while (*P != '>') { + while (*P && *P != '>') { ++P; } node_name.parse_utf8(F, P - F); - ++P; + + if (*P) { + ++P; + } } bool XMLParser::_parse_cdata() { @@ -170,6 +176,7 @@ bool XMLParser::_parse_cdata() { } if (!*P) { + node_name = ""; return true; } @@ -188,10 +195,9 @@ bool XMLParser::_parse_cdata() { } if (cDataEnd) { - node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin)); - } else { - node_name = ""; + cDataEnd = P; } + node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin)); #ifdef DEBUG_XML print_line("XML CDATA: " + node_name); #endif @@ -203,24 +209,45 @@ void XMLParser::_parse_comment() { node_type = NODE_COMMENT; P += 1; - char *pCommentBegin = P; + char *pEndOfInput = data + length; + char *pCommentBegin; + char *pCommentEnd; - int count = 1; + if (P + 1 < pEndOfInput && P[0] == '-' && P[1] == '-') { + // Comment, use '-->' as end. + pCommentBegin = P + 2; + for (pCommentEnd = pCommentBegin; pCommentEnd + 2 < pEndOfInput; pCommentEnd++) { + if (pCommentEnd[0] == '-' && pCommentEnd[1] == '-' && pCommentEnd[2] == '>') { + break; + } + } + if (pCommentEnd + 2 < pEndOfInput) { + P = pCommentEnd + 3; + } else { + P = pCommentEnd = pEndOfInput; + } + } else { + // Like document type definition, match angle brackets. + pCommentBegin = P; - // move until end of comment reached - while (count) { - if (*P == '>') { - --count; - } else if (*P == '<') { - ++count; + int count = 1; + while (*P && count) { + if (*P == '>') { + --count; + } else if (*P == '<') { + ++count; + } + ++P; } - ++P; + if (count) { + pCommentEnd = P; + } else { + pCommentEnd = P - 1; + } } - P -= 3; - node_name = String::utf8(pCommentBegin + 2, (int)(P - pCommentBegin - 2)); - P += 3; + node_name = String::utf8(pCommentBegin, (int)(pCommentEnd - pCommentBegin)); #ifdef DEBUG_XML print_line("XML COMMENT: " + node_name); #endif @@ -235,14 +262,14 @@ void XMLParser::_parse_opening_xml_element() { const char *startName = P; // find end of element - while (*P != '>' && !_is_white_space(*P)) { + while (*P && *P != '>' && !_is_white_space(*P)) { ++P; } const char *endName = P; // find attributes - while (*P != '>') { + while (*P && *P != '>') { if (_is_white_space(*P)) { ++P; } else { @@ -252,10 +279,14 @@ void XMLParser::_parse_opening_xml_element() { // read the attribute names const char *attributeNameBegin = P; - while (!_is_white_space(*P) && *P != '=') { + while (*P && !_is_white_space(*P) && *P != '=') { ++P; } + if (!*P) { + break; + } + const char *attributeNameEnd = P; ++P; @@ -266,7 +297,7 @@ void XMLParser::_parse_opening_xml_element() { } if (!*P) { // malformatted xml file - return; + break; } const char attributeQuoteChar = *P; @@ -278,12 +309,10 @@ void XMLParser::_parse_opening_xml_element() { ++P; } - if (!*P) { // malformatted xml file - return; - } - const char *attributeValueEnd = P; - ++P; + if (*P) { + ++P; + } Attribute attr; attr.name = String::utf8(attributeNameBegin, @@ -315,7 +344,9 @@ void XMLParser::_parse_opening_xml_element() { print_line("XML OPEN: " + node_name); #endif - ++P; + if (*P) { + ++P; + } } void XMLParser::_parse_current_node() { @@ -327,10 +358,6 @@ void XMLParser::_parse_current_node() { ++P; } - if (!*P) { - return; - } - if (P - start > 0) { // we found some text, store it if (_set_text(start, P)) { @@ -338,6 +365,10 @@ void XMLParser::_parse_current_node() { } } + if (!*P) { + return; + } + ++P; // based on current token, parse and report next element