Add naturalcasecmp_to function to String

Functions as a complement to `naturalnocasecmp_to`
This commit is contained in:
Ninni Pipping 2023-05-04 20:32:45 +02:00
parent 8c729f0f34
commit 46a7018e3c
5 changed files with 143 additions and 59 deletions

View file

@ -812,15 +812,15 @@ signed char String::nocasecmp_to(const String &p_str) const {
const char32_t *this_str = get_data(); const char32_t *this_str = get_data();
while (true) { while (true) {
if (*that_str == 0 && *this_str == 0) { if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
return 0; //we're equal return 0;
} else if (*this_str == 0) { } else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
return -1; //if this is empty, and the other one is not, then we're less.. I think?
} else if (*that_str == 0) {
return 1; //otherwise the other one is smaller..
} else if (_find_upper(*this_str) < _find_upper(*that_str)) { //more than
return -1; return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { //less than } else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
return 1;
} else if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
return 1; return 1;
} }
@ -844,15 +844,15 @@ signed char String::casecmp_to(const String &p_str) const {
const char32_t *this_str = get_data(); const char32_t *this_str = get_data();
while (true) { while (true) {
if (*that_str == 0 && *this_str == 0) { if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
return 0; //we're equal return 0;
} else if (*this_str == 0) { } else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
return -1; //if this is empty, and the other one is not, then we're less.. I think?
} else if (*that_str == 0) {
return 1; //otherwise the other one is smaller..
} else if (*this_str < *that_str) { //more than
return -1; return -1;
} else if (*this_str > *that_str) { //less than } else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
return 1;
} else if (*this_str < *that_str) { // If current character in this is less, we are less.
return -1;
} else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
return 1; return 1;
} }
@ -861,6 +861,100 @@ signed char String::casecmp_to(const String &p_str) const {
} }
} }
// Shared numeric step of the natural-order comparisons.
//
// On entry r_this_str and r_that_str each point into a string; on exit both
// have been advanced past every consecutive character accepted by is_digit().
// The two runs are then ranked as numbers: leading '0' characters are
// ignored, the run with more remaining digits is the larger one, and runs
// with the same number of remaining digits are ranked by their first
// differing digit.
//
// Returns -1 if the run in "this" is smaller, 1 if it is larger, and 0 if the
// two runs are numerically equal (the cursors are advanced in every case).
static _FORCE_INLINE_ signed char natural_cmp_common(const char32_t *&r_this_str, const char32_t *&r_that_str) {
	// Remember where each run begins before the cursors move.
	const char32_t *lhs_digits = r_this_str;
	const char32_t *rhs_digits = r_that_str;

	// Move the caller's cursors to the first character after each run.
	for (; is_digit(*r_this_str); ++r_this_str) {
	}
	for (; is_digit(*r_that_str); ++r_that_str) {
	}

	// Drop leading zeros so only significant digits are measured.
	// NOTE(review): this relies on the character at the end of a run not being
	// '0', which presumably follows from is_digit('0') being true.
	for (; *lhs_digits == '0'; ++lhs_digits) {
	}
	for (; *rhs_digits == '0'; ++rhs_digits) {
	}

	// More significant digits means a larger number.
	const int lhs_len = r_this_str - lhs_digits;
	const int rhs_len = r_that_str - rhs_digits;
	if (lhs_len != rhs_len) {
		return lhs_len < rhs_len ? -1 : 1;
	}

	// Same number of significant digits: the first mismatch decides.
	for (; lhs_digits != r_this_str && rhs_digits != r_that_str; ++lhs_digits, ++rhs_digits) {
		if (*lhs_digits != *rhs_digits) {
			return *lhs_digits < *rhs_digits ? -1 : 1;
		}
	}

	return 0;
}
// Case-sensitive, natural-order three-way comparison of this string with
// p_str. No case folding is applied; non-digit characters are ranked by their
// raw char32_t values.
//
// Ordering rules, in the order they are applied:
//  - Leading '.' characters are matched pairwise. A string that still has a
//    leading '.' where the other does not sorts first.
//  - Where both strings have a digit run, the runs are ranked numerically by
//    natural_cmp_common(); a digit sorts before a non-digit.
//  - Otherwise characters are compared one by one, and a string that ends
//    first sorts first.
//
// Returns -1 if this string sorts before p_str, 1 if it sorts after, and 0 if
// the two compare equal (also returned if either data pointer is null).
signed char String::naturalcasecmp_to(const String &p_str) const {
	const char32_t *this_str = get_data();
	const char32_t *that_str = p_str.get_data();

	if (this_str && that_str) {
		// Handle leading dots before the general comparison.
		while (*this_str == '.' || *that_str == '.') {
			if (*this_str++ != '.') {
				// Only the other string has a dot here: it sorts first.
				return 1;
			}
			if (*that_str++ != '.') {
				// Only this string has a dot here: it sorts first.
				return -1;
			}
			if (!*that_str) {
				// The other string consisted only of dots. If this one ended at
				// the same point the strings are identical and must compare
				// equal; otherwise this one is longer and sorts after.
				return *this_str ? 1 : 0;
			}
			if (!*this_str) {
				// This string consisted only of dots and the other continues.
				return -1;
			}
		}

		while (*this_str) {
			if (!*that_str) {
				// The other string ended first, so this one is greater.
				return 1;
			} else if (is_digit(*this_str)) {
				if (!is_digit(*that_str)) {
					// Digits sort before non-digits.
					return -1;
				}

				// Both at a digit run: compare numerically. The helper advances
				// both pointers past their runs.
				signed char ret = natural_cmp_common(this_str, that_str);
				if (ret) {
					return ret;
				}
			} else if (is_digit(*that_str)) {
				// Digits sort before non-digits.
				return 1;
			} else {
				if (*this_str < *that_str) { // If current character in this is less, we are less.
					return -1;
				} else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
					return 1;
				}

				this_str++;
				that_str++;
			}
		}

		if (*that_str) {
			// This string ended first, so it is less.
			return -1;
		}
	}

	return 0;
}
signed char String::naturalnocasecmp_to(const String &p_str) const { signed char String::naturalnocasecmp_to(const String &p_str) const {
const char32_t *this_str = get_data(); const char32_t *this_str = get_data();
const char32_t *that_str = p_str.get_data(); const char32_t *that_str = p_str.get_data();
@ -889,48 +983,16 @@ signed char String::naturalnocasecmp_to(const String &p_str) const {
return -1; return -1;
} }
// Keep ptrs to start of numerical sequences signed char ret = natural_cmp_common(this_str, that_str);
const char32_t *this_substr = this_str; if (ret) {
const char32_t *that_substr = that_str; return ret;
// Compare lengths of both numerical sequences, ignoring leading zeros
while (is_digit(*this_str)) {
this_str++;
}
while (is_digit(*that_str)) {
that_str++;
}
while (*this_substr == '0') {
this_substr++;
}
while (*that_substr == '0') {
that_substr++;
}
int this_len = this_str - this_substr;
int that_len = that_str - that_substr;
if (this_len < that_len) {
return -1;
} else if (this_len > that_len) {
return 1;
}
// If lengths equal, compare lexicographically
while (this_substr != this_str && that_substr != that_str) {
if (*this_substr < *that_substr) {
return -1;
} else if (*this_substr > *that_substr) {
return 1;
}
this_substr++;
that_substr++;
} }
} else if (is_digit(*that_str)) { } else if (is_digit(*that_str)) {
return 1; return 1;
} else { } else {
if (_find_upper(*this_str) < _find_upper(*that_str)) { //more than if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
return -1; return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { //less than } else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
return 1; return 1;
} }

View file

@ -262,6 +262,7 @@ public:
signed char casecmp_to(const String &p_str) const; signed char casecmp_to(const String &p_str) const;
signed char nocasecmp_to(const String &p_str) const; signed char nocasecmp_to(const String &p_str) const;
signed char naturalcasecmp_to(const String &p_str) const;
signed char naturalnocasecmp_to(const String &p_str) const; signed char naturalnocasecmp_to(const String &p_str) const;
const char32_t *get_data() const; const char32_t *get_data() const;

View file

@ -1633,6 +1633,7 @@ static void _register_variant_builtin_methods() {
bind_string_method(casecmp_to, sarray("to"), varray()); bind_string_method(casecmp_to, sarray("to"), varray());
bind_string_method(nocasecmp_to, sarray("to"), varray()); bind_string_method(nocasecmp_to, sarray("to"), varray());
bind_string_method(naturalcasecmp_to, sarray("to"), varray());
bind_string_method(naturalnocasecmp_to, sarray("to"), varray()); bind_string_method(naturalnocasecmp_to, sarray("to"), varray());
bind_string_method(length, sarray(), varray()); bind_string_method(length, sarray(), varray());
bind_string_method(substr, sarray("from", "len"), varray(-1)); bind_string_method(substr, sarray("from", "len"), varray(-1));

View file

@ -111,7 +111,7 @@
<description> <description>
Performs a case-sensitive comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" and "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Performs a case-sensitive comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" and "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order.
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to] and [method naturalnocasecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to], [method naturalcasecmp_to], and [method naturalnocasecmp_to].
</description> </description>
</method> </method>
<method name="chr" qualifiers="static"> <method name="chr" qualifiers="static">
@ -570,6 +570,16 @@
Returns the [url=https://en.wikipedia.org/wiki/MD5]MD5 hash[/url] of the string as another [String]. Returns the [url=https://en.wikipedia.org/wiki/MD5]MD5 hash[/url] of the string as another [String].
</description> </description>
</method> </method>
<method name="naturalcasecmp_to" qualifiers="const">
<return type="int" />
<param index="0" name="to" type="String" />
<description>
Performs a [b]case-sensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order.
When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code].
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method naturalnocasecmp_to], [method nocasecmp_to], and [method casecmp_to].
</description>
</method>
<method name="naturalnocasecmp_to" qualifiers="const"> <method name="naturalnocasecmp_to" qualifiers="const">
<return type="int" /> <return type="int" />
<param index="0" name="to" type="String" /> <param index="0" name="to" type="String" />
@ -577,7 +587,7 @@
Performs a [b]case-insensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison. Performs a [b]case-insensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison.
When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code]. When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code].
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to] and [method casecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method naturalcasecmp_to], [method nocasecmp_to], and [method casecmp_to].
</description> </description>
</method> </method>
<method name="nocasecmp_to" qualifiers="const"> <method name="nocasecmp_to" qualifiers="const">
@ -586,7 +596,7 @@
<description> <description>
Performs a [b]case-insensitive[/b] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison. Performs a [b]case-insensitive[/b] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison.
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method casecmp_to] and [method naturalnocasecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method casecmp_to], [method naturalcasecmp_to], and [method naturalnocasecmp_to].
</description> </description>
</method> </method>
<method name="num" qualifiers="static"> <method name="num" qualifiers="static">

View file

@ -105,7 +105,7 @@
<description> <description>
Performs a case-sensitive comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" and "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Performs a case-sensitive comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" and "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order.
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to] and [method naturalnocasecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to], [method naturalcasecmp_to], and [method naturalnocasecmp_to].
</description> </description>
</method> </method>
<method name="contains" qualifiers="const"> <method name="contains" qualifiers="const">
@ -545,6 +545,16 @@
Returns the [url=https://en.wikipedia.org/wiki/MD5]MD5 hash[/url] of the string as another [String]. Returns the [url=https://en.wikipedia.org/wiki/MD5]MD5 hash[/url] of the string as another [String].
</description> </description>
</method> </method>
<method name="naturalcasecmp_to" qualifiers="const">
<return type="int" />
<param index="0" name="to" type="String" />
<description>
Performs a [b]case-sensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order.
When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code].
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method naturalnocasecmp_to], [method nocasecmp_to], and [method casecmp_to].
</description>
</method>
<method name="naturalnocasecmp_to" qualifiers="const"> <method name="naturalnocasecmp_to" qualifiers="const">
<return type="int" /> <return type="int" />
<param index="0" name="to" type="String" /> <param index="0" name="to" type="String" />
@ -552,7 +562,7 @@
Performs a [b]case-insensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison. Performs a [b]case-insensitive[/b], [i]natural order[/i] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison.
When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code]. When used for sorting, natural order comparison orders sequences of numbers by the combined value of each digit as is often expected, instead of the single digit's value. A sorted sequence of numbered strings will be [code]["1", "2", "3", ...][/code], not [code]["1", "10", "2", "3", ...][/code].
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method nocasecmp_to] and [method casecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method naturalcasecmp_to], [method nocasecmp_to], and [method casecmp_to].
</description> </description>
</method> </method>
<method name="nocasecmp_to" qualifiers="const"> <method name="nocasecmp_to" qualifiers="const">
@ -561,7 +571,7 @@
<description> <description>
Performs a [b]case-insensitive[/b] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison. Performs a [b]case-insensitive[/b] comparison to another string. Returns [code]-1[/code] if less than, [code]1[/code] if greater than, or [code]0[/code] if equal. "Less than" or "greater than" are determined by the [url=https://en.wikipedia.org/wiki/List_of_Unicode_characters]Unicode code points[/url] of each string, which roughly matches the alphabetical order. Internally, lowercase characters are converted to uppercase for the comparison.
With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code]. With different string lengths, returns [code]1[/code] if this string is longer than the [param to] string, or [code]-1[/code] if shorter. Note that the length of empty strings is [i]always[/i] [code]0[/code].
To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method casecmp_to] and [method naturalnocasecmp_to]. To get a [bool] result from a string comparison, use the [code]==[/code] operator instead. See also [method casecmp_to], [method naturalcasecmp_to], and [method naturalnocasecmp_to].
</description> </description>
</method> </method>
<method name="pad_decimals" qualifiers="const"> <method name="pad_decimals" qualifiers="const">