Fix String::word_wrap() for long words
- Changes `TextServer.string_get_word_breaks()`:
  - Returns pairs of boundary start and end offsets.
  - Accepts `chars_per_line` to return line breaks.
- Removes `String::word_wrap()`.

Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com>
This commit is contained in:
parent
f18f2740da
commit
207e52c161
16 changed files with 203 additions and 104 deletions
|
@ -220,37 +220,6 @@ void CharString::copy_from(const char *p_cstr) {
|
|||
/* String */
|
||||
/*************************************************************************/
|
||||
|
||||
// Returns a copy of this string with "\n" inserted so that a new line is
// started once p_chars_per_line characters have been scanned since the start
// of the current line. The break is placed at the most recent space or tab on
// that line when one was seen (the whitespace character itself is dropped);
// otherwise the line is cut at the current character. A '\n' reached before
// the limit ends the current line as-is.
// NOTE(review): last_space starts at 0 rather than -1, so if the first line
// hits the limit before any space/tab was seen, the rewind branch runs: an
// empty line is emitted and the character at index 0 is left out of the result.
// NOTE(review): the no-whitespace branch copies i - from + 1 characters, one
// more than the i - from count that triggered the wrap.
// Kind of poor, should be rewritten properly.
|
||||
String String::word_wrap(int p_chars_per_line) const {
|
||||
// Index of the first character of the line currently being built.
int from = 0;
|
||||
// Index of the last space/tab seen on the current line; -1 means "none".
int last_space = 0;
|
||||
String ret;
|
||||
for (int i = 0; i < length(); i++) {
|
||||
// Limit reached: this check takes priority over the whitespace and '\n' checks below.
if (i - from >= p_chars_per_line) {
|
||||
if (last_space == -1) {
|
||||
// No break opportunity on this line: cut here, including character i.
ret += substr(from, i - from + 1) + "\n";
|
||||
} else {
|
||||
// Emit everything before the last space/tab, excluding the whitespace itself.
ret += substr(from, last_space - from) + "\n";
|
||||
i = last_space; // Rewind so the next line starts right after the break character.
|
||||
}
|
||||
from = i + 1;
|
||||
last_space = -1;
|
||||
} else if (operator[](i) == ' ' || operator[](i) == '\t') {
|
||||
// Remember the latest break opportunity.
last_space = i;
|
||||
} else if (operator[](i) == '\n') {
|
||||
// Existing newline: flush the current line without the '\n' and re-add one.
ret += substr(from, i - from) + "\n";
|
||||
from = i + 1;
|
||||
last_space = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Append whatever is left after the last emitted line; no trailing "\n" is added.
if (from < length()) {
|
||||
ret += substr(from, length());
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const {
|
||||
// Splits the URL into scheme, host, port, path. Strip credentials when present.
|
||||
String base = *this;
|
||||
|
|
|
@ -425,7 +425,6 @@ public:
|
|||
String c_escape_multiline() const;
|
||||
String c_unescape() const;
|
||||
String json_escape() const;
|
||||
String word_wrap(int p_chars_per_line) const;
|
||||
Error parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const;
|
||||
|
||||
String property_name_encode() const;
|
||||
|
|
|
@ -1548,8 +1548,15 @@
|
|||
<return type="PackedInt32Array" />
|
||||
<param index="0" name="string" type="String" />
|
||||
<param index="1" name="language" type="String" default="""" />
|
||||
<param index="2" name="chars_per_line" type="int" default="0" />
|
||||
<description>
|
||||
Returns array of the word break character offsets.
|
||||
Returns an array of the word break boundaries. Elements in the returned array are the offsets of the start and end of words. Therefore, the length of the array is always even.
|
||||
When [param chars_per_line] is greater than zero, line break boundaries are returned instead.
|
||||
[codeblock]
|
||||
var ts = TextServerManager.get_primary_interface()
|
||||
print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12]
|
||||
print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12]
|
||||
[/codeblock]
|
||||
</description>
|
||||
</method>
|
||||
<method name="string_to_lower" qualifiers="const">
|
||||
|
|
|
@ -1346,6 +1346,7 @@
|
|||
<return type="PackedInt32Array" />
|
||||
<param index="0" name="string" type="String" />
|
||||
<param index="1" name="language" type="String" />
|
||||
<param index="2" name="chars_per_line" type="int" />
|
||||
<description>
|
||||
</description>
|
||||
</method>
|
||||
|
|
|
@ -751,7 +751,16 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType
|
|||
reason->add_theme_color_override("font_color", get_theme_color(SNAME("success_color"), SNAME("Editor")));
|
||||
}
|
||||
reason->set_text(p_reason);
|
||||
reason->set_tooltip_text(p_reason.word_wrap(80));
|
||||
|
||||
const PackedInt32Array boundaries = TS->string_get_word_breaks(p_reason, "", 80);
|
||||
PackedStringArray lines;
|
||||
for (int i = 0; i < boundaries.size(); i += 2) {
|
||||
const int start = boundaries[i];
|
||||
const int end = boundaries[i + 1];
|
||||
lines.append(p_reason.substr(start, end - start + 1));
|
||||
}
|
||||
|
||||
reason->set_tooltip_text(String("\n").join(lines));
|
||||
}
|
||||
|
||||
void ScriptEditorDebugger::_notification(int p_what) {
|
||||
|
|
|
@ -132,8 +132,16 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i
|
|||
if (config_err.is_empty()) {
|
||||
return;
|
||||
}
|
||||
config_err = config_err.word_wrap(80);
|
||||
warning->set_text(config_err);
|
||||
|
||||
const PackedInt32Array boundaries = TS->string_get_word_breaks(config_err, "", 80);
|
||||
PackedStringArray lines;
|
||||
for (int i = 0; i < boundaries.size(); i += 2) {
|
||||
const int start = boundaries[i];
|
||||
const int end = boundaries[i + 1];
|
||||
lines.append(config_err.substr(start, end - start + 1));
|
||||
}
|
||||
|
||||
warning->set_text(String("\n").join(lines));
|
||||
warning->popup_centered();
|
||||
|
||||
} else if (p_id == BUTTON_SIGNALS) {
|
||||
|
|
|
@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String
|
|||
return String::utf16(lower.ptr(), len);
|
||||
}
|
||||
|
||||
PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const {
|
||||
PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
|
||||
const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language;
|
||||
// Convert to UTF-16.
|
||||
Char16String utf16 = p_string.utf16();
|
||||
|
@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
|
|||
HashSet<int> breaks;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
|
||||
if (U_FAILURE(err)) {
|
||||
// No data loaded - use fallback.
|
||||
for (int i = 0; i < p_string.length(); i++) {
|
||||
char32_t c = p_string[i];
|
||||
if (is_whitespace(c) || is_linebreak(c)) {
|
||||
breaks.insert(i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (U_SUCCESS(err)) {
|
||||
while (ubrk_next(bi) != UBRK_DONE) {
|
||||
int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
|
||||
if (pos != p_string.length() - 1) {
|
||||
|
@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
|
|||
ubrk_close(bi);
|
||||
|
||||
PackedInt32Array ret;
|
||||
|
||||
int line_start = 0;
|
||||
int line_end = 0; // End of last word on current line.
|
||||
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
||||
int word_length = 0;
|
||||
|
||||
for (int i = 0; i < p_string.length(); i++) {
|
||||
char32_t c = p_string[i];
|
||||
if (c == 0xfffc) {
|
||||
continue;
|
||||
}
|
||||
if (u_ispunct(c) && c != 0x005F) {
|
||||
ret.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (is_underscore(c)) {
|
||||
ret.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (breaks.has(i)) {
|
||||
const char32_t c = p_string[i];
|
||||
|
||||
if (is_linebreak(c)) {
|
||||
// Force newline.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(i);
|
||||
line_start = i + 1;
|
||||
line_end = line_start;
|
||||
word_start = line_start;
|
||||
word_length = 0;
|
||||
} else if (c == 0xfffc) {
|
||||
continue;
|
||||
} else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
|
||||
// A whitespace ends current word.
|
||||
if (word_length > 0) {
|
||||
line_end = i - 1;
|
||||
word_start = -1;
|
||||
word_length = 0;
|
||||
}
|
||||
} else if (breaks.has(i)) {
|
||||
// End current word, no space.
|
||||
if (word_length > 0) {
|
||||
line_end = i;
|
||||
word_start = i + 1;
|
||||
word_length = 0;
|
||||
}
|
||||
if (p_chars_per_line <= 0) {
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(line_end + 1);
|
||||
line_start = word_start;
|
||||
line_end = line_start;
|
||||
}
|
||||
} else {
|
||||
if (word_start == -1) {
|
||||
word_start = i;
|
||||
if (p_chars_per_line <= 0) {
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(line_end + 1);
|
||||
line_start = word_start;
|
||||
line_end = line_start;
|
||||
}
|
||||
}
|
||||
word_length += 1;
|
||||
|
||||
if (p_chars_per_line > 0) {
|
||||
if (word_length > p_chars_per_line) {
|
||||
// Word too long: wrap before current character.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(i);
|
||||
line_start = i;
|
||||
line_end = i;
|
||||
word_start = i;
|
||||
word_length = 1;
|
||||
} else if (i - line_start + 1 > p_chars_per_line) {
|
||||
// Line too long: wrap after the last word.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(line_end + 1);
|
||||
line_start = word_start;
|
||||
line_end = line_start;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (line_start < p_string.length()) {
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(p_string.length());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -915,7 +915,7 @@ public:
|
|||
MODBIND2RC(String, parse_number, const String &, const String &);
|
||||
MODBIND1RC(String, percent_sign, const String &);
|
||||
|
||||
MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
|
||||
MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);
|
||||
|
||||
MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &);
|
||||
MODBIND1RC(bool, spoof_check, const String &);
|
||||
|
|
|
@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String
|
|||
return lower;
|
||||
}
|
||||
|
||||
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const {
|
||||
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
|
||||
PackedInt32Array ret;
|
||||
|
||||
int line_start = 0;
|
||||
int line_end = 0; // End of last word on current line.
|
||||
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
||||
int word_length = 0;
|
||||
|
||||
for (int i = 0; i < p_string.length(); i++) {
|
||||
char32_t c = p_string[i];
|
||||
if (c == 0xfffc) {
|
||||
continue;
|
||||
}
|
||||
if (is_punct(c) && c != 0x005F) {
|
||||
ret.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (is_underscore(c)) {
|
||||
ret.push_back(i);
|
||||
continue;
|
||||
}
|
||||
if (is_whitespace(c) || is_linebreak(c)) {
|
||||
const char32_t c = p_string[i];
|
||||
|
||||
if (is_linebreak(c)) {
|
||||
// Force newline.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(i);
|
||||
line_start = i + 1;
|
||||
line_end = line_start;
|
||||
word_start = line_start;
|
||||
word_length = 0;
|
||||
} else if (c == 0xfffc) {
|
||||
continue;
|
||||
} else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
|
||||
// A whitespace ends current word.
|
||||
if (word_length > 0) {
|
||||
line_end = i - 1;
|
||||
word_start = -1;
|
||||
word_length = 0;
|
||||
}
|
||||
} else {
|
||||
if (word_start == -1) {
|
||||
word_start = i;
|
||||
if (p_chars_per_line <= 0) {
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(line_end + 1);
|
||||
line_start = word_start;
|
||||
line_end = line_start;
|
||||
}
|
||||
}
|
||||
word_length += 1;
|
||||
|
||||
if (p_chars_per_line > 0) {
|
||||
if (word_length > p_chars_per_line) {
|
||||
// Word too long: wrap before current character.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(i);
|
||||
line_start = i;
|
||||
line_end = i;
|
||||
word_start = i;
|
||||
word_length = 1;
|
||||
} else if (i - line_start + 1 > p_chars_per_line) {
|
||||
// Line too long: wrap after the last word.
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(line_end + 1);
|
||||
line_start = word_start;
|
||||
line_end = line_start;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (line_start < p_string.length()) {
|
||||
ret.push_back(line_start);
|
||||
ret.push_back(p_string.length());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -786,7 +786,7 @@ public:
|
|||
MODBIND1RC(double, shaped_text_get_underline_position, const RID &);
|
||||
MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &);
|
||||
|
||||
MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
|
||||
MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);
|
||||
|
||||
MODBIND2RC(String, string_to_upper, const String &, const String &);
|
||||
MODBIND2RC(String, string_to_lower, const String &, const String &);
|
||||
|
|
|
@ -117,13 +117,12 @@ void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNo
|
|||
free_spd_voices(voices);
|
||||
}
|
||||
PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
|
||||
int prev = 0;
|
||||
for (int i = 0; i < breaks.size(); i++) {
|
||||
text += message.text.substr(prev, breaks[i] - prev);
|
||||
text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>";
|
||||
prev = breaks[i];
|
||||
for (int i = 0; i < breaks.size(); i += 2) {
|
||||
const int start = breaks[i];
|
||||
const int end = breaks[i + 1];
|
||||
text += message.text.substr(start, end - start + 1);
|
||||
text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
|
||||
}
|
||||
text += message.text.substr(prev, -1);
|
||||
|
||||
spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data());
|
||||
spd_set_volume(tts->synth, message.volume * 2 - 100);
|
||||
|
|
|
@ -308,7 +308,7 @@ void TextServerExtension::_bind_methods() {
|
|||
GDVIRTUAL_BIND(_strip_diacritics, "string");
|
||||
GDVIRTUAL_BIND(_is_valid_identifier, "string");
|
||||
|
||||
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language");
|
||||
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line");
|
||||
|
||||
GDVIRTUAL_BIND(_is_confusable, "string", "dict");
|
||||
GDVIRTUAL_BIND(_spoof_check, "string");
|
||||
|
@ -1379,9 +1379,9 @@ TypedArray<Vector2i> TextServerExtension::parse_structured_text(StructuredTextPa
|
|||
return ret;
|
||||
}
|
||||
|
||||
PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const {
|
||||
PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
|
||||
PackedInt32Array ret;
|
||||
GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, ret);
|
||||
GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, p_chars_per_line, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -510,8 +510,8 @@ public:
|
|||
virtual String strip_diacritics(const String &p_string) const override;
|
||||
GDVIRTUAL1RC(String, _strip_diacritics, const String &);
|
||||
|
||||
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
|
||||
GDVIRTUAL2RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &);
|
||||
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const override;
|
||||
GDVIRTUAL3RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &, int);
|
||||
|
||||
virtual bool is_valid_identifier(const String &p_string) const override;
|
||||
GDVIRTUAL1RC(bool, _is_valid_identifier, const String &);
|
||||
|
|
|
@ -454,7 +454,7 @@ void TextServer::_bind_methods() {
|
|||
ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL(""));
|
||||
ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL(""));
|
||||
|
||||
ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL(""));
|
||||
ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language", "chars_per_line"), &TextServer::string_get_word_breaks, DEFVAL(""), DEFVAL(0));
|
||||
|
||||
ClassDB::bind_method(D_METHOD("is_confusable", "string", "dict"), &TextServer::is_confusable);
|
||||
ClassDB::bind_method(D_METHOD("spoof_check", "string"), &TextServer::spoof_check);
|
||||
|
|
|
@ -493,7 +493,7 @@ public:
|
|||
virtual String percent_sign(const String &p_language = "") const = 0;
|
||||
|
||||
// String functions.
|
||||
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0;
|
||||
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const = 0;
|
||||
|
||||
virtual int64_t is_confusable(const String &p_string, const PackedStringArray &p_dict) const { return -1; };
|
||||
virtual bool spoof_check(const String &p_string) const { return false; };
|
||||
|
|
|
@ -593,12 +593,18 @@ TEST_SUITE("[TextServer]") {
|
|||
String text1 = U"linguistically similar and effectively form";
|
||||
// 14^ 22^ 26^ 38^
|
||||
PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en");
|
||||
CHECK(breaks.size() == 4);
|
||||
if (breaks.size() == 4) {
|
||||
CHECK(breaks[0] == 14);
|
||||
CHECK(breaks[1] == 22);
|
||||
CHECK(breaks[2] == 26);
|
||||
CHECK(breaks[3] == 38);
|
||||
CHECK(breaks.size() == 10);
|
||||
if (breaks.size() == 10) {
|
||||
CHECK(breaks[0] == 0);
|
||||
CHECK(breaks[1] == 14);
|
||||
CHECK(breaks[2] == 15);
|
||||
CHECK(breaks[3] == 22);
|
||||
CHECK(breaks[4] == 23);
|
||||
CHECK(breaks[5] == 26);
|
||||
CHECK(breaks[6] == 27);
|
||||
CHECK(breaks[7] == 38);
|
||||
CHECK(breaks[8] == 39);
|
||||
CHECK(breaks[9] == 43);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -608,16 +614,26 @@ TEST_SUITE("[TextServer]") {
|
|||
// 3^ 7^ 13^ 16^ 20^ 25^ 29^ 32^
|
||||
|
||||
PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th");
|
||||
CHECK(breaks.size() == 8);
|
||||
if (breaks.size() == 8) {
|
||||
CHECK(breaks[0] == 3);
|
||||
CHECK(breaks[1] == 7);
|
||||
CHECK(breaks[2] == 13);
|
||||
CHECK(breaks[3] == 16);
|
||||
CHECK(breaks[4] == 20);
|
||||
CHECK(breaks[5] == 25);
|
||||
CHECK(breaks[6] == 29);
|
||||
CHECK(breaks[7] == 32);
|
||||
CHECK(breaks.size() == 18);
|
||||
if (breaks.size() == 18) {
|
||||
CHECK(breaks[0] == 0);
|
||||
CHECK(breaks[1] == 4);
|
||||
CHECK(breaks[2] == 4);
|
||||
CHECK(breaks[3] == 8);
|
||||
CHECK(breaks[4] == 8);
|
||||
CHECK(breaks[5] == 14);
|
||||
CHECK(breaks[6] == 14);
|
||||
CHECK(breaks[7] == 17);
|
||||
CHECK(breaks[8] == 17);
|
||||
CHECK(breaks[9] == 21);
|
||||
CHECK(breaks[10] == 21);
|
||||
CHECK(breaks[11] == 26);
|
||||
CHECK(breaks[12] == 26);
|
||||
CHECK(breaks[13] == 30);
|
||||
CHECK(breaks[14] == 30);
|
||||
CHECK(breaks[15] == 33);
|
||||
CHECK(breaks[16] == 33);
|
||||
CHECK(breaks[17] == 42);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue