Merge pull request #61316 from bruvzg/tts_3x

[3.x] Backport text-to-speech support.
2022-08-05 23:45:51 +02:00 · 2022-08-05 23:45:51 +02:00 · 0bed7ea63f
commit 0bed7ea63f
parent 0dccbcd7ce 6489fe890e
45 changed files with 3905 additions and 2 deletions
--- a/.github/workflows/linux_builds.yml
+++ b/.github/workflows/linux_builds.yml
@ -59,7 +59,8 @@ jobs:
          # The actual dependencies
          sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \
              libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \
-              libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip
+              libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \
+              libspeechd-dev speech-dispatcher

      - name: Setup Godot build cache
        uses: ./.github/actions/godot-cache
--- a/core/bind/core_bind.cpp
+++ b/core/bind/core_bind.cpp
@ -190,6 +190,42 @@ void _OS::global_menu_clear(const String &p_menu) {
 	OS::get_singleton()->global_menu_clear(p_menu);
 }

+bool _OS::tts_is_speaking() const { // Script-facing wrapper: forwards to the OS singleton.
+	return OS::get_singleton()->tts_is_speaking();
+}
+
+bool _OS::tts_is_paused() const { // Script-facing wrapper: forwards to the OS singleton.
+	return OS::get_singleton()->tts_is_paused();
+}
+
+Array _OS::tts_get_voices() const { // Returns whatever voice list the OS singleton reports.
+	return OS::get_singleton()->tts_get_voices();
+}
+
+PoolStringArray _OS::tts_get_voices_for_language(const String &p_language) const { // Forwards `p_language` unchanged to the OS singleton.
+	return OS::get_singleton()->tts_get_voices_for_language(p_language);
+}
+
+void _OS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Passes every argument through to the OS singleton without validation.
+	OS::get_singleton()->tts_speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+void _OS::tts_pause() { // Script-facing wrapper: forwards to the OS singleton.
+	OS::get_singleton()->tts_pause();
+}
+
+void _OS::tts_resume() { // Script-facing wrapper: forwards to the OS singleton.
+	OS::get_singleton()->tts_resume();
+}
+
+void _OS::tts_stop() { // Script-facing wrapper: forwards to the OS singleton.
+	OS::get_singleton()->tts_stop();
+}
+
+void _OS::tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, String p_callback) { // Registers method `p_callback` of `p_object` for `p_event` through the OS singleton.
+	OS::get_singleton()->tts_set_utterance_callback((OS::TTSUtteranceEvent)p_event, p_object, p_callback); // The cast relies on _OS::TTSUtteranceEvent listing the same enumerators, in the same order, as OS::TTSUtteranceEvent.
+}
+
 Point2 _OS::get_mouse_position() const {
 	return OS::get_singleton()->get_mouse_position();
 }
@ -1260,6 +1296,18 @@ void _OS::_bind_methods() {
 	//ClassDB::bind_method(D_METHOD("is_video_mode_resizable","screen"),&_OS::is_video_mode_resizable,DEFVAL(0));
 	//ClassDB::bind_method(D_METHOD("get_fullscreen_mode_list","screen"),&_OS::get_fullscreen_mode_list,DEFVAL(0));

+	ClassDB::bind_method(D_METHOD("tts_is_speaking"), &_OS::tts_is_speaking);
+	ClassDB::bind_method(D_METHOD("tts_is_paused"), &_OS::tts_is_paused);
+	ClassDB::bind_method(D_METHOD("tts_get_voices"), &_OS::tts_get_voices);
+	ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &_OS::tts_get_voices_for_language);
+
+	ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &_OS::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false));
+	ClassDB::bind_method(D_METHOD("tts_pause"), &_OS::tts_pause);
+	ClassDB::bind_method(D_METHOD("tts_resume"), &_OS::tts_resume);
+	ClassDB::bind_method(D_METHOD("tts_stop"), &_OS::tts_stop);
+
+	ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "object", "callback"), &_OS::tts_set_utterance_callback);
+
 	ClassDB::bind_method(D_METHOD("global_menu_add_item", "menu", "label", "id", "meta"), &_OS::global_menu_add_item);
 	ClassDB::bind_method(D_METHOD("global_menu_add_separator", "menu"), &_OS::global_menu_add_separator);
 	ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu", "idx"), &_OS::global_menu_remove_item);
@ -1578,6 +1626,11 @@ void _OS::_bind_methods() {
 	BIND_ENUM_CONSTANT(POWERSTATE_NO_BATTERY);
 	BIND_ENUM_CONSTANT(POWERSTATE_CHARGING);
 	BIND_ENUM_CONSTANT(POWERSTATE_CHARGED);
+
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY);
 }

 _OS::_OS() {
--- a/core/bind/core_bind.h
+++ b/core/bind/core_bind.h
@ -152,11 +152,31 @@ public:
 		OPENGL_CONTEXT, // HGLRC, X11::GLXContext, NSOpenGLContext*, EGLContext* ...
 	};

+	enum TTSUtteranceEvent { // Mirrors OS::TTSUtteranceEvent; _OS::tts_set_utterance_callback casts between the two, so keep them in sync.
+		TTS_UTTERANCE_STARTED,
+		TTS_UTTERANCE_ENDED,
+		TTS_UTTERANCE_CANCELED,
+		TTS_UTTERANCE_BOUNDARY,
+		TTS_UTTERANCE_MAX, // Count of events; not bound as a script constant.
+	};
+
 	void global_menu_add_item(const String &p_menu, const String &p_label, const Variant &p_signal, const Variant &p_meta);
 	void global_menu_add_separator(const String &p_menu);
 	void global_menu_remove_item(const String &p_menu, int p_idx);
 	void global_menu_clear(const String &p_menu);

+	bool tts_is_speaking() const;
+	bool tts_is_paused() const;
+	Array tts_get_voices() const;
+	PoolStringArray tts_get_voices_for_language(const String &p_language) const;
+
+	void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	void tts_pause();
+	void tts_resume();
+	void tts_stop();
+
+	void tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, String p_callback);
+
 	Point2 get_mouse_position() const;
 	void set_window_title(const String &p_title);
 	void set_window_mouse_passthrough(const PoolVector2Array &p_region);
@ -429,6 +449,7 @@ VARIANT_ENUM_CAST(_OS::VirtualKeyboardType);
 VARIANT_ENUM_CAST(_OS::SystemDir);
 VARIANT_ENUM_CAST(_OS::ScreenOrientation);
 VARIANT_ENUM_CAST(_OS::HandleType);
+VARIANT_ENUM_CAST(_OS::TTSUtteranceEvent);

 class _Geometry : public Object {
 	GDCLASS(_Geometry, Object);
--- a/core/os/os.cpp
+++ b/core/os/os.cpp
@ -557,6 +557,75 @@ bool OS::can_use_threads() const {
 #endif
 }

+bool OS::tts_is_speaking() const { // Default (virtual) implementation: warns that TTS is unsupported and reports "not speaking".
+	WARN_PRINT("TTS is not supported by this platform.");
+	return false;
+}
+
+bool OS::tts_is_paused() const { // Default (virtual) implementation: warns that TTS is unsupported and reports "not paused".
+	WARN_PRINT("TTS is not supported by this platform.");
+	return false;
+}
+
+// Default (virtual) implementation used when a platform does not override it:
+// does nothing except warn that TTS is unsupported.
+void OS::tts_pause() {
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_resume() { // Default (virtual) implementation: only warns that TTS is unsupported.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+Array OS::tts_get_voices() const { // Default (virtual) implementation: warns that TTS is unsupported and returns an empty Array.
+	WARN_PRINT("TTS is not supported by this platform.");
+	return Array();
+}
+
+// Collects the "id" of every voice reported by tts_get_voices() whose
+// "language" entry starts with `p_language`. Voice dictionaries missing
+// either the "id" or the "language" key are skipped.
+PoolStringArray OS::tts_get_voices_for_language(const String &p_language) const {
+	Array all_voices = tts_get_voices();
+	PoolStringArray matching_ids;
+	for (int idx = 0; idx < all_voices.size(); idx++) {
+		const Dictionary &entry = all_voices[idx];
+		if (!entry.has("id") || !entry.has("language")) {
+			continue;
+		}
+		// Prefix match, so a bare language code also selects its regional variants.
+		if (entry["language"].operator String().begins_with(p_language)) {
+			matching_ids.push_back(entry["id"]);
+		}
+	}
+	return matching_ids;
+}
+
+void OS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Default (virtual) implementation: ignores all arguments and only warns that TTS is unsupported.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_stop() { // Default (virtual) implementation: only warns that TTS is unsupported.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, const StringName &p_callback) { // Stores the (object, method) pair to notify for `p_event`, replacing any previous pair.
+	ERR_FAIL_INDEX(p_event, OS::TTS_UTTERANCE_MAX); // Reject event values outside the enum range before indexing the array.
+	utterance_callback[p_event].object = p_object; // NOTE(review): the raw pointer is stored as-is; nothing visible here resets it if the object is freed — confirm lifetime expectations with callers.
+	utterance_callback[p_event].cb_name = p_callback;
+}
+
+// Notifies the callback registered for `p_event`, if any, through
+// call_deferred(). Started/ended/canceled callbacks receive (p_id);
+// the boundary callback receives (p_pos, p_id).
+void OS::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) {
+	ERR_FAIL_INDEX(p_event, OS::TTS_UTTERANCE_MAX);
+
+	const Callback &target = utterance_callback[p_event];
+	if (target.object == nullptr) {
+		// No callback registered for this event.
+		return;
+	}
+
+	if (p_event == OS::TTS_UTTERANCE_BOUNDARY) {
+		target.object->call_deferred(target.cb_name, p_pos, p_id);
+	} else {
+		// After the range check above, only STARTED, ENDED and CANCELED remain.
+		target.object->call_deferred(target.cb_name, p_id);
+	}
+}
+
 OS::MouseMode OS::get_mouse_mode() const {
 	return MOUSE_MODE_VISIBLE;
 }
--- a/core/os/os.h
+++ b/core/os/os.h
@ -123,6 +123,31 @@ public:
 		}
 	};

+	struct TTSUtterance { // Parameters of one speech request; the defaults match tts_speak()'s default arguments.
+		String text;
+		String voice;
+		int volume = 50;
+		float pitch = 1.f;
+		float rate = 1.f;
+		int id = 0;
+	};
+
+	enum TTSUtteranceEvent { // Keep in sync with _OS::TTSUtteranceEvent (core_bind.h) and the EVENT_* constants in GodotTTS.java.
+		TTS_UTTERANCE_STARTED,
+		TTS_UTTERANCE_ENDED,
+		TTS_UTTERANCE_CANCELED,
+		TTS_UTTERANCE_BOUNDARY,
+		TTS_UTTERANCE_MAX, // Number of events; sizes utterance_callback[] and bounds the ERR_FAIL_INDEX checks.
+	};
+
+private:
+	struct Callback { // Target of one utterance-event notification.
+		Object *object = nullptr; // Receiver; nullptr means no callback is registered for the event.
+		StringName cb_name; // Method invoked on `object` via call_deferred().
+	};
+
+	Callback utterance_callback[TTS_UTTERANCE_MAX];
+
 protected:
 	friend class Main;

@ -172,6 +197,20 @@ public:
 	virtual void set_mouse_mode(MouseMode p_mode);
 	virtual MouseMode get_mouse_mode() const;

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual PoolStringArray tts_get_voices_for_language(const String &p_language) const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
+	virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, const StringName &p_callback);
+	virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0);
+
 	virtual void warp_mouse_position(const Point2 &p_to) {}
 	virtual Point2 get_mouse_position() const = 0;
 	virtual int get_mouse_button_state() const = 0;
--- a/doc/classes/OS.xml
+++ b/doc/classes/OS.xml
@ -1032,6 +1032,94 @@
 				[b]Note:[/b] This method is implemented on Android, iOS, UWP, and HTML5.
 			</description>
 		</method>
+		<method name="tts_get_voices" qualifiers="const">
+			<return type="Array" />
+			<description>
+				Returns an [Array] of voice information dictionaries.
+				Each [Dictionary] contains three [String] entries:
+				- [code]name[/code] is voice name.
+				- [code]id[/code] is voice identifier.
+				- [code]language[/code] is language code in [code]lang_Variant[/code] format. [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. And [code]Variant[/code] part is an engine dependent string describing country, region or/and dialect.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_get_voices_for_language" qualifiers="const">
+			<return type="PoolStringArray" />
+			<argument index="0" name="language" type="String" />
+			<description>
+				Returns a [PoolStringArray] of voice identifiers for the [code]language[/code].
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_paused" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is in a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_speaking" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is generating speech, or has an utterance waiting in the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_pause">
+			<return type="void" />
+			<description>
+				Puts the synthesizer into a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_resume">
+			<return type="void" />
+			<description>
+				Resumes the synthesizer if it was paused.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_set_utterance_callback">
+			<return type="void" />
+			<argument index="0" name="event" type="int" enum="OS.TTSUtteranceEvent" />
+			<argument index="1" name="object" type="Object" />
+			<argument index="2" name="callback" type="String" />
+			<description>
+				Adds a callback, which is called when the utterance has started, finished, been canceled, or reached a text boundary.
+				- [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id.
+				- [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id.
+				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_speak">
+			<return type="void" />
+			<argument index="0" name="text" type="String" />
+			<argument index="1" name="voice" type="String" />
+			<argument index="2" name="volume" type="int" default="50" />
+			<argument index="3" name="pitch" type="float" default="1.0" />
+			<argument index="4" name="rate" type="float" default="1.0" />
+			<argument index="5" name="utterance_id" type="int" default="0" />
+			<argument index="6" name="interrupt" type="bool" default="false" />
+			<description>
+				Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first.
+				- [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language].
+				- [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest).
+				- [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is default pitch for the current voice.
+				- [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is a normal speaking rate. Other values act as a percentage relative.
+				- [code]utterance_id[/code] is passed as a parameter to the callback functions.
+				[b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
+				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_stop">
+			<return type="void" />
+			<description>
+				Stops synthesis in progress and removes all utterances from the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
 	</methods>
 	<members>
 		<member name="clipboard" type="String" setter="set_clipboard" getter="get_clipboard" default="&quot;&quot;">
@ -1283,5 +1371,17 @@
 		<constant name="POWERSTATE_CHARGED" value="4" enum="PowerState">
 			Plugged in, battery fully charged.
 		</constant>
+		<constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent">
+			Utterance has begun to be spoken.
+		</constant>
+		<constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent">
+			Utterance was successfully finished.
+		</constant>
+		<constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent">
+			Utterance was canceled, or TTS service was unable to process it.
+		</constant>
+		<constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent">
+			Utterance reached a word or sentence boundary.
+		</constant>
 	</constants>
 </class>
--- a/platform/android/SCsub
+++ b/platform/android/SCsub
@ -9,6 +9,7 @@ android_files = [
    "file_access_filesystem_jandroid.cpp",
    "audio_driver_opensl.cpp",
    "dir_access_jandroid.cpp",
+    "tts_android.cpp",
    "thread_jandroid.cpp",
    "net_socket_android.cpp",
    "java_godot_lib_jni.cpp",
--- a/platform/android/java/lib/src/org/godotengine/godot/Godot.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/Godot.java
@ -38,6 +38,7 @@ import org.godotengine.godot.io.directory.DirectoryAccessHandler;
 import org.godotengine.godot.io.file.FileAccessHandler;
 import org.godotengine.godot.plugin.GodotPlugin;
 import org.godotengine.godot.plugin.GodotPluginRegistry;
+import org.godotengine.godot.tts.GodotTTS;
 import org.godotengine.godot.utils.GodotNetUtils;
 import org.godotengine.godot.utils.PermissionsUtil;
 import org.godotengine.godot.xr.XRMode;
@ -254,6 +255,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC

 	public GodotIO io;
 	public GodotNetUtils netUtils;
+	public GodotTTS tts;

 	static SingletonBase[] singletons = new SingletonBase[MAX_SINGLETONS];
 	static int singleton_count = 0;
@ -575,6 +577,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC
 		final Activity activity = getActivity();
 		io = new GodotIO(activity);
 		netUtils = new GodotNetUtils(activity);
+		tts = new GodotTTS(activity);
 		Context context = getContext();
 		DirectoryAccessHandler directoryAccessHandler = new DirectoryAccessHandler(context);
 		FileAccessHandler fileAccessHandler = new FileAccessHandler(context);
--- a/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java
@ -91,6 +91,11 @@ public class GodotLib {
 	 */
 	public static native boolean step();

+	/**
+	 * TTS callback.
+	 */
+	public static native void ttsCallback(int event, int id, int pos);
+
 	/**
 	 * Forward touch events from the main thread to the GL thread.
 	 */
--- a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java
@ -0,0 +1,298 @@
+/*************************************************************************/
+/*  GodotTTS.java                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+package org.godotengine.godot.tts;
+
+import org.godotengine.godot.GodotLib;
+
+import android.app.Activity;
+import android.os.Bundle;
+import android.speech.tts.TextToSpeech;
+import android.speech.tts.UtteranceProgressListener;
+import android.speech.tts.Voice;
+
+import androidx.annotation.Keep;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Set;
+
+/**
+ * Wrapper for Android Text to Speech API and custom utterance queue implementation.
+ * <p>
+ * A [GodotTTS] provides the following features:
+ * <p>
+ * <ul>
+ * <li>Access to the Android Text to Speech API.
+ * <li>Utterance pause / resume functions, unsupported by Android TTS API.
+ * </ul>
+ * <p>
+ * Pausing is emulated: the engine is stopped and the last range offset reported through
+ * {@link #onRangeStart} is remembered; resuming re-submits the remaining text under the
+ * same utterance id.
+ */
+@Keep
+public class GodotTTS extends UtteranceProgressListener {
+	// Note: These constants must be in sync with OS::TTSUtteranceEvent enum from "core/os/os.h".
+	private static final int EVENT_START = 0;
+	private static final int EVENT_END = 1;
+	private static final int EVENT_CANCEL = 2;
+	private static final int EVENT_BOUNDARY = 3;
+
+	private final TextToSpeech synth;
+	private final LinkedList<GodotUtterance> queue;
+	// Guards `queue`, `lastUtterance`, `speaking` and `paused` in the methods below.
+	private final Object lock = new Object();
+	// Utterance most recently handed to the engine; null before the first one and after stopSpeaking().
+	private GodotUtterance lastUtterance;
+
+	// True from the moment an utterance is submitted until it ends, fails or is stopped (stays true while paused).
+	private boolean speaking;
+	private boolean paused;
+
+	public GodotTTS(Activity p_activity) {
+		synth = new TextToSpeech(p_activity, null);
+		queue = new LinkedList<>();
+
+		synth.setOnUtteranceProgressListener(this);
+	}
+
+	/**
+	 * Returns true if the engine-reported id belongs to the utterance currently tracked.
+	 * Ids are compared as strings so that a null or non-numeric id is rejected instead of throwing.
+	 */
+	private boolean isCurrentUtterance(String utteranceId) {
+		return lastUtterance != null && String.valueOf(lastUtterance.id).equals(utteranceId);
+	}
+
+	/**
+	 * Applies the voice, pitch and rate of the utterance to the engine and returns
+	 * the speak() parameters carrying its volume.
+	 */
+	private Bundle configureSynth(GodotUtterance utterance) {
+		// Guard against a null voice set: the engine may be unable to report voices (e.g. service not connected yet).
+		Set<Voice> voices = synth.getVoices();
+		if (voices != null) {
+			for (Voice v : voices) {
+				if (v.getName().equals(utterance.voice)) {
+					synth.setVoice(v);
+					break;
+				}
+			}
+		}
+		synth.setPitch(utterance.pitch);
+		synth.setSpeechRate(utterance.rate);
+
+		Bundle params = new Bundle();
+		// Utterance volume is 0..100, the engine parameter is a 0..1 float.
+		params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, utterance.volume / 100.f);
+		return params;
+	}
+
+	/**
+	 * Starts the next queued utterance if nothing is being spoken. Callers must hold {@code lock}.
+	 */
+	private void updateTTS() {
+		if (!speaking && !queue.isEmpty()) {
+			GodotUtterance message = queue.pollFirst();
+			Bundle params = configureSynth(message);
+
+			lastUtterance = message;
+			lastUtterance.start = 0;
+			lastUtterance.offset = 0;
+			paused = false;
+
+			synth.speak(message.text, TextToSpeech.QUEUE_FLUSH, params, String.valueOf(message.id));
+			speaking = true;
+		}
+	}
+
+	/**
+	 * Reports the end of the current utterance with the given event and starts the next queued one.
+	 * Ignored while paused: pausing stops the engine on purpose and must not drop the utterance.
+	 */
+	private void finishUtterance(String utteranceId, int event) {
+		synchronized (lock) {
+			if (!paused && isCurrentUtterance(utteranceId)) {
+				GodotLib.ttsCallback(event, lastUtterance.id, 0);
+				speaking = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when the TTS service is about to speak the specified range.
+	 */
+	@Override
+	public void onRangeStart(String utteranceId, int start, int end, int frame) {
+		synchronized (lock) {
+			if (isCurrentUtterance(utteranceId)) {
+				// `start` is relative to the text segment submitted last; add the segment start
+				// to report a position within the full utterance text.
+				lastUtterance.offset = start;
+				GodotLib.ttsCallback(EVENT_BOUNDARY, lastUtterance.id, start + lastUtterance.start);
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an utterance was canceled in progress.
+	 */
+	@Override
+	public void onStop(String utteranceId, boolean interrupted) {
+		finishUtterance(utteranceId, EVENT_CANCEL);
+	}
+
+	/**
+	 * Called by TTS engine when an utterance has begun to be spoken.
+	 */
+	@Override
+	public void onStart(String utteranceId) {
+		synchronized (lock) {
+			// A non-zero segment start means this is a re-submission after a pause, not a new utterance.
+			if (isCurrentUtterance(utteranceId) && lastUtterance.start == 0) {
+				GodotLib.ttsCallback(EVENT_START, lastUtterance.id, 0);
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an utterance was successfully finished.
+	 */
+	@Override
+	public void onDone(String utteranceId) {
+		finishUtterance(utteranceId, EVENT_END);
+	}
+
+	/**
+	 * Called by TTS engine when an error has occurred during processing.
+	 */
+	@Override
+	public void onError(String utteranceId, int errorCode) {
+		finishUtterance(utteranceId, EVENT_CANCEL);
+	}
+
+	/**
+	 * Called by TTS engine when an error has occurred during processing (pre API level 21 version).
+	 */
+	@Override
+	public void onError(String utteranceId) {
+		finishUtterance(utteranceId, EVENT_CANCEL);
+	}
+
+	/**
+	 * Adds an utterance to the queue.
+	 * <p>
+	 * NOTE(review): {@code interrupt} is not acted on here; presumably the native caller
+	 * clears the queue beforehand when it is set — confirm against the JNI caller.
+	 */
+	public void speak(String text, String voice, int volume, float pitch, float rate, int utterance_id, boolean interrupt) {
+		synchronized (lock) {
+			GodotUtterance message = new GodotUtterance(text, voice, volume, pitch, rate, utterance_id);
+			queue.addLast(message);
+
+			if (isPaused()) {
+				resumeSpeaking();
+			} else {
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Puts the synthesizer into a paused state.
+	 */
+	public void pauseSpeaking() {
+		synchronized (lock) {
+			if (!paused) {
+				// Set the flag first so the resulting onStop() is not treated as a cancellation.
+				paused = true;
+				synth.stop();
+			}
+		}
+	}
+
+	/**
+	 * Resumes the synthesizer if it was paused.
+	 */
+	public void resumeSpeaking() {
+		synchronized (lock) {
+			if (lastUtterance != null && paused && speaking) {
+				Bundle params = configureSynth(lastUtterance);
+
+				// `offset` is relative to the segment submitted last time, so it has to be added to the
+				// previous segment start; otherwise a second pause/resume restarts at the wrong index.
+				int resumeAt = lastUtterance.start + Math.max(lastUtterance.offset, 0);
+				lastUtterance.start = Math.min(resumeAt, lastUtterance.text.length());
+				lastUtterance.offset = 0;
+				paused = false;
+
+				synth.speak(lastUtterance.text.substring(lastUtterance.start), TextToSpeech.QUEUE_FLUSH, params, String.valueOf(lastUtterance.id));
+				speaking = true;
+			} else {
+				// The pause did not interrupt anything: clear the flag and start whatever was queued meanwhile.
+				paused = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Stops synthesis in progress and removes all utterances from the queue.
+	 */
+	public void stopSpeaking() {
+		synchronized (lock) {
+			for (GodotUtterance u : queue) {
+				GodotLib.ttsCallback(EVENT_CANCEL, u.id, 0);
+			}
+			queue.clear();
+
+			// Only an utterance that is still in progress (or paused) is canceled;
+			// one that already ended has had its final event reported.
+			if (lastUtterance != null && speaking) {
+				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0);
+			}
+			// Cleared before stopping the engine so the resulting onStop() is ignored.
+			lastUtterance = null;
+
+			paused = false;
+			speaking = false;
+
+			synth.stop();
+		}
+	}
+
+	/**
+	 * Returns voice information, one "locale;name" string per voice (empty if the engine reports none).
+	 */
+	public String[] getVoices() {
+		Set<Voice> voices = synth.getVoices();
+		if (voices == null) {
+			return new String[0];
+		}
+		String[] list = new String[voices.size()];
+		int i = 0;
+		for (Voice v : voices) {
+			list[i++] = v.getLocale().toString() + ";" + v.getName();
+		}
+		return list;
+	}
+
+	/**
+	 * Returns true if an utterance has been submitted to the engine and has not finished yet.
+	 */
+	public boolean isSpeaking() {
+		return speaking;
+	}
+
+	/**
+	 * Returns true if the synthesizer is in a paused state.
+	 */
+	public boolean isPaused() {
+		return paused;
+	}
+}
--- a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java
@ -0,0 +1,55 @@
+/*************************************************************************/
+/*  GodotUtterance.java                                                  */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+package org.godotengine.godot.tts;
+
+/**
+ * A speech request for GodotTTS: the text to speak, its voice / volume / pitch / rate
+ * settings, the caller-supplied id, and the bookkeeping GodotTTS uses for pause / resume.
+ */
+class GodotUtterance {
+	final String text;
+	final String voice; // Compared against Voice.getName() when GodotTTS selects the engine voice.
+	final int volume; // GodotTTS divides this by 100 to build the engine volume parameter.
+	final float pitch;
+	final float rate;
+	final int id; // Sent to the engine as the utterance id string and passed back in callbacks.
+
+	int offset = -1; // Start index of the range last reported by the engine for the submitted text; GodotTTS resets it to 0 on submit.
+	int start = 0; // Index into `text` where the currently submitted segment begins (non-zero after a resume).
+
+	GodotUtterance(String text, String voice, int volume, float pitch, float rate, int id) {
+		this.text = text;
+		this.voice = voice;
+		this.volume = volume;
+		this.pitch = pitch;
+		this.rate = rate;
+		this.id = id;
+	}
+}
--- a/platform/android/java_godot_lib_jni.cpp
+++ b/platform/android/java_godot_lib_jni.cpp
@ -48,6 +48,7 @@
 #include "os_android.h"
 #include "string_android.h"
 #include "thread_jandroid.h"
+#include "tts_android.h"

 #include <android/input.h>
 #include <unistd.h>
@ -136,6 +137,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *en
 	DirAccessJAndroid::setup(p_directory_access_handler);
 	FileAccessFilesystemJAndroid::setup(p_file_access_handler);
 	NetSocketAndroid::setup(p_net_utils);
+	TTS_Android::setup(godot_java->get_member_object("tts", "Lorg/godotengine/godot/tts/GodotTTS;", env));

 	os_android = new OS_Android(godot_java, godot_io_java, p_use_apk_expansion);

@ -238,6 +240,10 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jcl
 	}
 }

+JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos) { // Native side of GodotLib.ttsCallback(int event, int id, int pos).
+	TTS_Android::_java_utterance_callback(event, id, pos); // `env` and `clazz` are unused; the three ints are handed over unchanged.
+}
+
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz) {
 	if (step.get() == -1) {
 		return true;
--- a/platform/android/java_godot_lib_jni.h
+++ b/platform/android/java_godot_lib_jni.h
@ -42,6 +42,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ondestroy(JNIEnv *env
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setup(JNIEnv *env, jclass clazz, jobjectArray p_cmdline);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_resize(JNIEnv *env, jclass clazz, jint width, jint height);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_newcontext(JNIEnv *env, jclass clazz);
+JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos);
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jclass clazz);
 void touch_preprocessing(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions, jint buttons_mask = 0, jfloat vertical_factor = 0, jfloat horizontal_factor = 0);
--- a/platform/android/os_android.cpp
+++ b/platform/android/os_android.cpp
@ -52,6 +52,7 @@

 #include "java_godot_io_wrapper.h"
 #include "java_godot_wrapper.h"
+#include "tts_android.h"

 const char *OS_Android::ANDROID_EXEC_PATH = "apk";

@ -81,6 +82,34 @@ public:
 	virtual ~AndroidLogger() {}
 };

+// Text-to-speech overrides. Each one forwards directly to the corresponding static
+// TTS_Android method; no state is kept in OS_Android itself.
+bool OS_Android::tts_is_speaking() const {
+	return TTS_Android::is_speaking();
+}
+
+bool OS_Android::tts_is_paused() const {
+	return TTS_Android::is_paused();
+}
+
+Array OS_Android::tts_get_voices() const {
+	return TTS_Android::get_voices();
+}
+
+// All arguments are passed through untouched; TTS_Android::speak() does the clamping.
+void OS_Android::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	TTS_Android::speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+void OS_Android::tts_pause() {
+	TTS_Android::pause();
+}
+
+void OS_Android::tts_resume() {
+	TTS_Android::resume();
+}
+
+void OS_Android::tts_stop() {
+	TTS_Android::stop();
+}
+
 int OS_Android::get_video_driver_count() const {
 	return 2;
 }
--- a/platform/android/os_android.h
+++ b/platform/android/os_android.h
@ -68,6 +68,15 @@ class OS_Android : public OS_Unix {
 public:
 	static const char *ANDROID_EXEC_PATH;

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
 	// functions used by main to initialize/deinitialize the OS
 	virtual int get_video_driver_count() const;
 	virtual const char *get_video_driver_name(int p_driver) const;
--- a/platform/android/tts_android.cpp
+++ b/platform/android/tts_android.cpp
@ -0,0 +1,240 @@
+/*************************************************************************/
+/*  tts_android.cpp                                                      */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_android.h"
+
+#include "java_godot_wrapper.h"
+#include "os_android.h"
+#include "string_android.h"
+#include "thread_jandroid.h"
+
+jobject TTS_Android::tts = 0;
+jclass TTS_Android::cls = 0;
+
+jmethodID TTS_Android::_is_speaking = 0;
+jmethodID TTS_Android::_is_paused = 0;
+jmethodID TTS_Android::_get_voices = 0;
+jmethodID TTS_Android::_speak = 0;
+jmethodID TTS_Android::_pause_speaking = 0;
+jmethodID TTS_Android::_resume_speaking = 0;
+jmethodID TTS_Android::_stop_speaking = 0;
+
+HashMap<int, Vector<char16_t>> TTS_Android::ids;
+
+// Encodes p_string as zero-terminated UTF-16.
+// Returns an empty vector when the string is empty or contains a code point that is
+// either above 0x10ffff or inside the surrogate range (an error is printed in that case).
+Vector<char16_t> TTS_Android::str_to_utf16(const String &p_string) {
+	const int len = p_string.length();
+	if (len == 0) {
+		return Vector<char16_t>();
+	}
+
+	const CharType *src = &p_string[0];
+
+	// First pass: validate every code point and count the UTF-16 units needed.
+	int units = 0;
+	for (int i = 0; i < len; i++) {
+		const uint32_t cp = src[i];
+		const bool out_of_range = cp > 0x10ffff;
+		const bool is_surrogate = cp >= 0xd800 && cp <= 0xdfff;
+		if (out_of_range || is_surrogate) {
+			print_error("Unicode parsing error: Invalid unicode codepoint " + String::num_int64(cp, 16) + ".");
+			return Vector<char16_t>();
+		}
+		units += (cp <= 0xffff) ? 1 : 2;
+	}
+
+	// Second pass: write the units, leaving one extra slot for the terminator.
+	Vector<char16_t> encoded;
+	encoded.resize(units + 1);
+	uint16_t *out = (uint16_t *)encoded.ptrw();
+
+	for (int i = 0; i < len; i++) {
+		const uint32_t cp = src[i];
+		if (cp > 0xffff) {
+			*(out++) = uint32_t((cp >> 10) + 0xd7c0); // Lead surrogate.
+			*(out++) = uint32_t((cp & 0x3ff) | 0xdc00); // Trail surrogate.
+		} else {
+			*(out++) = cp; // Fits in a single unit.
+		}
+	}
+	*out = 0; // Trailing zero.
+
+	return encoded;
+}
+
+// Stores a global reference to the Java TTS object (and its class) and resolves the
+// Java method IDs used by the other TTS_Android functions.
+// If the JNI environment or p_tts is unavailable, an error is reported and the method IDs
+// stay zero, which makes every other TTS_Android call a no-op.
+void TTS_Android::setup(jobject p_tts) {
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+	ERR_FAIL_COND(p_tts == nullptr);
+
+	tts = env->NewGlobalRef(p_tts);
+	ERR_FAIL_COND(tts == nullptr);
+
+	jclass c = env->GetObjectClass(tts);
+	cls = (jclass)env->NewGlobalRef(c);
+	// The global reference keeps the class reachable; the local one is no longer needed.
+	env->DeleteLocalRef(c);
+	ERR_FAIL_COND(cls == nullptr);
+
+	_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
+	_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
+	_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
+	_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
+	_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
+	_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
+	_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
+}
+
+// Receives an utterance event from the Java side and re-posts it through OS.
+// Events for ids that are not tracked in `ids` are ignored.
+// For boundary events, p_pos (counted in UTF-16 units) is converted to a UTF-32 position
+// using the stored UTF-16 copy of the text; all other events report position 0.
+// Any event other than STARTED or BOUNDARY drops the id from `ids`.
+void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
+	if (!ids.has(p_id)) {
+		return;
+	}
+
+	const OS::TTSUtteranceEvent event = (OS::TTSUtteranceEvent)p_event;
+	int utf32_pos = 0;
+
+	if (event == OS::TTS_UTTERANCE_BOUNDARY) {
+		const Vector<char16_t> &units = ids[p_id];
+		// The last element is the zero terminator, so it is excluded from the scan.
+		const int limit = MIN(p_pos, units.size() - 1);
+		int i = 0;
+		while (i < limit) {
+			// A lead surrogate and its trail unit together count as one character.
+			i += ((units[i] & 0xfc00) == 0xd800) ? 2 : 1;
+			utf32_pos++;
+		}
+	} else if (event != OS::TTS_UTTERANCE_STARTED) {
+		ids.erase(p_id);
+	}
+
+	OS::get_singleton()->tts_post_utterance_event(event, p_id, utf32_pos);
+}
+
+// Returns the result of the Java isSpeaking() call, or false when the method ID
+// has not been resolved or no JNI environment is available.
+bool TTS_Android::is_speaking() {
+	if (!_is_speaking) {
+		return false;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND_V(env == nullptr, false);
+
+	return env->CallBooleanMethod(tts, _is_speaking);
+}
+
+// Returns the result of the Java isPaused() call, or false when the method ID
+// has not been resolved or no JNI environment is available.
+bool TTS_Android::is_paused() {
+	if (!_is_paused) {
+		return false;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND_V(env == nullptr, false);
+
+	return env->CallBooleanMethod(tts, _is_paused);
+}
+
+// Queries the Java side for the available voices.
+// Each Java string is expected in "language;name" form; entries that do not split into
+// exactly two tokens are skipped. Returns an Array of Dictionaries with the keys
+// "name", "id" (same value as "name") and "language".
+// Returns an empty Array when the method ID is unresolved, no JNI environment is
+// available, or the Java call yields no array.
+Array TTS_Android::get_voices() {
+	Array list;
+	if (_get_voices) {
+		JNIEnv *env = get_jni_env();
+		ERR_FAIL_COND_V(env == nullptr, list);
+
+		jobjectArray voices = static_cast<jobjectArray>(env->CallObjectMethod(tts, _get_voices));
+		if (voices == nullptr) {
+			// Nothing to enumerate; GetArrayLength() must not be called on a null array.
+			return list;
+		}
+
+		jsize len = env->GetArrayLength(voices);
+		for (int i = 0; i < len; i++) {
+			jstring jStr = (jstring)env->GetObjectArrayElement(voices, i);
+			String str = jstring_to_string(jStr, env);
+			Vector<String> tokens = str.split(";", true, 2);
+			if (tokens.size() == 2) {
+				Dictionary voice_d;
+				voice_d["name"] = tokens[1];
+				voice_d["id"] = tokens[1];
+				voice_d["language"] = tokens[0];
+				list.push_back(voice_d);
+			}
+			env->DeleteLocalRef(jStr);
+		}
+		// Release the array itself as well, not only its elements.
+		env->DeleteLocalRef(voices);
+	}
+	return list;
+}
+
+// Queues p_text for speech on the Java side.
+// - p_interrupt: when true, stop() is called first (cancelling everything tracked in `ids`).
+// - Empty text is not sent to Java; a CANCELED event is posted for p_utterance_id instead.
+// - Volume is clamped to [0, 100], pitch to [0, 2] and rate to [0.1, 10] before the call.
+// A UTF-16 copy of the text is stored in `ids` so boundary callbacks can convert positions.
+void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.empty()) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	ids[p_utterance_id] = str_to_utf16(p_text);
+
+	if (_speak) {
+		JNIEnv *env = get_jni_env();
+		ERR_FAIL_COND(env == nullptr);
+
+		jstring jStrT = env->NewStringUTF(p_text.utf8().get_data());
+		jstring jStrV = env->NewStringUTF(p_voice.utf8().get_data());
+		env->CallVoidMethod(tts, _speak, jStrT, jStrV, CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, p_interrupt);
+
+		// Release the temporary Java strings instead of letting local references pile up.
+		if (jStrT) {
+			env->DeleteLocalRef(jStrT);
+		}
+		if (jStrV) {
+			env->DeleteLocalRef(jStrV);
+		}
+	}
+}
+
+void TTS_Android::pause() {
+	if (_pause_speaking) {
+		JNIEnv *env = get_jni_env();
+
+		ERR_FAIL_COND(env == nullptr);
+		env->CallVoidMethod(tts, _pause_speaking);
+	}
+}
+
+void TTS_Android::resume() {
+	if (_resume_speaking) {
+		JNIEnv *env = get_jni_env();
+
+		ERR_FAIL_COND(env == nullptr);
+		env->CallVoidMethod(tts, _resume_speaking);
+	}
+}
+
+// Posts a CANCELED event for every utterance id still tracked, forgets them all,
+// then asks the Java side to stop speaking (if the method ID is resolved).
+void TTS_Android::stop() {
+	for (const int *id = ids.next(nullptr); id; id = ids.next(id)) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, *id);
+	}
+	ids.clear();
+
+	if (!_stop_speaking) {
+		return;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+
+	env->CallVoidMethod(tts, _stop_speaking);
+}
--- a/platform/android/tts_android.h
+++ b/platform/android/tts_android.h
@ -0,0 +1,69 @@
+/*************************************************************************/
+/*  tts_android.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_ANDROID_H
+#define TTS_ANDROID_H
+
+#include "core/array.h"
+#include "core/os/os.h"
+#include "core/ustring.h"
+
+#include <jni.h>
+
+// Static-only bridge between the engine and the Java GodotTTS object, accessed through JNI.
+class TTS_Android {
+	// Global references to the Java TTS object and its class, created in setup().
+	static jobject tts;
+	static jclass cls;
+
+	// Java method IDs resolved in setup(); zero until then, in which case the
+	// corresponding wrapper below does not call into Java.
+	static jmethodID _is_speaking;
+	static jmethodID _is_paused;
+	static jmethodID _get_voices;
+	static jmethodID _speak;
+	static jmethodID _pause_speaking;
+	static jmethodID _resume_speaking;
+	static jmethodID _stop_speaking;
+
+	// Zero-terminated UTF-16 copy of each tracked utterance's text, keyed by utterance id.
+	// Used to convert UTF-16 boundary positions into UTF-32 positions.
+	static HashMap<int, Vector<char16_t>> ids;
+
+	// Encodes p_string as zero-terminated UTF-16; returns an empty vector for an
+	// empty string or when an invalid code point is found.
+	static Vector<char16_t> str_to_utf16(const String &p_string);
+
+public:
+	// Stores the Java TTS object and resolves the method IDs above.
+	static void setup(jobject p_tts);
+	// Called from the JNI ttsCallback entry point with the raw event code, utterance id and position.
+	static void _java_utterance_callback(int p_event, int p_id, int p_pos);
+
+	static bool is_speaking();
+	static bool is_paused();
+	// Returns an Array of Dictionaries with "name", "id" and "language" keys.
+	static Array get_voices();
+	// Clamps volume/pitch/rate and forwards the request to Java; empty text is cancelled immediately.
+	static void speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt);
+	static void pause();
+	static void resume();
+	// Posts CANCELED for every tracked utterance, then stops the Java side.
+	static void stop();
+};
+
+#endif // TTS_ANDROID_H
--- a/platform/iphone/SCsub
+++ b/platform/iphone/SCsub
@ -11,6 +11,7 @@ iphone_lib = [
    "ios.mm",
    "joypad_iphone.mm",
    "godot_view.mm",
+    "tts_ios.mm",
    "display_layer.mm",
    "godot_app_delegate.m",
    "godot_view_renderer.mm",
--- a/platform/iphone/os_iphone.h
+++ b/platform/iphone/os_iphone.h
@ -55,6 +55,8 @@ private:

 	iOS *ios;

+	id tts = nullptr;
+
 	JoypadIPhone *joypad_iphone;

 	MainLoop *main_loop;
@ -103,6 +105,15 @@ public:

 	void start();

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
 	virtual Error open_dynamic_library(const String p_path, void *&p_library_handle, bool p_also_set_library_path = false);
 	virtual Error close_dynamic_library(void *p_library_handle);
 	virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String p_name, void *&p_symbol_handle, bool p_optional = false);
--- a/platform/iphone/os_iphone.mm
+++ b/platform/iphone/os_iphone.mm
@ -52,6 +52,8 @@
 #import "native_video_view.h"
 #import "view_controller.h"

+#include "tts_ios.h"
+
 #import <UIKit/UIKit.h>
 #include <dlfcn.h>
 #include <sys/sysctl.h>
@ -87,6 +89,41 @@ OSIPhone *OSIPhone::get_singleton() {
 	return (OSIPhone *)OS::get_singleton();
 };

+// Text-to-speech overrides. Each one forwards to the `tts` object (a TTS_IOS instance
+// created in initialize()) and reports an error instead if `tts` has not been created.
+bool OSIPhone::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isSpeaking];
+}
+
+bool OSIPhone::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isPaused];
+}
+
+// Returns an empty Array when `tts` is missing.
+Array OSIPhone::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return [tts getVoices];
+}
+
+// Arguments are passed through untouched; TTS_IOS does the clamping.
+void OSIPhone::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
+}
+
+void OSIPhone::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	[tts pauseSpeaking];
+}
+
+void OSIPhone::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	[tts resumeSpeaking];
+}
+
+void OSIPhone::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	[tts stopSpeaking];
+}
+
 void OSIPhone::set_data_dir(String p_dir) {
 	DirAccess *da = DirAccess::open(p_dir);

@ -163,6 +200,9 @@ Error OSIPhone::initialize(const VideoMode &p_desired, int p_video_driver, int p
 		visual_server = memnew(VisualServerWrapMT(visual_server, false));
 	}

+	// Init TTS
+	tts = [[TTS_IOS alloc] init];
+
 	visual_server->init();
 	//visual_server->cursor_set_visible(false, 0);

--- a/platform/iphone/tts_ios.h
+++ b/platform/iphone/tts_ios.h
@ -0,0 +1,63 @@
+/*************************************************************************/
+/*  tts_ios.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_IOS_H
+#define TTS_IOS_H
+
+#if __has_include(<AVFAudio/AVSpeechSynthesis.h>)
+#import <AVFAudio/AVSpeechSynthesis.h>
+#else
+#import <AVFoundation/AVFoundation.h>
+#endif
+
+#include "core/array.h"
+#include "core/list.h"
+#include "core/map.h"
+#include "core/os/os.h"
+#include "core/ustring.h"
+
+// Text-to-speech backend built on AVSpeechSynthesizer. Acts as the synthesizer's delegate
+// and feeds it one queued utterance at a time.
+@interface TTS_IOS : NSObject <AVSpeechSynthesizerDelegate> {
+	// True from the moment an utterance is handed to the synthesizer until it finishes or is cancelled.
+	bool speaking;
+	// Maps each AVSpeechUtterance handed to the synthesizer to the engine-side utterance id.
+	Map<id, int> ids;
+
+	AVSpeechSynthesizer *av_synth;
+	// Requests that have not been handed to the synthesizer yet.
+	List<OS::TTSUtterance> queue;
+}
+
+- (void)pauseSpeaking;
+- (void)resumeSpeaking;
+// Cancels all queued requests (posting CANCELED for each) and stops the synthesizer.
+- (void)stopSpeaking;
+// True while an utterance is active or requests are still queued.
+- (bool)isSpeaking;
+- (bool)isPaused;
+// Queues a request; volume is clamped to [0, 100], pitch to [0, 2] and rate to [0.1, 10].
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt;
+// Returns an Array of Dictionaries with "name", "id" and "language" keys.
+- (Array)getVoices;
+@end
+
+#endif // TTS_IOS_H
--- a/platform/iphone/tts_ios.mm
+++ b/platform/iphone/tts_ios.mm
@ -0,0 +1,165 @@
+/*************************************************************************/
+/*  tts_ios.mm                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_ios.h"
+
+@implementation TTS_IOS
+
+// Creates the AVSpeechSynthesizer and registers this object as its delegate.
+// Returns nil (without touching any ivar) if the superclass initializer fails.
+- (id)init {
+	self = [super init];
+	if (self) {
+		self->speaking = false;
+		self->av_synth = [[AVSpeechSynthesizer alloc] init];
+		[self->av_synth setDelegate:self];
+		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized.");
+	}
+	return self;
+}
+
+// Delegate callback fired before a range of the utterance is spoken.
+// Posts a BOUNDARY event whose position is the range start converted from
+// UTF-16 units to UTF-32 characters.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance {
+	NSString *spoken = [utterance speechString];
+	const NSUInteger limit = MIN(characterRange.location, spoken.length);
+
+	int pos = 0;
+	NSUInteger index = 0;
+	while (index < limit) {
+		const unichar unit = [spoken characterAtIndex:index];
+		// A lead surrogate and its trail unit together count as one character.
+		index += ((unit & 0xfc00) == 0xd800) ? 2 : 1;
+		pos++;
+	}
+
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos);
+}
+
+// Delegate callback for a cancelled utterance: posts CANCELED for its id, forgets the
+// utterance, clears the speaking flag and starts the next queued request (if any).
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance {
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// Delegate callback for a completed utterance: posts ENDED for its id, forgets the
+// utterance, clears the speaking flag and starts the next queued request (if any).
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance {
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_ENDED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// Starts the next queued request if nothing is currently being spoken.
+// Builds an AVSpeechUtterance from the request, records its id, hands it to the
+// synthesizer, removes the request from the queue and posts a STARTED event.
+- (void)update {
+	if (!speaking && queue.size() > 0) {
+		OS::TTSUtterance &message = queue.front()->get();
+		// Copy the id now: `message` refers to the list's front element, which
+		// pop_front() below destroys, so it must not be read afterwards.
+		const int utterance_id = message.id;
+
+		AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+		[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]];
+		// Map the engine rate onto AVFoundation's range, with 1.0 corresponding to the
+		// default speech rate. A rate of exactly 1.0 leaves the utterance's rate untouched.
+		if (message.rate > 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)];
+		} else if (message.rate < 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)];
+		}
+		[new_utterance setPitchMultiplier:message.pitch];
+		// Engine volume is 0..100; AVSpeechUtterance volume is 0..1.
+		[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+
+		ids[new_utterance] = utterance_id;
+		[av_synth speakUtterance:new_utterance];
+
+		queue.pop_front();
+
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_STARTED, utterance_id);
+		speaking = true;
+	}
+}
+
+// Pauses the synthesizer immediately (AVSpeechBoundaryImmediate).
+- (void)pauseSpeaking {
+	[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+}
+
+// Asks the synthesizer to continue speaking.
+- (void)resumeSpeaking {
+	[av_synth continueSpeaking];
+}
+
+// Posts CANCELED for every request still waiting in the queue, empties the queue,
+// stops the synthesizer immediately and clears the speaking flag.
+- (void)stopSpeaking {
+	List<OS::TTSUtterance>::Element *pending = queue.front();
+	while (pending) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, pending->get().id);
+		pending = pending->next();
+	}
+	queue.clear();
+
+	[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	speaking = false;
+}
+
+// True while an utterance is active or at least one request is still queued.
+- (bool)isSpeaking {
+	return speaking || (queue.size() > 0);
+}
+
+// Reports the synthesizer's own paused state.
+- (bool)isPaused {
+	return [av_synth isPaused];
+}
+
+// Queues a speech request.
+// - interrupt: when true, everything queued or playing is stopped first.
+// - Empty text is never queued; a CANCELED event is posted for utterance_id instead.
+// - volume is clamped to [0, 100], pitch to [0, 2] and rate to [0.1, 10].
+// If the synthesizer is paused it is resumed; otherwise the queue is processed right away.
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt {
+	if (interrupt) {
+		[self stopSpeaking];
+	}
+
+	if (text.empty()) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, utterance_id);
+		return;
+	}
+
+	OS::TTSUtterance request;
+	request.id = utterance_id;
+	request.text = text;
+	request.voice = voice;
+	request.volume = CLAMP(volume, 0, 100);
+	request.pitch = CLAMP(pitch, 0.f, 2.f);
+	request.rate = CLAMP(rate, 0.1f, 10.f);
+	queue.push_back(request);
+
+	const bool paused = [self isPaused];
+	if (!paused) {
+		[self update];
+		return;
+	}
+	[self resumeSpeaking];
+}
+
+// Lists every voice reported by AVSpeechSynthesisVoice as a Dictionary with
+// "name", "id" (the voice identifier) and "language" keys.
+- (Array)getVoices {
+	Array list;
+	NSArray<AVSpeechSynthesisVoice *> *available = [AVSpeechSynthesisVoice speechVoices];
+	for (AVSpeechSynthesisVoice *voice in available) {
+		Dictionary voice_d;
+		voice_d["name"] = String::utf8([[voice name] UTF8String]);
+		voice_d["id"] = String::utf8([[voice identifier] UTF8String]);
+		voice_d["language"] = String::utf8([[voice language] UTF8String]);
+		list.push_back(voice_d);
+	}
+	return list;
+}
+
+@end
--- a/platform/javascript/godot_js.h
+++ b/platform/javascript/godot_js.h
@ -68,6 +68,15 @@ extern int godot_js_input_gamepad_sample_get(int p_idx, float r_btns[16], int32_
 extern void godot_js_input_paste_cb(void (*p_callback)(const char *p_text));
 extern void godot_js_input_drop_files_cb(void (*p_callback)(char **p_filev, int p_filec));

+// TTS
+// Implemented on the JavaScript side in library_godot_display.js.
+extern int godot_js_tts_is_speaking();
+extern int godot_js_tts_is_paused();
+// Invokes p_callback with the number of voices and an array of "language;name" strings.
+// NOTE(review): the JS implementation is registered with signature 'vi' and returns nothing,
+// while this declaration returns int — confirm which is intended.
+extern int godot_js_tts_get_voices(void (*p_callback)(int p_size, const char **p_voices));
+// p_callback receives the event code, the utterance id and a character position.
+extern void godot_js_tts_speak(const char *p_text, const char *p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, void (*p_callback)(int p_event, int p_id, int p_pos));
+extern void godot_js_tts_pause();
+extern void godot_js_tts_resume();
+extern void godot_js_tts_stop();
+
 // Display
 extern int godot_js_display_screen_dpi_get();
 extern double godot_js_display_pixel_ratio_get();
@ -110,6 +119,7 @@ extern void godot_js_display_notification_cb(void (*p_callback)(int p_notificati

 // Display Virtual Keyboard
 extern int godot_js_display_vk_available();
+extern int godot_js_display_tts_available();
 extern void godot_js_display_vk_cb(void (*p_input)(const char *p_text, int p_cursor));
 extern void godot_js_display_vk_show(const char *p_text, int p_type, int p_start, int p_end);
 extern void godot_js_display_vk_hide();
--- a/platform/javascript/js/libs/library_godot_display.js
+++ b/platform/javascript/js/libs/library_godot_display.js
@ -329,7 +329,7 @@ mergeInto(LibraryManager.library, GodotDisplayScreen);
 /**
 * Display server interface.
 *
- * Exposes all the functions needed by DisplayServer implementation.
+ * Exposes all the functions needed by OS implementation.
 */
 const GodotDisplay = {
 	$GodotDisplay__deps: ['$GodotConfig', '$GodotRuntime', '$GodotDisplayCursor', '$GodotEventListeners', '$GodotDisplayScreen', '$GodotDisplayVK'],
@ -380,6 +380,91 @@ const GodotDisplay = {
 		return 0;
 	},

+	// Returns window.speechSynthesis.speaking.
+	godot_js_tts_is_speaking__sig: 'i',
+	godot_js_tts_is_speaking: function () {
+		return window.speechSynthesis.speaking;
+	},
+
+	// Returns window.speechSynthesis.paused.
+	godot_js_tts_is_paused__sig: 'i',
+	godot_js_tts_is_paused: function () {
+		return window.speechSynthesis.paused;
+	},
+
+	godot_js_tts_get_voices__sig: 'vi',
+	godot_js_tts_get_voices: function (p_callback) {
+		const func = GodotRuntime.get_func(p_callback);
+		try {
+			const arr = [];
+			const voices = window.speechSynthesis.getVoices();
+			for (let i = 0; i < voices.length; i++) {
+				arr.push(`${voices[i].lang};${voices[i].name}`);
+			}
+			const c_ptr = GodotRuntime.allocStringArray(arr);
+			func(arr.length, c_ptr);
+			GodotRuntime.freeStringArray(c_ptr, arr.length);
+		} catch (e) {
+			// Fail graciously.
+		}
+	},
+
+	godot_js_tts_speak__sig: 'viiiffii',
+	godot_js_tts_speak: function (p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_callback) {
+		const func = GodotRuntime.get_func(p_callback);
+
+		function listener_end(evt) {
+			evt.currentTarget.cb(1 /*TTS_UTTERANCE_ENDED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_start(evt) {
+			evt.currentTarget.cb(0 /*TTS_UTTERANCE_STARTED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_error(evt) {
+			evt.currentTarget.cb(2 /*TTS_UTTERANCE_CANCELED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_bound(evt) {
+			evt.currentTarget.cb(3 /*TTS_UTTERANCE_BOUNDARY*/, evt.currentTarget.id, evt.charIndex);
+		}
+
+		const utterance = new SpeechSynthesisUtterance(GodotRuntime.parseString(p_text));
+		utterance.rate = p_rate;
+		utterance.pitch = p_pitch;
+		utterance.volume = p_volume / 100.0;
+		utterance.addEventListener('end', listener_end);
+		utterance.addEventListener('start', listener_start);
+		utterance.addEventListener('error', listener_error);
+		utterance.addEventListener('boundary', listener_bound);
+		utterance.id = p_utterance_id;
+		utterance.cb = func;
+		const voice = GodotRuntime.parseString(p_voice);
+		const voices = window.speechSynthesis.getVoices();
+		for (let i = 0; i < voices.length; i++) {
+			if (voices[i].name === voice) {
+				utterance.voice = voices[i];
+				break;
+			}
+		}
+		window.speechSynthesis.resume();
+		window.speechSynthesis.speak(utterance);
+	},
+
+	// Pauses speech synthesis.
+	godot_js_tts_pause__sig: 'v',
+	godot_js_tts_pause: function () {
+		window.speechSynthesis.pause();
+	},
+
+	// Resumes speech synthesis.
+	godot_js_tts_resume__sig: 'v',
+	godot_js_tts_resume: function () {
+		window.speechSynthesis.resume();
+	},
+
+	// Cancels all utterances, then calls resume() as well.
+	// NOTE(review): presumably the resume() clears a paused state left over from before the cancel — confirm.
+	godot_js_tts_stop__sig: 'v',
+	godot_js_tts_stop: function () {
+		window.speechSynthesis.cancel();
+		window.speechSynthesis.resume();
+	},
+
 	godot_js_display_alert__sig: 'vi',
 	godot_js_display_alert: function (p_text) {
 		window.alert(GodotRuntime.parseString(p_text)); // eslint-disable-line no-alert
@ -676,6 +761,11 @@ const GodotDisplay = {
 		return GodotDisplayVK.available();
 	},

+	// Reports whether the browser exposes `speechSynthesis` on `window`.
+	godot_js_display_tts_available__sig: 'i',
+	godot_js_display_tts_available: function () {
+		return 'speechSynthesis' in window;
+	},
+
 	godot_js_display_vk_cb__sig: 'vi',
 	godot_js_display_vk_cb: function (p_input_cb) {
 		const input_cb = GodotRuntime.get_func(p_input_cb);
--- a/platform/javascript/os_javascript.cpp
+++ b/platform/javascript/os_javascript.cpp
@ -64,6 +64,90 @@ void OS_JavaScript::request_quit_callback() {
 	}
 }

+// Returns the speaking state reported by the JS side (godot_js_tts_is_speaking).
+bool OS_JavaScript::tts_is_speaking() const {
+	return godot_js_tts_is_speaking();
+}
+
+// Returns the paused state reported by the JS side (godot_js_tts_is_paused).
+bool OS_JavaScript::tts_is_paused() const {
+	return godot_js_tts_is_paused();
+}
+
+// Callback handed to godot_js_tts_get_voices(): rebuilds the cached voice list.
+// Each of the p_size entries in p_voice is parsed as "<language>;<name>";
+// entries without a ';' separator are skipped. The voice name doubles as its id.
+void OS_JavaScript::update_voices_callback(int p_size, const char **p_voice) {
+	OS_JavaScript *os = get_singleton();
+	os->voices.clear();
+	for (int i = 0; i < p_size; i++) {
+		// Split on the first ';' only, so a voice name that itself contains ';'
+		// stays intact instead of producing a third token and being dropped.
+		Vector<String> tokens = String::utf8(p_voice[i]).split(";", true, 1);
+		if (tokens.size() == 2) {
+			Dictionary voice_d;
+			voice_d["name"] = tokens[1];
+			voice_d["id"] = tokens[1];
+			voice_d["language"] = tokens[0];
+			os->voices.push_back(voice_d);
+		}
+	}
+}
+
+// Asks the JS side for the voice list, passing update_voices_callback to receive it,
+// and returns the cached `voices` array.
+// NOTE(review): this relies on the callback having run by the time
+// godot_js_tts_get_voices() returns - confirm against the JS implementation.
+Array OS_JavaScript::tts_get_voices() const {
+	godot_js_tts_get_voices(update_voices_callback);
+	return voices;
+}
+
+// Hands an utterance to the browser speech synthesizer.
+// With p_interrupt set, everything currently tracked is stopped first. An empty
+// text is never sent to JS: it is reported as TTS_UTTERANCE_CANCELED right away.
+// Volume, pitch and rate are clamped to [0, 100], [0, 2] and [0.1, 10].
+void OS_JavaScript::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (p_interrupt) {
+		tts_stop();
+	}
+
+	if (p_text.empty()) {
+		tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	// Keep the UTF-8 text around: the event callback needs it to translate boundary positions.
+	CharString text_utf8 = p_text.utf8();
+	utterance_ids[p_utterance_id] = text_utf8;
+
+	const CharString voice_utf8 = p_voice.utf8();
+	const int volume = CLAMP(p_volume, 0, 100);
+	const float pitch = CLAMP(p_pitch, 0.f, 2.f);
+	const float rate = CLAMP(p_rate, 0.1f, 10.f);
+	godot_js_tts_speak(text_utf8.get_data(), voice_utf8.get_data(), volume, pitch, rate, p_utterance_id, OS_JavaScript::_js_utterance_callback);
+}
+
+// Forwards the pause request to the JS side.
+void OS_JavaScript::tts_pause() {
+	godot_js_tts_pause();
+}
+
+// Forwards the resume request to the JS side.
+void OS_JavaScript::tts_resume() {
+	godot_js_tts_resume();
+}
+
+// Cancels every tracked utterance: posts TTS_UTTERANCE_CANCELED for each id in
+// utterance_ids, forgets them all, then asks the JS side to stop speaking.
+void OS_JavaScript::tts_stop() {
+	for (Map<int, CharString>::Element *E = utterance_ids.front(); E; E = E->next()) {
+		tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, E->key());
+	}
+	// Clearing first means late JS events for these ids are ignored by _js_utterance_callback.
+	utterance_ids.clear();
+	godot_js_tts_stop();
+}
+
+// Static callback invoked from the JS side for utterance events.
+// p_event is a TTSUtteranceEvent value, p_id the utterance id given to tts_speak(),
+// and p_pos the JS `charIndex` for boundary events. Events for ids that are not
+// (or no longer) tracked in utterance_ids are ignored. Any event other than
+// STARTED and BOUNDARY ends tracking of the utterance.
+void OS_JavaScript::_js_utterance_callback(int p_event, int p_id, int p_pos) {
+	OS_JavaScript *ds = (OS_JavaScript *)OS::get_singleton();
+	if (!ds->utterance_ids.has(p_id)) {
+		return;
+	}
+
+	const TTSUtteranceEvent event = (TTSUtteranceEvent)p_event;
+	int pos = 0;
+	if (event == OS::TTS_UTTERANCE_BOUNDARY) {
+		// JS strings are indexed in UTF-16 code units, while the stored text is UTF-8.
+		// Walk the UTF-8 sequences, counting how many UTF-16 units each code point
+		// occupies on the JS side, until p_pos units are consumed; `pos` ends up as
+		// the code point (UTF-32) index.
+		const CharString &string = ds->utterance_ids[p_id];
+		const int len = string.length();
+		int utf16_units = 0;
+		int i = 0;
+		while (i < len && utf16_units < p_pos) {
+			const uint8_t c = string[i];
+			if ((c & 0xf8) == 0xf0) {
+				// 4-byte sequence: encoded as a surrogate pair in UTF-16.
+				i += 4;
+				utf16_units += 2;
+			} else if ((c & 0xf0) == 0xe0) {
+				i += 3;
+				utf16_units += 1;
+			} else if ((c & 0xe0) == 0xc0) {
+				i += 2;
+				utf16_units += 1;
+			} else {
+				i += 1;
+				utf16_units += 1;
+			}
+			pos++;
+		}
+	} else if (event != OS::TTS_UTTERANCE_STARTED) {
+		ds->utterance_ids.erase(p_id);
+	}
+	ds->tts_post_utterance_event(event, p_id, pos);
+}
+
 // Files drop (implemented in JS for now).
 void OS_JavaScript::drop_files_callback(char **p_filev, int p_filec) {
 	OS_JavaScript *os = get_singleton();
--- a/platform/javascript/os_javascript.h
+++ b/platform/javascript/os_javascript.h
@ -80,6 +80,9 @@ private:
 	bool idb_is_syncing;
 	bool pwa_is_waiting;

+	Map<int, CharString> utterance_ids;
+	Array voices;
+
 	static void fullscreen_change_callback(int p_fullscreen);
 	static int mouse_button_callback(int p_pressed, int p_button, double p_x, double p_y, int p_modifiers);
 	static void mouse_move_callback(double p_x, double p_y, double p_rel_x, double p_rel_y, int p_modifiers);
@ -100,6 +103,8 @@ private:
 	static void fs_sync_callback();
 	static void update_clipboard_callback(const char *p_text);
 	static void update_pwa_state_callback();
+	static void _js_utterance_callback(int p_event, int p_id, int p_pos);
+	static void update_voices_callback(int p_size, const char **p_voice);

 protected:
 	void resume_audio();
@ -124,6 +129,15 @@ public:
 	// Override return type to make writing static callbacks less tedious.
 	static OS_JavaScript *get_singleton();

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
 	virtual bool has_virtual_keyboard() const;
 	virtual void show_virtual_keyboard(const String &p_existing_text, const Rect2 &p_screen_rect = Rect2(), VirtualKeyboardType p_type = KEYBOARD_TYPE_DEFAULT, int p_max_input_length = -1, int p_cursor_start = -1, int p_cursor_end = -1);
 	virtual void hide_virtual_keyboard();
--- a/platform/osx/SCsub
+++ b/platform/osx/SCsub
@ -10,6 +10,7 @@ files = [
    "os_osx.mm",
    "godot_main_osx.mm",
    "dir_access_osx.mm",
+    "tts_osx.mm",
    "joypad_osx.cpp",
    "power_osx.cpp",
 ]
--- a/platform/osx/os_osx.h
+++ b/platform/osx/os_osx.h
@ -151,6 +151,8 @@ public:

 	PowerOSX *power_manager;

+	id tts = nullptr;
+
 	CrashHandler crash_handler;

 	void _update_window();
@ -192,6 +194,15 @@ protected:
 public:
 	static OS_OSX *singleton;

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
 	void global_menu_add_item(const String &p_menu, const String &p_label, const Variant &p_signal, const Variant &p_meta);
 	void global_menu_add_separator(const String &p_menu);
 	void global_menu_remove_item(const String &p_menu, int p_idx);
--- a/platform/osx/os_osx.mm
+++ b/platform/osx/os_osx.mm
@ -60,6 +60,8 @@
 #include <sys/types.h>
 #include <unistd.h>

+#include "tts_osx.h"
+
 #if MAC_OS_X_VERSION_MAX_ALLOWED < 101200
 #define NSEventMaskAny NSAnyEventMask
 #define NSEventTypeKeyDown NSKeyDown
@ -1574,6 +1576,41 @@ int OS_OSX::get_current_video_driver() const {
 	return video_driver_index;
 }

+// Returns the TTS backend's speaking state; fails with false if the backend object was not created.
+bool OS_OSX::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isSpeaking];
+}
+
+// Returns the TTS backend's paused state; fails with false if the backend object was not created.
+bool OS_OSX::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isPaused];
+}
+
+// Returns the voice list built by the TTS backend; fails with an empty Array if the backend object was not created.
+Array OS_OSX::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return [tts getVoices];
+}
+
+// Forwards a speak request, with all arguments unchanged, to the TTS backend object.
+void OS_OSX::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
+}
+
+// Forwards the pause request to the TTS backend object.
+void OS_OSX::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	[tts pauseSpeaking];
+}
+
+// Forwards the resume request to the TTS backend object.
+void OS_OSX::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	[tts resumeSpeaking];
+}
+
+// Forwards the stop request to the TTS backend object.
+void OS_OSX::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	[tts stopSpeaking];
+}
+
 Error OS_OSX::initialize(const VideoMode &p_desired, int p_video_driver, int p_audio_driver) {
 	/*** OSX INITIALIZATION ***/
 	/*** OSX INITIALIZATION ***/
@ -1592,6 +1629,9 @@ Error OS_OSX::initialize(const VideoMode &p_desired, int p_video_driver, int p_a
 	// Register to be notified on displays arrangement changes
 	CGDisplayRegisterReconfigurationCallback(displays_arrangement_changed, NULL);

+	// Init TTS
+	tts = [[TTS_OSX alloc] init];
+
 	window_delegate = [[GodotWindowDelegate alloc] init];

 	// Don't use accumulation buffer support; it's not accelerated
--- a/platform/osx/tts_osx.h
+++ b/platform/osx/tts_osx.h
@ -0,0 +1,71 @@
+/*************************************************************************/
+/*  tts_osx.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_OSX_H
+#define TTS_OSX_H
+
+#include "core/array.h"
+#include "core/list.h"
+#include "core/map.h"
+#include "core/os/os.h"
+#include "core/ustring.h"
+
+#import <AppKit/AppKit.h>
+
+#if __has_include(<AVFAudio/AVSpeechSynthesis.h>)
+#import <AVFAudio/AVSpeechSynthesis.h>
+#else
+#import <AVFoundation/AVFoundation.h>
+#endif
+
+// macOS text-to-speech backend. Wraps either an AVSpeechSynthesizer or an
+// NSSpeechSynthesizer (chosen at init time) behind one queue-based interface
+// and acts as the synthesizer's delegate.
+@interface TTS_OSX : NSObject <AVSpeechSynthesizerDelegate> {
+	// AVSpeechSynthesizer
+	bool speaking; // Set when an utterance is handed to the synthesizer, cleared on finish/cancel/stop.
+	Map<id, int> ids; // AVSpeechUtterance object -> utterance id.
+
+	// NSSpeechSynthesizer
+	bool paused; // Set by pauseSpeaking, cleared by resumeSpeaking and stopSpeaking.
+	bool have_utterance; // True while `last_utterance` refers to an utterance in progress.
+	int last_utterance; // Id of the utterance most recently started on the NSSpeechSynthesizer path.
+
+	id synth; // NSSpeechSynthesizer or AVSpeechSynthesizer
+	List<OS::TTSUtterance> queue; // Utterances waiting to be handed to the synthesizer.
+}
+
+- (void)pauseSpeaking;
+- (void)resumeSpeaking;
+- (void)stopSpeaking;
+- (bool)isSpeaking;
+- (bool)isPaused;
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt;
+- (Array)getVoices;
+@end
+
+#endif // TTS_OSX_H
--- a/platform/osx/tts_osx.mm
+++ b/platform/osx/tts_osx.mm
@ -0,0 +1,267 @@
+/*************************************************************************/
+/*  tts_osx.mm                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_osx.h"
+
+@implementation TTS_OSX
+
+// Initializes the backend state and creates the synthesizer: AVSpeechSynthesizer
+// on macOS 10.14 and later, NSSpeechSynthesizer otherwise. The object registers
+// itself as the synthesizer's delegate. Returns nil if the superclass init fails.
+- (id)init {
+	self = [super init];
+	if (!self) {
+		// Do not touch instance variables through a nil self.
+		return nil;
+	}
+	self->speaking = false;
+	self->have_utterance = false;
+	self->last_utterance = -1;
+	self->paused = false;
+	if (@available(macOS 10.14, *)) {
+		self->synth = [[AVSpeechSynthesizer alloc] init];
+		[self->synth setDelegate:self];
+		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized.");
+	} else {
+		self->synth = [[NSSpeechSynthesizer alloc] init];
+		[self->synth setDelegate:self];
+		print_verbose("Text-to-Speech: NSSpeechSynthesizer initialized.");
+	}
+	return self;
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Posts TTS_UTTERANCE_BOUNDARY for the utterance, with the range start converted
+// from a UTF-16 offset into a code point index.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	NSString *string = [utterance speechString];
+
+	// Convert from UTF-16 to UTF-32 position.
+	int pos = 0;
+	for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) {
+		unichar c = [string characterAtIndex:i];
+		// A high surrogate starts a two-unit character: skip its second unit.
+		if ((c & 0xfffffc00) == 0xd800) {
+			i++;
+		}
+		pos++;
+	}
+
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos);
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Posts TTS_UTTERANCE_CANCELED for the utterance, forgets its id mapping,
+// clears the speaking flag and tries to start the next queued utterance.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Posts TTS_UTTERANCE_ENDED for the utterance, forgets its id mapping,
+// clears the speaking flag and tries to start the next queued utterance.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_ENDED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// NSSpeechSynthesizer callback (macOS 10.4+)
+
+// NSSpeechSynthesizer word callback: reports a TTS_UTTERANCE_BOUNDARY event for
+// the utterance in progress, with the word start expressed as a code point index.
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth willSpeakWord:(NSRange)characterRange ofString:(NSString *)string {
+	// Nothing to report while paused or when no utterance is being tracked.
+	if (paused || !have_utterance) {
+		return;
+	}
+
+	// Translate the UTF-16 offset into a code point index: a high surrogate and
+	// the unit following it count as a single character.
+	const NSUInteger limit = MIN(characterRange.location, string.length);
+	int char_index = 0;
+	NSUInteger unit = 0;
+	while (unit < limit) {
+		const unichar code_unit = [string characterAtIndex:unit];
+		unit += ((code_unit & 0xfffffc00) == 0xd800) ? 2 : 1;
+		char_index++;
+	}
+
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_BOUNDARY, last_utterance, char_index);
+}
+
+// NSSpeechSynthesizer completion callback: reports how the tracked utterance
+// finished (ended on success, canceled otherwise), then moves on to the queue.
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth didFinishSpeaking:(BOOL)success {
+	const bool report = !paused && have_utterance;
+	if (report) {
+		const OS::TTSUtteranceEvent outcome = success ? OS::TTS_UTTERANCE_ENDED : OS::TTS_UTTERANCE_CANCELED;
+		OS::get_singleton()->tts_post_utterance_event(outcome, last_utterance);
+		have_utterance = false;
+	}
+
+	// The synthesizer is idle either way; start the next queued utterance, if any.
+	speaking = false;
+	[self update];
+}
+
+// Starts the next queued utterance if nothing is currently being spoken.
+// Configures voice, rate, pitch and volume on whichever synthesizer is in use,
+// hands the text over, removes the utterance from the queue and posts
+// TTS_UTTERANCE_STARTED for it.
+- (void)update {
+	if (!speaking && queue.size() > 0) {
+		OS::TTSUtterance &message = queue.front()->get();
+		// Copy the id now: `message` refers to the list element and must not be
+		// read after the element has been popped from the queue.
+		const int utterance_id = message.id;
+
+		if (@available(macOS 10.14, *)) {
+			AVSpeechSynthesizer *av_synth = synth;
+			AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+			[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]];
+			// Map the [0.1, 1] and [1, 10] rate halves onto the AVSpeech min..default and default..max ranges.
+			if (message.rate > 1.f) {
+				[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)];
+			} else if (message.rate < 1.f) {
+				[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)];
+			}
+			[new_utterance setPitchMultiplier:message.pitch];
+			[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+
+			// Remember which id belongs to this utterance object for the delegate callbacks.
+			ids[new_utterance] = utterance_id;
+			[av_synth speakUtterance:new_utterance];
+		} else {
+			NSSpeechSynthesizer *ns_synth = synth;
+			[ns_synth setObject:nil forProperty:NSSpeechResetProperty error:nil];
+			[ns_synth setVoice:[NSString stringWithUTF8String:message.voice.utf8().get_data()]];
+			int base_pitch = [[ns_synth objectForProperty:NSSpeechPitchBaseProperty error:nil] intValue];
+			[ns_synth setObject:[NSNumber numberWithInt:(base_pitch * (message.pitch / 2.f + 0.5f))] forProperty:NSSpeechPitchBaseProperty error:nullptr];
+			[ns_synth setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+			[ns_synth setRate:(message.rate * 200)];
+
+			last_utterance = utterance_id;
+			have_utterance = true;
+			[ns_synth startSpeakingString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+		}
+		queue.pop_front();
+
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_STARTED, utterance_id);
+		speaking = true;
+	}
+}
+
+// Pauses the active synthesizer immediately and records the paused state.
+- (void)pauseSpeaking {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	} else {
+		NSSpeechSynthesizer *ns_synth = synth;
+		[ns_synth pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+	}
+	// The flag is set on both paths; isPaused only reads it on the NSSpeechSynthesizer path.
+	paused = true;
+}
+
+// Asks the active synthesizer to continue speaking and clears the paused state.
+- (void)resumeSpeaking {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth continueSpeaking];
+	} else {
+		NSSpeechSynthesizer *ns_synth = synth;
+		[ns_synth continueSpeaking];
+	}
+	paused = false;
+}
+
+// Cancels everything: posts TTS_UTTERANCE_CANCELED for every queued utterance,
+// empties the queue, stops the synthesizer and resets the state flags.
+- (void)stopSpeaking {
+	for (List<OS::TTSUtterance>::Element *E = queue.front(); E; E = E->next()) {
+		OS::TTSUtterance &message = E->get();
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, message.id);
+	}
+	queue.clear();
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	} else {
+		NSSpeechSynthesizer *ns_synth = synth;
+		// On this path the cancel event for the utterance in progress is posted here;
+		// have_utterance is cleared below, so didFinishSpeaking will not report it again.
+		if (have_utterance) {
+			OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, last_utterance);
+		}
+		[ns_synth stopSpeaking];
+	}
+	have_utterance = false;
+	speaking = false;
+	paused = false;
+}
+
+// Returns true while an utterance is being spoken or any utterance is still queued.
+- (bool)isSpeaking {
+	return speaking || (queue.size() > 0);
+}
+
+// Returns the paused state: queried from AVSpeechSynthesizer when available,
+// otherwise taken from the locally tracked `paused` flag.
+- (bool)isPaused {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		return [av_synth isPaused];
+	} else {
+		return paused;
+	}
+}
+
+// Queues an utterance for speaking.
+// With `interrupt` set, everything current and queued is stopped first. An empty
+// text is not queued; TTS_UTTERANCE_CANCELED is posted for it immediately.
+// Volume, pitch and rate are clamped to [0, 100], [0, 2] and [0.1, 10].
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt {
+	if (interrupt) {
+		[self stopSpeaking];
+	}
+
+	if (text.empty()) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, utterance_id);
+		return;
+	}
+
+	OS::TTSUtterance message;
+	message.text = text;
+	message.voice = voice;
+	message.volume = CLAMP(volume, 0, 100);
+	message.pitch = CLAMP(pitch, 0.f, 2.f);
+	message.rate = CLAMP(rate, 0.1f, 10.f);
+	message.id = utterance_id;
+	queue.push_back(message);
+
+	// When paused, resume instead of starting the new utterance directly.
+	if ([self isPaused]) {
+		[self resumeSpeaking];
+	} else {
+		[self update];
+	}
+}
+
+// Returns an Array of Dictionaries, one per installed voice, each with the keys
+// "name", "id" and "language". Voices are enumerated through AVSpeechSynthesisVoice
+// on macOS 10.14 and later, and through NSSpeechSynthesizer otherwise.
+- (Array)getVoices {
+	Array list;
+	if (@available(macOS 10.14, *)) {
+		for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) {
+			NSString *voiceIdentifierString = [voice identifier];
+			NSString *voiceLocaleIdentifier = [voice language];
+			NSString *voiceName = [voice name];
+			Dictionary voice_d;
+			voice_d["name"] = String::utf8([voiceName UTF8String]);
+			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]);
+			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]);
+			list.push_back(voice_d);
+		}
+	} else {
+		for (NSString *voiceIdentifierString in [NSSpeechSynthesizer availableVoices]) {
+			// Fetch the attribute dictionary once per voice.
+			NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifierString];
+			NSString *voiceLocaleIdentifier = [attributes objectForKey:NSVoiceLocaleIdentifier];
+			NSString *voiceName = [attributes objectForKey:NSVoiceName];
+			Dictionary voice_d;
+			// UTF8String yields UTF-8 data, so decode it as UTF-8 like the branch above does.
+			voice_d["name"] = String::utf8([voiceName UTF8String]);
+			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]);
+			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]);
+			list.push_back(voice_d);
+		}
+	}
+	return list;
+}
+
+@end
--- a/platform/windows/SCsub
+++ b/platform/windows/SCsub
@ -13,6 +13,7 @@ common_win = [
    "os_windows.cpp",
    "key_mapping_windows.cpp",
    "joypad_windows.cpp",
+    "tts_windows.cpp",
    "power_windows.cpp",
    "windows_terminal_logger.cpp",
 ]
--- a/platform/windows/detect.py
+++ b/platform/windows/detect.py
@ -256,6 +256,7 @@ def configure_msvc(env, manual_msvc_config):
        "kernel32",
        "ole32",
        "oleaut32",
+        "sapi",
        "user32",
        "gdi32",
        "IPHLPAPI",
@ -427,6 +428,7 @@ def configure_mingw(env):
            "ws2_32",
            "kernel32",
            "oleaut32",
+            "sapi",
            "dinput8",
            "dxguid",
            "ksuser",
--- a/platform/windows/os_windows.cpp
+++ b/platform/windows/os_windows.cpp
@ -255,6 +255,41 @@ void OS_Windows::_touch_event(bool p_pressed, float p_x, float p_y, int idx) {
 	}
 };

+// Returns the TTS backend's speaking state; fails with false if the backend was not created.
+bool OS_Windows::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_speaking();
+}
+
+// Returns the TTS backend's paused state; fails with false if the backend was not created.
+bool OS_Windows::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_paused();
+}
+
+// Returns the voice list built by the TTS backend; fails with an empty Array if the backend was not created.
+Array OS_Windows::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return tts->get_voices();
+}
+
+// Forwards a speak request, with all arguments unchanged, to the TTS backend.
+void OS_Windows::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+// Forwards the pause request to the TTS backend.
+void OS_Windows::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	tts->pause();
+}
+
+// Forwards the resume request to the TTS backend.
+void OS_Windows::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	tts->resume();
+}
+
+// Forwards the stop request to the TTS backend.
+void OS_Windows::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	tts->stop();
+}
+
 void OS_Windows::_drag_event(float p_x, float p_y, int idx) {
 	Map<int, Vector2>::Element *curr = touch_state.find(idx);
 	// Defensive
@ -1352,6 +1387,9 @@ Error OS_Windows::initialize(const VideoMode &p_desired, int p_video_driver, int
 		return ERR_UNAVAILABLE;
 	}

+	// Init TTS
+	tts = memnew(TTS_Windows);
+
 	use_raw_input = true;

 	RAWINPUTDEVICE Rid[1];
@ -1782,6 +1820,11 @@ void OS_Windows::finalize() {
 	if (restore_mouse_trails > 1) {
 		SystemParametersInfoA(SPI_SETMOUSETRAILS, restore_mouse_trails, 0, 0);
 	}
+
+	if (tts) {
+		memdelete(tts);
+	}
+	CoUninitialize();
 }

 void OS_Windows::finalize_core() {
--- a/platform/windows/os_windows.h
+++ b/platform/windows/os_windows.h
@ -48,6 +48,7 @@
 #ifdef XAUDIO2_ENABLED
 #include "drivers/xaudio2/audio_driver_xaudio2.h"
 #endif
+#include "tts_windows.h"

 #include <dwmapi.h>
 #include <fcntl.h>
@ -313,6 +314,8 @@ class OS_Windows : public OS {
 	uint64_t ticks_start;
 	uint64_t ticks_per_second;

+	TTS_Windows *tts = nullptr;
+
 	bool old_invalid;
 	bool outside;
 	int old_x, old_y;
@ -439,6 +442,15 @@ public:
 	void set_mouse_mode(MouseMode p_mode);
 	MouseMode get_mouse_mode() const;

+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
 	virtual void warp_mouse_position(const Point2 &p_to);
 	virtual Point2 get_mouse_position() const;
 	void update_real_mouse_position();
--- a/platform/windows/tts_windows.cpp
+++ b/platform/windows/tts_windows.cpp
@ -0,0 +1,261 @@
+/*************************************************************************/
+/*  tts_windows.cpp                                                      */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_windows.h"
+
+TTS_Windows *TTS_Windows::singleton = nullptr;
+
+// SAPI notification callback: drains the pending voice events one at a time and
+// translates those belonging to tracked streams into utterance events.
+// Stream start -> STARTED, word boundary -> BOUNDARY (position corrected by the
+// stored offset), stream end -> ENDED followed by starting the next queued utterance.
+void __stdcall TTS_Windows::speech_event_callback(WPARAM wParam, LPARAM lParam) {
+	TTS_Windows *backend = TTS_Windows::get_singleton();
+	SPEVENT event;
+	while (backend->synth->GetEvents(1, &event, NULL) == S_OK) {
+		const ULONG stream = event.ulStreamNum;
+		if (!backend->ids.has(stream)) {
+			// Not one of the tracked streams; ignore the event.
+			continue;
+		}
+
+		OS *os = OS::get_singleton();
+		if (event.eEventId == SPEI_START_INPUT_STREAM) {
+			os->tts_post_utterance_event(OS::TTS_UTTERANCE_STARTED, backend->ids[stream].id);
+		} else if (event.eEventId == SPEI_WORD_BOUNDARY) {
+			os->tts_post_utterance_event(OS::TTS_UTTERANCE_BOUNDARY, backend->ids[stream].id, event.lParam - backend->ids[stream].offset);
+		} else if (event.eEventId == SPEI_END_INPUT_STREAM) {
+			os->tts_post_utterance_event(OS::TTS_UTTERANCE_ENDED, backend->ids[stream].id);
+			backend->ids.erase(stream);
+			backend->_update_tts();
+		}
+	}
+}
+
+// Starts the next queued utterance when the voice is idle and not paused.
+// The text is wrapped in a SAPI XML <pitch> tag, the requested voice is looked up
+// by token id among the installed voices, volume and rate are applied, and the
+// utterance is spoken asynchronously. The resulting stream number is recorded in
+// `ids` so that speech_event_callback can map events back to the utterance.
+void TTS_Windows::_update_tts() {
+	if (!is_speaking() && !paused && queue.size() > 0) {
+		OS::TTSUtterance &message = queue.front()->get();
+
+		String text;
+		DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML;
+		String pitch_tag = String("<pitch absmiddle=\"") + String::num_int64(message.pitch * 10 - 10, 10) + String("\">");
+		// NOTE(review): message.text is embedded in the XML without escaping; confirm how text
+		// containing '<' or '&' should be handled (escaping would also shift boundary offsets).
+		text = pitch_tag + message.text + String("</pitch>");
+
+		IEnumSpObjectTokens *cpEnum = nullptr;
+		ISpObjectToken *cpVoiceToken = nullptr;
+		ULONG ulCount = 0;
+		ULONG stream_number = 0;
+		ISpObjectTokenCategory *cpCategory = nullptr;
+		HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory);
+		if (SUCCEEDED(hr)) {
+			hr = cpCategory->SetId(SPCAT_VOICES, false);
+			if (SUCCEEDED(hr)) {
+				hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum);
+				if (SUCCEEDED(hr)) {
+					hr = cpEnum->GetCount(&ulCount);
+					while (SUCCEEDED(hr) && ulCount--) {
+						cpVoiceToken = nullptr;
+						hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
+						// Anything but S_OK means no token was fetched; do not dereference it.
+						if (hr != S_OK || !cpVoiceToken) {
+							break;
+						}
+
+						bool matches = false;
+						wchar_t *w_id = nullptr;
+						if (SUCCEEDED(cpVoiceToken->GetId(&w_id)) && w_id) {
+							matches = (String((const wchar_t *)w_id) == message.voice);
+							// The id string is allocated by COM and owned by the caller.
+							CoTaskMemFree(w_id);
+						}
+						if (matches) {
+							synth->SetVoice(cpVoiceToken);
+						}
+						cpVoiceToken->Release();
+						if (matches) {
+							break;
+						}
+					}
+					cpEnum->Release();
+				}
+			}
+			cpCategory->Release();
+		}
+
+		UTData ut;
+		ut.string = text;
+		ut.offset = pitch_tag.length(); // Subtract injected <pitch> tag offset.
+		ut.id = message.id;
+
+		synth->SetVolume(message.volume);
+		synth->SetRate(10.f * log10(message.rate) / log10(3.f));
+		synth->Speak((LPCWSTR)ut.string.ptr(), flags, &stream_number);
+
+		ids[stream_number] = ut;
+
+		queue.pop_front();
+	}
+}
+
+// Returns true when the voice status reports SPRS_IS_SPEAKING as its running state.
+// Fails with false if the voice was not created.
+bool TTS_Windows::is_speaking() const {
+	ERR_FAIL_COND_V(!synth, false);
+
+	SPVOICESTATUS status;
+	// NOTE(review): the result of GetStatus is not checked before `status` is read.
+	synth->GetStatus(&status, nullptr);
+	return (status.dwRunningState == SPRS_IS_SPEAKING);
+}
+
+// Returns the locally tracked paused flag; fails with false if the voice was not created.
+bool TTS_Windows::is_paused() const {
+	ERR_FAIL_COND_V(!synth, false);
+	return paused;
+}
+
+// Enumerates the installed SAPI voices and returns an Array of Dictionaries with
+// the keys "id" (token id), "name" (the token's default string value, or the id
+// with the registry prefix removed when there is none) and "language"
+// ("<ISO 639 language>_<ISO 3166 region>" derived from the token's Language attribute).
+// Voices whose id or Language attribute cannot be read are skipped.
+Array TTS_Windows::get_voices() const {
+	Array list;
+	IEnumSpObjectTokens *cpEnum = nullptr;
+	ISpObjectToken *cpVoiceToken = nullptr;
+	ISpDataKey *cpDataKeyAttribs = nullptr;
+	ULONG ulCount = 0;
+	ISpObjectTokenCategory *cpCategory = nullptr;
+	HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory);
+	if (SUCCEEDED(hr)) {
+		hr = cpCategory->SetId(SPCAT_VOICES, false);
+		if (SUCCEEDED(hr)) {
+			hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum);
+			if (SUCCEEDED(hr)) {
+				hr = cpEnum->GetCount(&ulCount);
+				while (SUCCEEDED(hr) && ulCount--) {
+					cpVoiceToken = nullptr;
+					hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
+					// Anything but S_OK means no token was fetched; do not dereference it.
+					if (hr != S_OK || !cpVoiceToken) {
+						break;
+					}
+					HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs);
+					if (SUCCEEDED(hr_attr)) {
+						wchar_t *w_id = nullptr;
+						wchar_t *w_lang = nullptr;
+						wchar_t *w_name = nullptr;
+						cpVoiceToken->GetId(&w_id);
+						cpDataKeyAttribs->GetStringValue(L"Language", &w_lang);
+						cpDataKeyAttribs->GetStringValue(nullptr, &w_name);
+
+						if (w_id && w_lang) {
+							// The Language attribute is a hexadecimal LCID.
+							LCID locale = wcstol(w_lang, nullptr, 16);
+
+							int locale_chars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, nullptr, 0);
+							int region_chars = GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, nullptr, 0);
+							// Zero-initialized with one spare element, so the buffers are always
+							// terminated even when GetLocaleInfoW fails and writes nothing.
+							wchar_t *w_lang_code = new wchar_t[locale_chars + 1]();
+							wchar_t *w_reg_code = new wchar_t[region_chars + 1]();
+							GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, w_lang_code, locale_chars);
+							GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, w_reg_code, region_chars);
+
+							Dictionary voice_d;
+							voice_d["id"] = String((const wchar_t *)w_id);
+							if (w_name) {
+								voice_d["name"] = String((const wchar_t *)w_name);
+							} else {
+								voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", "");
+							}
+							voice_d["language"] = String((const wchar_t *)w_lang_code) + "_" + String((const wchar_t *)w_reg_code);
+							list.push_back(voice_d);
+
+							delete[] w_lang_code;
+							delete[] w_reg_code;
+						}
+
+						// These strings are allocated by COM and owned by the caller; freeing null is a no-op.
+						CoTaskMemFree(w_id);
+						CoTaskMemFree(w_lang);
+						CoTaskMemFree(w_name);
+
+						cpDataKeyAttribs->Release();
+					}
+					cpVoiceToken->Release();
+				}
+				cpEnum->Release();
+			}
+		}
+		cpCategory->Release();
+	}
+	return list;
+}
+
+// Queues an utterance for speaking.
+// With p_interrupt set, everything current and queued is stopped first. An empty
+// text is not queued; TTS_UTTERANCE_CANCELED is posted for it immediately.
+// Volume, pitch and rate are clamped to [0, 100], [0, 2] and [0.1, 10].
+void TTS_Windows::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!synth);
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.empty()) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	OS::TTSUtterance message;
+	message.text = p_text;
+	message.voice = p_voice;
+	message.volume = CLAMP(p_volume, 0, 100);
+	message.pitch = CLAMP(p_pitch, 0.f, 2.f);
+	message.rate = CLAMP(p_rate, 0.1f, 10.f);
+	message.id = p_utterance_id;
+	queue.push_back(message);
+
+	// When paused, resume instead of starting the new utterance directly.
+	if (is_paused()) {
+		resume();
+	} else {
+		_update_tts();
+	}
+}
+
+// Pauses the voice unless it is already paused. The paused flag is only set
+// when the underlying Pause() call reports S_OK.
+void TTS_Windows::pause() {
+	ERR_FAIL_COND(!synth);
+	if (paused) {
+		return;
+	}
+	paused = (synth->Pause() == S_OK);
+}
+
+// Resumes the voice and clears the paused flag, regardless of the result of Resume().
+void TTS_Windows::resume() {
+	ERR_FAIL_COND(!synth);
+	synth->Resume();
+	paused = false;
+}
+
+// Cancels everything: posts TTS_UTTERANCE_CANCELED for the stream the voice
+// reports as current (if it is tracked) and for every queued utterance, empties
+// the queue, purges pending speech and leaves the voice in the resumed state.
+void TTS_Windows::stop() {
+	ERR_FAIL_COND(!synth);
+
+	SPVOICESTATUS status;
+	// NOTE(review): the result of GetStatus is not checked before `status` is read.
+	synth->GetStatus(&status, nullptr);
+	if (ids.has(status.ulCurrentStream)) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, ids[status.ulCurrentStream].id);
+		ids.erase(status.ulCurrentStream);
+	}
+	for (List<OS::TTSUtterance>::Element *E = queue.front(); E; E = E->next()) {
+		OS::TTSUtterance &message = E->get();
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, message.id);
+	}
+	queue.clear();
+	// Speaking a null string with SPF_PURGEBEFORESPEAK discards whatever is pending.
+	synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr);
+	synth->Resume();
+	paused = false;
+}
+
+// Returns the instance registered by the constructor, or nullptr when none exists.
+TTS_Windows *TTS_Windows::get_singleton() {
+	return singleton;
+}
+
+// Registers this instance as the singleton, initializes COM and creates the SAPI
+// voice. On success, subscribes to stream start/end and word boundary events and
+// installs speech_event_callback as the notification callback.
+// NOTE(review): when CoCreateInstance fails, `synth` keeps whatever initial value
+// the header gives it - confirm it is initialized to nullptr there.
+TTS_Windows::TTS_Windows() {
+	singleton = this;
+	CoInitialize(nullptr);
+
+	if (SUCCEEDED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth))) {
+		ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY);
+		synth->SetInterest(event_mask, event_mask);
+		synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0);
+		print_verbose("Text-to-Speech: SAPI initialized.");
+	} else {
+		print_verbose("Text-to-Speech: Cannot initialize ISpVoice!");
+	}
+}
+
+// Releases the SAPI voice, if one was created, and unregisters the singleton.
+TTS_Windows::~TTS_Windows() {
+	if (synth) {
+		synth->Release();
+	}
+	singleton = nullptr;
+}
--- a/platform/windows/tts_windows.h
+++ b/platform/windows/tts_windows.h
@ -0,0 +1,80 @@
+/*************************************************************************/
+/*  tts_windows.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_WINDOWS_H
+#define TTS_WINDOWS_H
+
+#include "core/array.h"
+#include "core/list.h"
+#include "core/map.h"
+#include "core/os/os.h"
+#include "core/ustring.h"
+
+#include <objbase.h>
+#include <sapi.h>
+#include <wchar.h>
+#include <winnls.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+class TTS_Windows { // Windows text-to-speech backend built on the SAPI ISpVoice interface.
+	List<OS::TTSUtterance> queue; // Utterances waiting to be spoken; appended to when speaking, cleared by stop().
+	ISpVoice *synth = nullptr; // SAPI voice; stays null when the constructor's CoCreateInstance() fails.
+	bool paused = false; // Set by pause() on success, cleared by resume() and stop().
+	struct UTData { // Per-stream bookkeeping stored in `ids`.
+		String string; // NOTE(review): presumably the text handed to SAPI for this stream; confirm in _update_tts().
+		int offset; // NOTE(review): meaning not visible from this header; confirm in the event callback.
+		int id; // Utterance id passed to OS::tts_post_utterance_event().
+	};
+	Map<ULONG, UTData> ids; // Keyed by SAPI stream number (stop() looks up SPVOICESTATUS::ulCurrentStream).
+
+	static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam); // Registered through ISpVoice::SetNotifyCallbackFunction() in the constructor.
+	void _update_tts(); // Invoked by speak() after queueing when the voice is not paused.
+
+	static TTS_Windows *singleton; // Set in the constructor, reset to nullptr in the destructor.
+
+public:
+	static TTS_Windows *get_singleton();
+
+	bool is_speaking() const;
+	bool is_paused() const;
+	Array get_voices() const;
+
+	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); // Volume, pitch and rate are clamped to 0..100, 0..2 and 0.1..10 respectively.
+	void pause();
+	void resume();
+	void stop(); // Posts TTS_UTTERANCE_CANCELED for the current and every queued utterance.
+
+	TTS_Windows();
+	~TTS_Windows();
+};
+
+#endif // TTS_WINDOWS_H
--- a/platform/x11/SCsub
+++ b/platform/x11/SCsub
@ -18,6 +18,9 @@ common_x11 = [
 if "udev" in env and env["udev"]:
    common_x11.append("libudev-so_wrap.c")

+if "speechd" in env and env["speechd"]:  # Only build the Speech Dispatcher wrapper and TTS backend when the option is enabled.
+    common_x11.append(["speechd-so_wrap.c", "tts_linux.cpp"])  # NOTE(review): appends one nested list element; presumably the build flattens source lists, confirm.
+
 prog = env.add_program("#bin/godot", ["godot_x11.cpp"] + common_x11)

 if env["debug_symbols"] and env["separate_debug_symbols"]:
--- a/platform/x11/detect.py
+++ b/platform/x11/detect.py
@ -75,6 +75,7 @@ def get_opts():
        BoolVariable("use_tsan", "Use LLVM/GCC compiler thread sanitizer (TSAN))", False),
        BoolVariable("use_msan", "Use LLVM/GCC compiler memory sanitizer (MSAN))", False),
        BoolVariable("pulseaudio", "Detect and use PulseAudio", True),
+        BoolVariable("speechd", "Detect and use Speech Dispatcher for Text-to-Speech support", True),
        BoolVariable("udev", "Use udev for gamepad connection callbacks", True),
        BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True),
        BoolVariable("separate_debug_symbols", "Create a separate file containing debugging symbols", False),
@ -370,6 +371,13 @@ def configure(env):
        else:
            print("Warning: PulseAudio development libraries not found. Disabling the PulseAudio audio driver.")

+    if env["speechd"]:
+        if os.system("pkg-config --exists speech-dispatcher") == 0:  # 0 means found
+            env.Append(CPPDEFINES=["SPEECHD_ENABLED"])
+            env.ParseConfig("pkg-config speech-dispatcher --cflags")  # Only cflags, we dlopen the library.
+        else:
+            print("Warning: Speech Dispatcher development libraries not found. Disabling Text-to-Speech support.")
+
    if platform.system() == "Linux":
        env.Append(CPPDEFINES=["JOYDEV_ENABLED"])
        if env["udev"]:
--- a/platform/x11/os_x11.cpp
+++ b/platform/x11/os_x11.cpp
@ -107,6 +107,45 @@ static String get_atom_name(Display *p_disp, Atom p_atom) {
 	return ret;
 }

+#ifdef SPEECHD_ENABLED
+
+bool OS_X11::tts_is_speaking() const { // Forwards to TTS_Linux::is_speaking(); fails with `false` when `tts` is null.
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_speaking();
+}
+
+bool OS_X11::tts_is_paused() const { // Forwards to TTS_Linux::is_paused(); fails with `false` when `tts` is null.
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_paused();
+}
+
+Array OS_X11::tts_get_voices() const { // Forwards to TTS_Linux::get_voices(); fails with an empty Array when `tts` is null.
+	ERR_FAIL_COND_V(!tts, Array());
+	return tts->get_voices();
+}
+
+void OS_X11::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Passes every argument through unchanged to TTS_Linux::speak().
+	ERR_FAIL_COND(!tts);
+	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+void OS_X11::tts_pause() { // Forwards to TTS_Linux::pause(); fails when `tts` is null.
+	ERR_FAIL_COND(!tts);
+	tts->pause();
+}
+
+void OS_X11::tts_resume() { // Forwards to TTS_Linux::resume(); fails when `tts` is null.
+	ERR_FAIL_COND(!tts);
+	tts->resume();
+}
+
+void OS_X11::tts_stop() { // Forwards to TTS_Linux::stop(); fails when `tts` is null.
+	ERR_FAIL_COND(!tts);
+	tts->stop();
+}
+
+#endif
+
 void OS_X11::initialize_core() {
 	crash_handler.initialize();

@ -380,6 +419,11 @@ Error OS_X11::initialize(const VideoMode &p_desired, int p_video_driver, int p_a

 #endif

+#ifdef SPEECHD_ENABLED
+	// Init TTS
+	tts = memnew(TTS_Linux);
+#endif
+
 	visual_server = memnew(VisualServerRaster);
 	if (get_render_thread_mode() != RENDER_THREAD_UNSAFE) {
 		visual_server = memnew(VisualServerWrapMT(visual_server, get_render_thread_mode() == RENDER_SEPARATE_THREAD));
@ -849,6 +893,10 @@ void OS_X11::finalize() {
 	driver_alsamidi.close();
 #endif

+#ifdef SPEECHD_ENABLED
+	memdelete(tts);
+#endif
+
 #ifdef JOYDEV_ENABLED
 	memdelete(joypad);
 #endif
--- a/platform/x11/os_x11.h
+++ b/platform/x11/os_x11.h
@ -52,6 +52,10 @@
 #include <X11/extensions/Xrandr.h>
 #include <X11/keysym.h>

+#if defined(SPEECHD_ENABLED)
+#include "tts_linux.h"
+#endif
+
 // Hints for X11 fullscreen
 typedef struct {
 	unsigned long flags;
@ -214,6 +218,10 @@ class OS_X11 : public OS_Unix {
 	AudioDriverPulseAudio driver_pulseaudio;
 #endif

+#ifdef SPEECHD_ENABLED
+	TTS_Linux *tts = nullptr;
+#endif
+
 	PowerX11 *power_manager;

 	bool layered_window;
@ -255,6 +263,17 @@ protected:
 public:
 	virtual String get_name() const;

+#ifdef SPEECHD_ENABLED
+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+#endif
+
 	virtual void set_cursor_shape(CursorShape p_shape);
 	virtual CursorShape get_cursor_shape() const;
 	virtual void set_custom_mouse_cursor(const RES &p_cursor, CursorShape p_shape, const Vector2 &p_hotspot);
--- a/platform/x11/speechd-so_wrap.c
+++ b/platform/x11/speechd-so_wrap.c
@ -0,0 +1,881 @@
+// This file is generated. Do not edit!
+// see https://github.com/hpvb/dynload-wrapper for details
+// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21
+// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c
+//
+#include <stdint.h>
+
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd
+#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd
+#define spd_open spd_open_dylibloader_orig_speechd
+#define spd_open2 spd_open2_dylibloader_orig_speechd
+#define spd_close spd_close_dylibloader_orig_speechd
+#define spd_say spd_say_dylibloader_orig_speechd
+#define spd_sayf spd_sayf_dylibloader_orig_speechd
+#define spd_stop spd_stop_dylibloader_orig_speechd
+#define spd_stop_all spd_stop_all_dylibloader_orig_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd
+#define spd_cancel spd_cancel_dylibloader_orig_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd
+#define spd_pause spd_pause_dylibloader_orig_speechd
+#define spd_pause_all spd_pause_all_dylibloader_orig_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd
+#define spd_resume spd_resume_dylibloader_orig_speechd
+#define spd_resume_all spd_resume_all_dylibloader_orig_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd
+#define spd_key spd_key_dylibloader_orig_speechd
+#define spd_char spd_char_dylibloader_orig_speechd
+#define spd_wchar spd_wchar_dylibloader_orig_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd
+#define spd_set_notification spd_set_notification_dylibloader_orig_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd
+#define spd_set_volume spd_set_volume_dylibloader_orig_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd
+#define spd_get_volume spd_get_volume_dylibloader_orig_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd
+#define spd_set_language spd_set_language_dylibloader_orig_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd
+#define spd_get_language spd_get_language_dylibloader_orig_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd
+#define spd_list_modules spd_list_modules_dylibloader_orig_speechd
+#define free_spd_modules free_spd_modules_dylibloader_orig_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd
+#define spd_list_voices spd_list_voices_dylibloader_orig_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd
+#define free_spd_voices free_spd_voices_dylibloader_orig_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd
+#define spd_execute_command spd_execute_command_dylibloader_orig_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd
+#define spd_send_data spd_send_data_dylibloader_orig_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd
+#include <libspeechd.h>
+#undef SPDConnectionAddress__free
+#undef spd_get_default_address
+#undef spd_open
+#undef spd_open2
+#undef spd_close
+#undef spd_say
+#undef spd_sayf
+#undef spd_stop
+#undef spd_stop_all
+#undef spd_stop_uid
+#undef spd_cancel
+#undef spd_cancel_all
+#undef spd_cancel_uid
+#undef spd_pause
+#undef spd_pause_all
+#undef spd_pause_uid
+#undef spd_resume
+#undef spd_resume_all
+#undef spd_resume_uid
+#undef spd_key
+#undef spd_char
+#undef spd_wchar
+#undef spd_sound_icon
+#undef spd_set_voice_type
+#undef spd_set_voice_type_all
+#undef spd_set_voice_type_uid
+#undef spd_get_voice_type
+#undef spd_set_synthesis_voice
+#undef spd_set_synthesis_voice_all
+#undef spd_set_synthesis_voice_uid
+#undef spd_set_data_mode
+#undef spd_set_notification_on
+#undef spd_set_notification_off
+#undef spd_set_notification
+#undef spd_set_voice_rate
+#undef spd_set_voice_rate_all
+#undef spd_set_voice_rate_uid
+#undef spd_get_voice_rate
+#undef spd_set_voice_pitch
+#undef spd_set_voice_pitch_all
+#undef spd_set_voice_pitch_uid
+#undef spd_get_voice_pitch
+#undef spd_set_voice_pitch_range
+#undef spd_set_voice_pitch_range_all
+#undef spd_set_voice_pitch_range_uid
+#undef spd_set_volume
+#undef spd_set_volume_all
+#undef spd_set_volume_uid
+#undef spd_get_volume
+#undef spd_set_punctuation
+#undef spd_set_punctuation_all
+#undef spd_set_punctuation_uid
+#undef spd_set_capital_letters
+#undef spd_set_capital_letters_all
+#undef spd_set_capital_letters_uid
+#undef spd_set_spelling
+#undef spd_set_spelling_all
+#undef spd_set_spelling_uid
+#undef spd_set_language
+#undef spd_set_language_all
+#undef spd_set_language_uid
+#undef spd_get_language
+#undef spd_set_output_module
+#undef spd_set_output_module_all
+#undef spd_set_output_module_uid
+#undef spd_get_message_list_fd
+#undef spd_list_modules
+#undef free_spd_modules
+#undef spd_get_output_module
+#undef spd_list_voices
+#undef spd_list_synthesis_voices
+#undef free_spd_voices
+#undef spd_execute_command_with_list_reply
+#undef spd_execute_command
+#undef spd_execute_command_with_reply
+#undef spd_execute_command_wo_mutex
+#undef spd_send_data
+#undef spd_send_data_wo_mutex
+#include <dlfcn.h>
+#include <stdio.h>
+void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*);
+SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**);
+SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode);
+SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**);
+void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...);
+int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t);
+int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int);
+SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode);
+int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*);
+int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int);
+int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int);
+int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int);
+int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**);
+char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*);
+void (*free_spd_modules_dylibloader_wrapper_speechd)( char**);
+char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*);
+char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**);
+char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**);
+int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+int initialize_speechd(int verbose) {
+  void *handle;
+  char *error;
+  handle = dlopen("libspeechd.so.2", RTLD_LAZY);
+  if (!handle) {
+    if (verbose) {
+      fprintf(stderr, "%s\n", dlerror());
+    }
+    return(1);
+  }
+  dlerror();
+// SPDConnectionAddress__free
+  *(void **) (&SPDConnectionAddress__free_dylibloader_wrapper_speechd) = dlsym(handle, "SPDConnectionAddress__free");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_default_address
+  *(void **) (&spd_get_default_address_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_default_address");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_open
+  *(void **) (&spd_open_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_open2
+  *(void **) (&spd_open2_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open2");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_close
+  *(void **) (&spd_close_dylibloader_wrapper_speechd) = dlsym(handle, "spd_close");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_say
+  *(void **) (&spd_say_dylibloader_wrapper_speechd) = dlsym(handle, "spd_say");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_sayf
+  *(void **) (&spd_sayf_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sayf");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop
+  *(void **) (&spd_stop_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop_all
+  *(void **) (&spd_stop_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop_uid
+  *(void **) (&spd_stop_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel
+  *(void **) (&spd_cancel_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel_all
+  *(void **) (&spd_cancel_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel_uid
+  *(void **) (&spd_cancel_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause
+  *(void **) (&spd_pause_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause_all
+  *(void **) (&spd_pause_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause_uid
+  *(void **) (&spd_pause_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume
+  *(void **) (&spd_resume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume_all
+  *(void **) (&spd_resume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume_uid
+  *(void **) (&spd_resume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_key
+  *(void **) (&spd_key_dylibloader_wrapper_speechd) = dlsym(handle, "spd_key");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_char
+  *(void **) (&spd_char_dylibloader_wrapper_speechd) = dlsym(handle, "spd_char");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_wchar
+  *(void **) (&spd_wchar_dylibloader_wrapper_speechd) = dlsym(handle, "spd_wchar");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_sound_icon
+  *(void **) (&spd_sound_icon_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sound_icon");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type
+  *(void **) (&spd_set_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type_all
+  *(void **) (&spd_set_voice_type_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type_uid
+  *(void **) (&spd_set_voice_type_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_type
+  *(void **) (&spd_get_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_type");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice
+  *(void **) (&spd_set_synthesis_voice_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice_all
+  *(void **) (&spd_set_synthesis_voice_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice_uid
+  *(void **) (&spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_data_mode
+  *(void **) (&spd_set_data_mode_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_data_mode");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification_on
+  *(void **) (&spd_set_notification_on_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_on");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification_off
+  *(void **) (&spd_set_notification_off_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_off");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification
+  *(void **) (&spd_set_notification_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate
+  *(void **) (&spd_set_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate_all
+  *(void **) (&spd_set_voice_rate_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate_uid
+  *(void **) (&spd_set_voice_rate_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_rate
+  *(void **) (&spd_get_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_rate");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch
+  *(void **) (&spd_set_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_all
+  *(void **) (&spd_set_voice_pitch_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_uid
+  *(void **) (&spd_set_voice_pitch_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_pitch
+  *(void **) (&spd_get_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_pitch");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range
+  *(void **) (&spd_set_voice_pitch_range_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range_all
+  *(void **) (&spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range_uid
+  *(void **) (&spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume
+  *(void **) (&spd_set_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume_all
+  *(void **) (&spd_set_volume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume_uid
+  *(void **) (&spd_set_volume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_volume
+  *(void **) (&spd_get_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_volume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation
+  *(void **) (&spd_set_punctuation_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation_all
+  *(void **) (&spd_set_punctuation_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation_uid
+  *(void **) (&spd_set_punctuation_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters
+  *(void **) (&spd_set_capital_letters_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters_all
+  *(void **) (&spd_set_capital_letters_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters_uid
+  *(void **) (&spd_set_capital_letters_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling
+  *(void **) (&spd_set_spelling_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling_all
+  *(void **) (&spd_set_spelling_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling_uid
+  *(void **) (&spd_set_spelling_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language
+  *(void **) (&spd_set_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language_all
+  *(void **) (&spd_set_language_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language_uid
+  *(void **) (&spd_set_language_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_language
+  *(void **) (&spd_get_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_language");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module
+  *(void **) (&spd_set_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module_all
+  *(void **) (&spd_set_output_module_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module_uid
+  *(void **) (&spd_set_output_module_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_message_list_fd
+  *(void **) (&spd_get_message_list_fd_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_message_list_fd");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_modules
+  *(void **) (&spd_list_modules_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_modules");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// free_spd_modules
+  *(void **) (&free_spd_modules_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_modules");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_output_module
+  *(void **) (&spd_get_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_output_module");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_voices
+  *(void **) (&spd_list_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_synthesis_voices
+  *(void **) (&spd_list_synthesis_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_synthesis_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// free_spd_voices
+  *(void **) (&free_spd_voices_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_with_list_reply
+  *(void **) (&spd_execute_command_with_list_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_list_reply");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command
+  *(void **) (&spd_execute_command_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_with_reply
+  *(void **) (&spd_execute_command_with_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_reply");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_wo_mutex
+  *(void **) (&spd_execute_command_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_wo_mutex");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_send_data
+  *(void **) (&spd_send_data_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_send_data_wo_mutex
+  *(void **) (&spd_send_data_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data_wo_mutex");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+return 0;
+}
--- a/platform/x11/speechd-so_wrap.h
+++ b/platform/x11/speechd-so_wrap.h
@ -0,0 +1,330 @@
+#ifndef DYLIBLOAD_WRAPPER_SPEECHD
+#define DYLIBLOAD_WRAPPER_SPEECHD
+// This file is generated. Do not edit!
+// see https://github.com/hpvb/dynload-wrapper for details
+// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21
+// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c
+// Purpose: declares one function pointer per libspeechd entry point (<name>_dylibloader_wrapper_speechd), macros that redirect the plain libspeechd names to those pointers, and initialize_speechd().
+#include <stdint.h>
+
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd
+#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd
+#define spd_open spd_open_dylibloader_orig_speechd
+#define spd_open2 spd_open2_dylibloader_orig_speechd
+#define spd_close spd_close_dylibloader_orig_speechd
+#define spd_say spd_say_dylibloader_orig_speechd
+#define spd_sayf spd_sayf_dylibloader_orig_speechd
+#define spd_stop spd_stop_dylibloader_orig_speechd
+#define spd_stop_all spd_stop_all_dylibloader_orig_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd
+#define spd_cancel spd_cancel_dylibloader_orig_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd
+#define spd_pause spd_pause_dylibloader_orig_speechd
+#define spd_pause_all spd_pause_all_dylibloader_orig_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd
+#define spd_resume spd_resume_dylibloader_orig_speechd
+#define spd_resume_all spd_resume_all_dylibloader_orig_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd
+#define spd_key spd_key_dylibloader_orig_speechd
+#define spd_char spd_char_dylibloader_orig_speechd
+#define spd_wchar spd_wchar_dylibloader_orig_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd
+#define spd_set_notification spd_set_notification_dylibloader_orig_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd
+#define spd_set_volume spd_set_volume_dylibloader_orig_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd
+#define spd_get_volume spd_get_volume_dylibloader_orig_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd
+#define spd_set_language spd_set_language_dylibloader_orig_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd
+#define spd_get_language spd_get_language_dylibloader_orig_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd
+#define spd_list_modules spd_list_modules_dylibloader_orig_speechd
+#define free_spd_modules free_spd_modules_dylibloader_orig_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd
+#define spd_list_voices spd_list_voices_dylibloader_orig_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd
+#define free_spd_voices free_spd_voices_dylibloader_orig_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd
+#define spd_execute_command spd_execute_command_dylibloader_orig_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd
+#define spd_send_data spd_send_data_dylibloader_orig_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd
+#include <libspeechd.h> // Included while the renaming macros above are active, so any declaration of those names inside it is emitted under the *_dylibloader_orig_speechd name; the library's types (SPDConnection, SPDPriority, ...) are not renamed.
+#undef SPDConnectionAddress__free // This run of #undef lines removes the temporary *_dylibloader_orig_speechd renames once <libspeechd.h> has been included, freeing the plain names to be redefined.
+#undef spd_get_default_address
+#undef spd_open
+#undef spd_open2
+#undef spd_close
+#undef spd_say
+#undef spd_sayf
+#undef spd_stop
+#undef spd_stop_all
+#undef spd_stop_uid
+#undef spd_cancel
+#undef spd_cancel_all
+#undef spd_cancel_uid
+#undef spd_pause
+#undef spd_pause_all
+#undef spd_pause_uid
+#undef spd_resume
+#undef spd_resume_all
+#undef spd_resume_uid
+#undef spd_key
+#undef spd_char
+#undef spd_wchar
+#undef spd_sound_icon
+#undef spd_set_voice_type
+#undef spd_set_voice_type_all
+#undef spd_set_voice_type_uid
+#undef spd_get_voice_type
+#undef spd_set_synthesis_voice
+#undef spd_set_synthesis_voice_all
+#undef spd_set_synthesis_voice_uid
+#undef spd_set_data_mode
+#undef spd_set_notification_on
+#undef spd_set_notification_off
+#undef spd_set_notification
+#undef spd_set_voice_rate
+#undef spd_set_voice_rate_all
+#undef spd_set_voice_rate_uid
+#undef spd_get_voice_rate
+#undef spd_set_voice_pitch
+#undef spd_set_voice_pitch_all
+#undef spd_set_voice_pitch_uid
+#undef spd_get_voice_pitch
+#undef spd_set_voice_pitch_range
+#undef spd_set_voice_pitch_range_all
+#undef spd_set_voice_pitch_range_uid
+#undef spd_set_volume
+#undef spd_set_volume_all
+#undef spd_set_volume_uid
+#undef spd_get_volume
+#undef spd_set_punctuation
+#undef spd_set_punctuation_all
+#undef spd_set_punctuation_uid
+#undef spd_set_capital_letters
+#undef spd_set_capital_letters_all
+#undef spd_set_capital_letters_uid
+#undef spd_set_spelling
+#undef spd_set_spelling_all
+#undef spd_set_spelling_uid
+#undef spd_set_language
+#undef spd_set_language_all
+#undef spd_set_language_uid
+#undef spd_get_language
+#undef spd_set_output_module
+#undef spd_set_output_module_all
+#undef spd_set_output_module_uid
+#undef spd_get_message_list_fd
+#undef spd_list_modules
+#undef free_spd_modules
+#undef spd_get_output_module
+#undef spd_list_voices
+#undef spd_list_synthesis_voices
+#undef free_spd_voices
+#undef spd_execute_command_with_list_reply
+#undef spd_execute_command
+#undef spd_execute_command_with_reply
+#undef spd_execute_command_wo_mutex
+#undef spd_send_data
+#undef spd_send_data_wo_mutex
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_wrapper_speechd // From this line on, each plain libspeechd name expands to the matching *_dylibloader_wrapper_speechd function pointer; these macros are not #undef'd later in this header, so code that includes it calls through the pointers.
+#define spd_get_default_address spd_get_default_address_dylibloader_wrapper_speechd
+#define spd_open spd_open_dylibloader_wrapper_speechd
+#define spd_open2 spd_open2_dylibloader_wrapper_speechd
+#define spd_close spd_close_dylibloader_wrapper_speechd
+#define spd_say spd_say_dylibloader_wrapper_speechd
+#define spd_sayf spd_sayf_dylibloader_wrapper_speechd
+#define spd_stop spd_stop_dylibloader_wrapper_speechd
+#define spd_stop_all spd_stop_all_dylibloader_wrapper_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_wrapper_speechd
+#define spd_cancel spd_cancel_dylibloader_wrapper_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_wrapper_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_wrapper_speechd
+#define spd_pause spd_pause_dylibloader_wrapper_speechd
+#define spd_pause_all spd_pause_all_dylibloader_wrapper_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_wrapper_speechd
+#define spd_resume spd_resume_dylibloader_wrapper_speechd
+#define spd_resume_all spd_resume_all_dylibloader_wrapper_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_wrapper_speechd
+#define spd_key spd_key_dylibloader_wrapper_speechd
+#define spd_char spd_char_dylibloader_wrapper_speechd
+#define spd_wchar spd_wchar_dylibloader_wrapper_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_wrapper_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_wrapper_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_wrapper_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_wrapper_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_wrapper_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_wrapper_speechd
+#define spd_set_notification spd_set_notification_dylibloader_wrapper_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_wrapper_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_wrapper_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd
+#define spd_set_volume spd_set_volume_dylibloader_wrapper_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_wrapper_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_wrapper_speechd
+#define spd_get_volume spd_get_volume_dylibloader_wrapper_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_wrapper_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_wrapper_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_wrapper_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_wrapper_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_wrapper_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_wrapper_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_wrapper_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_wrapper_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_wrapper_speechd
+#define spd_set_language spd_set_language_dylibloader_wrapper_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_wrapper_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_wrapper_speechd
+#define spd_get_language spd_get_language_dylibloader_wrapper_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_wrapper_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_wrapper_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_wrapper_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_wrapper_speechd
+#define spd_list_modules spd_list_modules_dylibloader_wrapper_speechd
+#define free_spd_modules free_spd_modules_dylibloader_wrapper_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_wrapper_speechd
+#define spd_list_voices spd_list_voices_dylibloader_wrapper_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_wrapper_speechd
+#define free_spd_voices free_spd_voices_dylibloader_wrapper_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_wrapper_speechd
+#define spd_execute_command spd_execute_command_dylibloader_wrapper_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_wrapper_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_wrapper_speechd
+#define spd_send_data spd_send_data_dylibloader_wrapper_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_wrapper_speechd
+extern void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*); // The extern declarations in this run are function pointers, one per wrapped libspeechd function; they are only declared here and are assigned from dlsym() results in the generated implementation file (speechd-so_wrap.c per the generator flags).
+extern SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**);
+extern SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode);
+extern SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**);
+extern void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...);
+extern int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t);
+extern int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+extern int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+extern int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int);
+extern SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode);
+extern int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+extern int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+extern int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*);
+extern int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+extern int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+extern int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int);
+extern int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+extern int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+extern int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int);
+extern int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+extern int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+extern int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int);
+extern int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**);
+extern char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*);
+extern void (*free_spd_modules_dylibloader_wrapper_speechd)( char**);
+extern char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*);
+extern char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+extern SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+extern void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**);
+extern char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**);
+extern int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+extern char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+int initialize_speechd(int verbose); // Fills the *_dylibloader_wrapper_speechd pointers via dlsym(); when verbose is non-zero, each dlerror() message is printed to stderr. The symbol-lookup part returns 0 even if a lookup failed, so a pointer may be NULL afterwards. NOTE(review): library-open failure handling is outside this excerpt - confirm what is returned in that case.
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/platform/x11/tts_linux.cpp
+++ b/platform/x11/tts_linux.cpp
@ -0,0 +1,270 @@
+/*************************************************************************/
+/*  tts_linux.cpp                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_linux.h"
+
+#include "core/project_settings.h"
+
+// Instance pointer returned by get_singleton(): set by the constructor, reset to null by the destructor.
+TTS_Linux *TTS_Linux::singleton = nullptr;
+
+// Tells whether `c` separates words for index-mark injection.
+// Only a space and a horizontal tab qualify; every other character is rejected.
+static bool _is_whitespace(CharType c) {
+	switch (c) {
+		case ' ':
+		case '\t':
+			return true;
+		default:
+			return false;
+	}
+}
+
+// Thread entry point: loads the Speech Dispatcher library and opens the connection.
+//
+// `p_userdata` is the TTS_Linux instance to initialize; a null pointer makes this a no-op.
+// The instance mutex is held for the whole initialization. On success `synth` is set and
+// the end/cancel/index-mark callbacks are installed; on failure `synth` stays null and
+// only a verbose message is printed.
+void TTS_Linux::speech_init_thread_func(void *p_userdata) {
+	TTS_Linux *self = (TTS_Linux *)p_userdata;
+	if (!self) {
+		return;
+	}
+
+	MutexLock thread_safe_method(self->_thread_safe_);
+
+	// The loader is verbose only when DEBUG_ENABLED is defined.
+#ifdef DEBUG_ENABLED
+	const int loader_verbosity = 1;
+#else
+	const int loader_verbosity = 0;
+#endif
+	if (initialize_speechd(loader_verbosity) != 0) {
+		print_verbose("Text-to-Speech: Cannot load Speech Dispatcher library!");
+		return;
+	}
+
+	// Register under the project name, falling back to a fixed name when it is empty.
+	CharString client_name;
+	const String project_name = GLOBAL_GET("application/config/name");
+	if (project_name.length() != 0) {
+		client_name = project_name.utf8();
+	} else {
+		client_name = "Godot_Engine";
+	}
+
+	self->synth = spd_open(client_name, "Godot_Engine_Speech_API", "Godot_Engine", SPD_MODE_THREADED);
+	if (!self->synth) {
+		print_verbose("Text-to-Speech: Cannot initialize Speech Dispatcher synthesizer!");
+		return;
+	}
+
+	// End and cancel share one handler; index marks get their own.
+	self->synth->callback_end = &speech_event_callback;
+	self->synth->callback_cancel = &speech_event_callback;
+	self->synth->callback_im = &speech_event_index_mark;
+	spd_set_notification_on(self->synth, SPD_END);
+	spd_set_notification_on(self->synth, SPD_CANCEL);
+
+	print_verbose("Text-to-Speech: Speech Dispatcher initialized.");
+}
+
+// Speech Dispatcher index-mark callback: turns a reached mark into a word-boundary event.
+//
+// `p_msg_id` is the Speech Dispatcher message id; it is translated to the utterance id
+// through `ids`, and marks for untracked messages are dropped. `p_index_mark` is the mark
+// name, which speech_event_callback() sets to the character offset of the word break.
+// `p_client_id` and `p_type` are unused.
+void TTS_Linux::speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark) {
+	TTS_Linux *tts = TTS_Linux::get_singleton();
+	if (!tts) {
+		return;
+	}
+
+	// Take the lock before looking at `ids`: speak(), stop() and speech_event_callback()
+	// modify the map under this mutex, so an unlocked lookup would race with them.
+	MutexLock thread_safe_method(tts->_thread_safe_);
+	if (!tts->ids.has(p_msg_id)) {
+		return;
+	}
+
+	// Get word offset from the index mark injected to the text stream.
+	String mark = String::utf8(p_index_mark);
+	OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_BOUNDARY, tts->ids[p_msg_id], mark.to_int());
+}
+
+// Speech Dispatcher end/cancel callback; speak() also calls it directly to start the queue.
+//
+// Step 1: when playback is not paused and `p_msg_id` belongs to a tracked utterance, an
+// END or CANCEL notification is reported for that utterance and the synthesizer is marked
+// idle. Step 2: while idle, queued utterances are submitted until one is accepted.
+// `p_client_id` is unused.
+void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type) {
+	TTS_Linux *tts = TTS_Linux::get_singleton();
+	if (!tts) {
+		return;
+	}
+
+	MutexLock thread_safe_method(tts->_thread_safe_);
+	List<OS::TTSUtterance> &queue = tts->queue;
+
+	if (!tts->paused && tts->ids.has(p_msg_id)) {
+		if (p_type == SPD_EVENT_END) {
+			OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_ENDED, tts->ids[p_msg_id]);
+			tts->ids.erase(p_msg_id);
+			tts->last_msg_id = -1;
+			tts->speaking = false;
+		} else if (p_type == SPD_EVENT_CANCEL) {
+			OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, tts->ids[p_msg_id]);
+			tts->ids.erase(p_msg_id);
+			tts->last_msg_id = -1;
+			tts->speaking = false;
+		}
+	}
+
+	// A loop rather than a single attempt: a rejected utterance must not block the ones
+	// queued behind it.
+	while (!tts->speaking && queue.size() > 0) {
+		OS::TTSUtterance &message = queue.front()->get();
+
+		// Collect the offsets of all word separators.
+		PoolIntArray breaks;
+		for (int i = 0; i < message.text.length(); i++) {
+			if (_is_whitespace(message.text[i])) {
+				breaks.push_back(i);
+			}
+		}
+
+		// Inject index mark after each word. The mark name is the separator's character
+		// offset, which speech_event_index_mark() reports back as the boundary position.
+		String text;
+		int prev = 0;
+		for (int i = 0; i < breaks.size(); i++) {
+			text += message.text.substr(prev, breaks[i] - prev);
+			text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>";
+			prev = breaks[i];
+		}
+		text += message.text.substr(prev, -1);
+
+		spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data());
+		// Volume 0..100 becomes -100..100; pitch 0..2 becomes -100..100.
+		spd_set_volume(tts->synth, message.volume * 2 - 100);
+		spd_set_voice_pitch(tts->synth, (message.pitch - 1) * 100);
+		// Rate is mapped logarithmically: 1..2.5 onto 0..100 and 0.5..1 onto -100..0,
+		// with values outside 0.5..2.5 clamped to the ends.
+		float rate = 0;
+		if (message.rate > 1.f) {
+			rate = log10(MIN(message.rate, 2.5f)) / log10(2.5f) * 100;
+		} else if (message.rate < 1.f) {
+			rate = log10(MAX(message.rate, 0.5f)) / log10(0.5f) * -100;
+		}
+		spd_set_voice_rate(tts->synth, rate);
+		// The injected <mark/> elements require SSML mode.
+		spd_set_data_mode(tts->synth, SPD_DATA_SSML);
+
+		const int msg_id = spd_say(tts->synth, SPD_TEXT, text.utf8().get_data());
+		if (msg_id < 0) {
+			// The message was not accepted, so no END/CANCEL notification will ever arrive
+			// for it. Report it as canceled and try the next queued utterance.
+			tts->last_msg_id = -1;
+			OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, message.id);
+			queue.pop_front();
+			continue;
+		}
+
+		tts->last_msg_id = msg_id;
+		tts->ids[msg_id] = message.id;
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_STARTED, message.id);
+
+		queue.pop_front();
+		tts->speaking = true;
+	}
+}
+
+// Returns the `speaking` flag: set when speech_event_callback() submits an utterance,
+// cleared when that utterance ends or is canceled and by stop().
+// The flag is read without taking the instance mutex.
+bool TTS_Linux::is_speaking() const {
+	return speaking;
+}
+
+// Returns the `paused` flag: set by a successful pause(), cleared by resume() and stop().
+// The flag is read without taking the instance mutex.
+bool TTS_Linux::is_paused() const {
+	return paused;
+}
+
+// Lists the synthesis voices reported by Speech Dispatcher.
+//
+// Returns an Array of Dictionaries with the keys "name", "id" (both the voice name) and
+// "language" (the voice language and variant joined by "_"). The Array is empty when no
+// voice list is available; the call fails with an error when the connection is not open.
+Array TTS_Linux::get_voices() const {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND_V(!synth, Array());
+
+	Array result;
+	SPDVoice **voice_list = spd_list_synthesis_voices(synth);
+	if (voice_list == nullptr) {
+		return result;
+	}
+
+	// The list is terminated by a null entry.
+	for (SPDVoice **it = voice_list; *it != nullptr; it++) {
+		const SPDVoice *voice = *it;
+		const String voice_name = String::utf8(voice->name);
+
+		Dictionary entry;
+		entry["name"] = voice_name;
+		entry["id"] = voice_name;
+		entry["language"] = String::utf8(voice->language) + "_" + String::utf8(voice->variant);
+		result.push_back(entry);
+	}
+	free_spd_voices(voice_list);
+
+	return result;
+}
+
+// Queues an utterance for speech.
+//
+// `p_interrupt` stops and cancels everything already queued or playing first. An empty
+// `p_text` is not queued; it is immediately reported as canceled. `p_volume` is clamped
+// to 0..100, `p_pitch` to 0..2 and `p_rate` to 0.1..10. If playback is paused it is
+// resumed, otherwise the queue is processed right away.
+void TTS_Linux::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.empty()) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	OS::TTSUtterance utterance;
+	utterance.id = p_utterance_id;
+	utterance.text = p_text;
+	utterance.voice = p_voice;
+	utterance.volume = CLAMP(p_volume, 0, 100);
+	utterance.pitch = CLAMP(p_pitch, 0.f, 2.f);
+	utterance.rate = CLAMP(p_rate, 0.1f, 10.f);
+	queue.push_back(utterance);
+
+	if (!is_paused()) {
+		// No real Speech Dispatcher event here: this call only drains the queue.
+		speech_event_callback(0, 0, SPD_EVENT_BEGIN);
+	} else {
+		resume();
+	}
+}
+
+// Asks Speech Dispatcher to pause playback.
+// `paused` is set only when spd_pause() returns 0; fails with an error when the
+// connection is not open.
+void TTS_Linux::pause() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	const int status = spd_pause(synth);
+	if (status != 0) {
+		return;
+	}
+	paused = true;
+}
+
+// Asks Speech Dispatcher to resume playback and clears the `paused` flag.
+// The result of spd_resume() is not checked, so the flag is cleared unconditionally.
+// Fails with an error when the connection is not open.
+void TTS_Linux::resume() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	spd_resume(synth);
+	paused = false;
+}
+
+// Cancels all speech: every queued utterance and the one last submitted are reported as
+// canceled, the queue and id map are emptied, and Speech Dispatcher is told to cancel
+// and resume. Fails with an error when the connection is not open.
+void TTS_Linux::stop() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+
+	// Utterances that were never submitted come first.
+	List<OS::TTSUtterance>::Element *pending = queue.front();
+	while (pending) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, pending->get().id);
+		pending = pending->next();
+	}
+
+	// Then the utterance last submitted, if it is still tracked.
+	const bool has_active = (last_msg_id != -1) && ids.has(last_msg_id);
+	if (has_active) {
+		OS::get_singleton()->tts_post_utterance_event(OS::TTS_UTTERANCE_CANCELED, ids[last_msg_id]);
+	}
+
+	// `ids` is emptied before spd_cancel(), so speech_event_callback() finds no tracked
+	// message for any later notification and posts no second event.
+	queue.clear();
+	ids.clear();
+	last_msg_id = -1;
+	spd_cancel(synth);
+	spd_resume(synth);
+	speaking = false;
+	paused = false;
+}
+
+// Returns the current instance, or null when none exists (before construction or after destruction).
+TTS_Linux *TTS_Linux::get_singleton() {
+	return singleton;
+}
+
+// Registers this instance as the singleton and starts the initialization thread.
+// `synth` remains null until speech_init_thread_func() opens the connection.
+TTS_Linux::TTS_Linux() {
+	singleton = this;
+	// Speech Dispatcher init can be slow, it might wait for helper process to start on background, so run it in the thread.
+	init_thread.start(speech_init_thread_func, this);
+}
+
+// Closes the Speech Dispatcher connection, if one was opened, and unregisters the singleton.
+TTS_Linux::~TTS_Linux() {
+	// The init thread writes `synth`; wait for it to finish before reading the pointer.
+	init_thread.wait_to_finish();
+	if (synth) {
+		spd_close(synth);
+	}
+
+	singleton = nullptr;
+}
--- a/platform/x11/tts_linux.h
+++ b/platform/x11/tts_linux.h
@ -0,0 +1,78 @@
+/*************************************************************************/
+/*  tts_linux.h                                                          */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_LINUX_H
+#define TTS_LINUX_H
+
+#include "core/array.h"
+#include "core/list.h"
+#include "core/map.h"
+#include "core/os/os.h"
+#include "core/os/thread.h"
+#include "core/os/thread_safe.h"
+#include "core/ustring.h"
+
+#include "speechd-so_wrap.h"
+
+// Text-to-speech backend built on Speech Dispatcher (libspeechd, loaded through speechd-so_wrap.h).
+// Utterances are queued by speak() and submitted one at a time from speech_event_callback().
+class TTS_Linux {
+	_THREAD_SAFE_CLASS_
+
+	// Utterances accepted by speak() that have not been submitted yet.
+	List<OS::TTSUtterance> queue;
+	// Speech Dispatcher connection; null until the init thread opens it successfully.
+	SPDConnection *synth = nullptr;
+	// True from the submission of an utterance until its end/cancel notification or stop().
+	bool speaking = false;
+	// True after a successful pause(), until resume() or stop().
+	bool paused = false;
+	// Speech Dispatcher message id of the utterance last submitted, -1 when there is none.
+	int last_msg_id = -1;
+	// Maps Speech Dispatcher message ids to the caller's utterance ids.
+	HashMap<int, int> ids;
+
+	// Runs speech_init_thread_func(); joined in the destructor.
+	Thread init_thread;
+
+	// Entry point of `init_thread`; `p_userdata` is the instance to initialize.
+	static void speech_init_thread_func(void *p_userdata);
+	// Installed as the end and cancel callback; also called by speak() to process the queue.
+	static void speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type);
+	// Installed as the index-mark callback; posts word-boundary events.
+	static void speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark);
+
+	static TTS_Linux *singleton;
+
+public:
+	// Returns the current instance, or null when none exists.
+	static TTS_Linux *get_singleton();
+
+	bool is_speaking() const;
+	bool is_paused() const;
+	// Returns one Dictionary per voice with the keys "name", "id" and "language".
+	Array get_voices() const;
+
+	// Queues `p_text` for speech; `p_interrupt` cancels everything queued or playing first.
+	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	void pause();
+	void resume();
+	// Cancels the current and all queued utterances.
+	void stop();
+
+	TTS_Linux();
+	~TTS_Linux();
+};
+
+#endif // TTS_LINUX_H