diff --git a/core/bind/core_bind.cpp b/core/bind/core_bind.cpp index 78119976378..c26fa7ef9b1 100644 --- a/core/bind/core_bind.cpp +++ b/core/bind/core_bind.cpp @@ -2807,7 +2807,27 @@ void _Thread::_start_func(void *ud) { Thread::set_name(t->target_method); - t->ret = target_instance->call(t->target_method, arg, argc, ce); + // To avoid a circular reference between the thread and the script which can possibly contain a reference + // to the thread, we will do the call (keeping a reference up to that point) and then break chains with it. + // When the call returns, we will reference the thread again if possible. + ObjectID th_instance_id = t->get_instance_id(); + StringName target_method = t->target_method; + t = Ref<_Thread>(); + + Variant ret; + ret = target_instance->call(target_method, arg, argc, ce); + + // If script properly kept a reference to the thread, we should be able to re-reference it now + // (well, or if the call failed, since we had to break chains anyway because the outcome isn't known upfront). + t = Ref<_Thread>(ObjectDB::get_instance(th_instance_id)); + if (t.is_valid()) { + t->ret = ret; + t->running.clear(); + } else { + // We could print a warning here, but the Thread object will be eventually destroyed + // noticing wait_to_finish() hasn't been called on it, and it will print a warning itself. + } + if (ce.error != Variant::CallError::CALL_OK) { String reason; switch (ce.error) { @@ -2826,12 +2846,8 @@ void _Thread::_start_func(void *ud) { default: { } } - - t->running.clear(); - ERR_FAIL_MSG("Could not call function '" + t->target_method.operator String() + "' to start thread " + t->get_id() + ": " + reason + "."); + ERR_FAIL_MSG("Could not call function '" + target_method.operator String() + "' to start thread " + uitos(Thread::get_caller_id()) + ": " + reason + "."); } - - t->running.clear(); } Error _Thread::start(Object *p_instance, const StringName &p_method, const Variant &p_userdata, Priority p_priority) { diff --git a/core/os/semaphore.h b/core/os/semaphore.h index 918074031a5..fc15a57b32e 100644 --- a/core/os/semaphore.h +++ b/core/os/semaphore.h @@ -33,6 +33,9 @@ #include "core/error_list.h" #include "core/typedefs.h" +#ifdef DEBUG_ENABLED +#include "core/error_macros.h" +#endif #if !defined(NO_THREADS) @@ -44,6 +47,9 @@ private: mutable std::mutex mutex; mutable std::condition_variable condition; mutable uint32_t count = 0; // Initialized as locked. +#ifdef DEBUG_ENABLED + mutable uint32_t awaiters = 0; +#endif public: _ALWAYS_INLINE_ void post() const { @@ -54,10 +60,16 @@ public: _ALWAYS_INLINE_ void wait() const { std::unique_lock lock(mutex); +#ifdef DEBUG_ENABLED + ++awaiters; +#endif while (!count) { // Handle spurious wake-ups. condition.wait(lock); } - count--; + --count; +#ifdef DEBUG_ENABLED + --awaiters; +#endif } _ALWAYS_INLINE_ bool try_wait() const { @@ -74,6 +86,47 @@ public: std::lock_guard lock(mutex); return count; } + +#ifdef DEBUG_ENABLED + ~Semaphore() { + // Destroying an std::condition_variable when not all threads waiting on it have been notified + // invokes undefined behavior (e.g., it may be nicely destroyed or it may be awaited forever.) + // That means other threads could still be running the body of std::condition_variable::wait() + // but already past the safety checkpoint. That's the case for instance if that function is already + // waiting to lock again. + // + // We will make the rule a bit more restrictive and simpler to understand at the same time: there + // should not be any threads at any stage of the waiting by the time the semaphore is destroyed. + // + // We do so because of the following reasons: + // - We have the guideline that threads must be awaited (i.e., completed), so the waiting thread + // must be completely done by the time the thread controlling it finally destroys the semaphore. + // Therefore, only a coding mistake could make the program run into such a attempt at premature + // destruction of the semaphore. + // - In scripting, given that Semaphores are wrapped by RefCounted classes, in general it can't + // happen that a thread is trying to destroy a Semaphore while another is still doing whatever with + // it, so the simplification is mostly transparent to script writers. + // - The redefined rule can be checked for failure to meet it, which is what this implementation does. + // This is useful to detect a few cases of potential misuse; namely: + // a) In scripting: + // * The coder is naughtily dealing with the reference count causing a semaphore to die prematurely. + // * The coder is letting the project reach its termination without having cleanly finished threads + // that await on semaphores (or at least, let the usual semaphore-controlled loop exit). + // b) In the native side, where Semaphore is not a ref-counted beast and certain coding mistakes can + // lead to its premature destruction as well. + // + // Let's let users know they are doing it wrong, but apply a, somewhat hacky, countermeasure against UB + // in debug builds. + std::lock_guard lock(mutex); + if (awaiters) { + WARN_PRINT( + "A Semaphore object is being destroyed while one or more threads are still waiting on it.\n" + "Please call post() on it as necessary to prevent such a situation and so ensure correct cleanup."); + // And now, the hacky countermeasure (i.e., leak the condition variable). + new (&condition) std::condition_variable(); + } + } +#endif }; #else diff --git a/core/os/thread.cpp b/core/os/thread.cpp index 962e489a9dc..180f2e3ac1d 100644 --- a/core/os/thread.cpp +++ b/core/os/thread.cpp @@ -121,7 +121,9 @@ Error Thread::set_name(const String &p_name) { Thread::~Thread() { if (id != _thread_id_hash(std::thread::id())) { #ifdef DEBUG_ENABLED - WARN_PRINT("A Thread object has been destroyed without wait_to_finish() having been called on it. Please do so to ensure correct cleanup of the thread."); + WARN_PRINT( + "A Thread object is being destroyed without its completion having been realized.\n" + "Please call wait_to_finish() on it to ensure correct cleanup."); #endif thread.detach(); } diff --git a/doc/classes/Mutex.xml b/doc/classes/Mutex.xml index f3bf9efa64d..49d6886ae9e 100644 --- a/doc/classes/Mutex.xml +++ b/doc/classes/Mutex.xml @@ -5,6 +5,11 @@ A synchronization mutex (mutual exclusion). This is used to synchronize multiple [Thread]s, and is equivalent to a binary [Semaphore]. It guarantees that only one thread can ever acquire the lock at a time. A mutex can be used to protect a critical section; however, be careful to avoid deadlocks. + It's of the recursive kind, so it can be locked multiple times by one thread, provided it also unlocks it as many times. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met: + - By the time a [Mutex]'s reference count reaches zero and therefore it is destroyed, no threads (including the one on which the destruction will happen) must have it locked. + - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not have any mutex locked. $DOCS_URL/tutorials/performance/threads/using_multiple_threads.html @@ -29,6 +34,7 @@ Unlocks this [Mutex], leaving it to other threads. [b]Note:[/b] If a thread called [method lock] or [method try_lock] multiple times while already having ownership of the mutex, it must also call [method unlock] the same number of times in order to unlock it correctly. + [b]Warning:[/b] Calling [method unlock] more times that [method lock] on a given thread, thus ending up trying to unlock a non-locked mutex, is wrong and may causes crashes or deadlocks. diff --git a/doc/classes/Semaphore.xml b/doc/classes/Semaphore.xml index 8701c2027cd..33ddee4ddb4 100644 --- a/doc/classes/Semaphore.xml +++ b/doc/classes/Semaphore.xml @@ -5,6 +5,10 @@ A synchronization semaphore which can be used to synchronize multiple [Thread]s. Initialized to zero on creation. Be careful to avoid deadlocks. For a binary version, see [Mutex]. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met: + - By the time a [Semaphore]'s reference count reaches zero and therefore it is destroyed, no threads must be waiting on it. + - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not be waiting on any semaphore. $DOCS_URL/tutorials/performance/threads/using_multiple_threads.html diff --git a/doc/classes/Thread.xml b/doc/classes/Thread.xml index f35293fad25..ff481717dd8 100644 --- a/doc/classes/Thread.xml +++ b/doc/classes/Thread.xml @@ -6,6 +6,11 @@ A unit of execution in a process. Can run methods on [Object]s simultaneously. The use of synchronization via [Mutex] or [Semaphore] is advised if working with shared objects. [b]Note:[/b] Breakpoints won't break on code if it's running in a thread. This is a current limitation of the GDScript debugger. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met by the time a [Thread]'s reference count reaches zero and therefore it is destroyed: + - It must not have any [Mutex] objects locked. + - It must not be waiting on any [Semaphore] objects. + - [method wait_to_finish] should have been called on it. $DOCS_URL/tutorials/performance/threads/using_multiple_threads.html