From 8d81ca6ae7fcd6c2f9e673c990d5432f5457716a Mon Sep 17 00:00:00 2001 From: Kevin Malachowski <kevin@chowski.com> Date: Tue, 12 Dec 2017 17:07:45 -0800 Subject: [PATCH 1/4] i#2157 re-attach: Add timeout to os_thread_suspend After 5 seconds of waiting for a thread to acknowledge a received signal, os_thread_suspend now returns false so that the caller can retry. Issue #2157 --- core/synch.c | 4 ++-- core/unix/os.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/core/synch.c b/core/synch.c index 4f6609941..3a10dbde7 100644 --- a/core/synch.c +++ b/core/synch.c @@ -1922,10 +1922,10 @@ detach_on_permanent_stack(bool internal, bool do_cleanup) DEBUG_DECLARE(bool ok;) DEBUG_DECLARE(int exit_res;) /* synch-all flags: if we fail to suspend a thread (e.g., privilege - * problems) ignore it. XXX Should we retry instead? + * problems) retry it. */ /* i#297: we only synch client threads after process exit event. */ - uint flags = THREAD_SYNCH_SUSPEND_FAILURE_IGNORE | THREAD_SYNCH_SKIP_CLIENT_THREAD; + uint flags = THREAD_SYNCH_SUSPEND_FAILURE_RETRY | THREAD_SYNCH_SKIP_CLIENT_THREAD; ENTERING_DR(); diff --git a/core/unix/os.c b/core/unix/os.c index a538cec3d..0512ae422 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -3474,10 +3474,17 @@ os_thread_suspend(thread_record_t *tr) */ mutex_unlock(&ostd->suspend_lock); while (ksynch_get_value(&ostd->suspended) == 0) { - /* For Linux, waits only if the suspended flag is not set as 1. Return value - * doesn't matter because the flag will be re-checked. + /* For Linux, waits only if the suspended flag is not set as 1. Other + * than a timeout value, the return value doesn't matter because the + * flag will be re-checked. */ - ksynch_wait(&ostd->suspended, 0, 0); + if (ksynch_wait(&ostd->suspended, 0, 5000) == -ETIMEDOUT) { + mutex_lock(&ostd->suspend_lock); + ASSERT(ostd->suspend_count > 0); + ostd->suspend_count--; + mutex_unlock(&ostd->suspend_lock); + return false; + } if (ksynch_get_value(&ostd->suspended) == 0) { /* If it still has to wait, give up the cpu. */ os_thread_yield(); -- GitLab From f1602798dd46619a974fc41fde5e59a5cb12ab62 Mon Sep 17 00:00:00 2001 From: Kevin Malachowski <kevin@chowski.com> Date: Thu, 14 Dec 2017 13:58:56 -0800 Subject: [PATCH 2/4] Address review comments. * Add timeout parameter to os_suspend_thread. * Move decision about whether a thread suspend request times out to the caller of synch_with_thread, specifically its 'block' parameter. * Implemented correctly for macos * Changed code around so that Windows is unaffected. --- core/dynamo.c | 2 +- core/os_shared.h | 6 +++++- core/synch.c | 25 +++++++++++++++++++++---- core/unix/ksynch.h | 3 ++- core/unix/ksynch_macos.c | 11 ++++++++++- core/unix/os.c | 4 ++-- core/win32/os.c | 2 +- 7 files changed, 42 insertions(+), 11 deletions(-) diff --git a/core/dynamo.c b/core/dynamo.c index 2882b5547..c7b935fad 100644 --- a/core/dynamo.c +++ b/core/dynamo.c @@ -3111,7 +3111,7 @@ dynamorio_unprotect(void) for (tr = all_threads[i]; tr; tr = tr->next) { if (tr->under_dynamo_control) { DEBUG_DECLARE(bool ok =) - os_thread_suspend(all_threads[i]); + os_thread_suspend(all_threads[i], 0); ASSERT(ok); protect_info->num_threads_suspended++; } diff --git a/core/os_shared.h b/core/os_shared.h index 112907503..074b79ed6 100644 --- a/core/os_shared.h +++ b/core/os_shared.h @@ -151,7 +151,11 @@ thread_id_t get_thread_id(void); process_id_t get_process_id(void); void os_thread_yield(void); void os_thread_sleep(uint64 milliseconds); -bool os_thread_suspend(thread_record_t *tr); +/* os_thread_suspend may return false in the case of a timeout. Note that the + * timeout field is best effort and may be ignored by implementations (i.e. + * Windows); setting timeout to 0 results in blocking forever. + */ +bool os_thread_suspend(thread_record_t *tr, int timeout_ms); bool os_thread_resume(thread_record_t *tr); bool os_thread_terminate(thread_record_t *tr); diff --git a/core/synch.c b/core/synch.c index 3a10dbde7..968b38f94 100644 --- a/core/synch.c +++ b/core/synch.c @@ -45,6 +45,9 @@ #include "native_exec.h" #include <string.h> /* for memcpy */ +/* Give threads 5 seconds to suspend when given a non-blocking synch request */ +#define SUSPEND_THREAD_TIMEOUT 5000 + extern vm_area_vector_t *fcache_unit_areas; /* from fcache.c */ static bool started_detach = false; /* set before synchall */ @@ -888,6 +891,7 @@ synch_with_thread(thread_id_t id, bool block, bool hold_initexit_lock, IF_UNIX(bool actually_suspended = true;) const uint max_loops = TEST(THREAD_SYNCH_SMALL_LOOP_MAX, flags) ? (SYNCH_MAXIMUM_LOOPS/10) : SYNCH_MAXIMUM_LOOPS; + int thread_suspend_timeout_ms = block ? 0 : SUSPEND_THREAD_TIMEOUT; ASSERT(id != my_id); /* Must set ABORT or IGNORE. Only caller can RETRY as need a new @@ -960,7 +964,7 @@ synch_with_thread(thread_id_t id, bool block, bool hold_initexit_lock, adjust_wait_at_safe_spot(trec->dcontext, 1); first_loop = false; } - if (!os_thread_suspend(trec)) { + if (!os_thread_suspend(trec, thread_suspend_timeout_ms)) { /* FIXME : eventually should be a real assert once we figure out * how to handle threads with low privilege handles */ /* For dr_api_exit, we may have missed a thread exit. */ @@ -1921,11 +1925,24 @@ detach_on_permanent_stack(bool internal, bool do_cleanup) #endif DEBUG_DECLARE(bool ok;) DEBUG_DECLARE(int exit_res;) - /* synch-all flags: if we fail to suspend a thread (e.g., privilege - * problems) retry it. + + /* synch-all flags: */ + uint flags = 0; +#ifdef WINDOWS + /* For Windows we may fail to suspend a thread (e.g., privilege + * problems), and in that case we want to just ignore the failure. */ + flags |= THREAD_SYNCH_SUSPEND_FAILURE_IGNORE; +#elif defined(UNIX) + /* For Unix, we don't have that sort of permissions troubles, just timing + * races which may cause the SUSPEND_SIGNAL to be dropped (xref i#26). In + * that case, we know we want to retry any failures. + */ + flags |= THREAD_SYNCH_SUSPEND_FAILURE_RETRY; +#endif + /* i#297: we only synch client threads after process exit event. */ - uint flags = THREAD_SYNCH_SUSPEND_FAILURE_RETRY | THREAD_SYNCH_SKIP_CLIENT_THREAD; + flags |= THREAD_SYNCH_SKIP_CLIENT_THREAD; ENTERING_DR(); diff --git a/core/unix/ksynch.h b/core/unix/ksynch.h index cff310b86..8a642670d 100644 --- a/core/unix/ksynch.h +++ b/core/unix/ksynch.h @@ -87,7 +87,8 @@ ksynch_set_value(mac_synch_t *synch, int new_val); KSYNCH_TYPE * mutex_get_contended_event(mutex_t *lock); -/* These return 0 on success: */ +/* These return 0 on success and a negative value on failure. ksynch_wait + * returns -ETIMEDOUT if there was a timeout condition. */ ptr_int_t ksynch_wait(KSYNCH_TYPE *var, int mustbe, int timeout_ms); diff --git a/core/unix/ksynch_macos.c b/core/unix/ksynch_macos.c index 5fb7be7df..a00e167b5 100644 --- a/core/unix/ksynch_macos.c +++ b/core/unix/ksynch_macos.c @@ -125,7 +125,16 @@ ksynch_wait(mac_synch_t *synch, int mustbe, int timeout_ms) res = semaphore_timedwait(synch->sem, timeout); } else res = semaphore_wait(synch->sem); - return (res == KERN_SUCCESS ? 0 : -1); + + /* Conform to the API specified in ksynch.h */ + switch (res) { + case KERN_SUCCESS: + return 0; + case KERN_OPERATION_TIMED_OUT: + return -ETIMEDOUT; + default: + return -1; + } } ptr_int_t diff --git a/core/unix/os.c b/core/unix/os.c index 0512ae422..9036e6699 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -3440,7 +3440,7 @@ os_thread_sleep(uint64 milliseconds) } bool -os_thread_suspend(thread_record_t *tr) +os_thread_suspend(thread_record_t *tr, int timeout_ms) { os_thread_data_t *ostd = (os_thread_data_t *) tr->dcontext->os_field; ASSERT(ostd != NULL); @@ -3478,7 +3478,7 @@ os_thread_suspend(thread_record_t *tr) * than a timeout value, the return value doesn't matter because the * flag will be re-checked. */ - if (ksynch_wait(&ostd->suspended, 0, 5000) == -ETIMEDOUT) { + if (ksynch_wait(&ostd->suspended, 0, timeout_ms) == -ETIMEDOUT) { mutex_lock(&ostd->suspend_lock); ASSERT(ostd->suspend_count > 0); ostd->suspend_count--; diff --git a/core/win32/os.c b/core/win32/os.c index cfa9f039d..5381cc8da 100644 --- a/core/win32/os.c +++ b/core/win32/os.c @@ -4905,7 +4905,7 @@ os_timeout(int time_in_milliseconds) } bool -os_thread_suspend(thread_record_t *tr) +os_thread_suspend(thread_record_t *tr, int timeout_ms) { return nt_thread_suspend(tr->handle, NULL); } -- GitLab From 95b2a24ea7be8f2fc8436c00937ba353bc1fbf6a Mon Sep 17 00:00:00 2001 From: Kevin Malachowski <kevin@chowski.com> Date: Thu, 14 Dec 2017 15:52:55 -0800 Subject: [PATCH 3/4] Fix windows build --- core/win32/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/win32/os.c b/core/win32/os.c index 5381cc8da..892ace828 100644 --- a/core/win32/os.c +++ b/core/win32/os.c @@ -7679,7 +7679,7 @@ os_dump_core_live_dump(const char *msg, char *path OUT, size_t path_sz) nt_get_handle_access_rights(tr->handle); TEB *teb_addr = get_teb(tr->handle); DEBUG_DECLARE(bool res = ) - os_thread_suspend(tr); + os_thread_suspend(tr, 0); /* we can't assert here (could infinite loop) */ DODEBUG({ suspend_failures = suspend_failures || !res; }); if (thread_get_context(tr, &cxt)) { -- GitLab From 9594e901a5cf2b3a3d0168d136fb3f363ca28ee0 Mon Sep 17 00:00:00 2001 From: Kevin Malachowski <kevin@chowski.com> Date: Fri, 15 Dec 2017 07:30:40 -0800 Subject: [PATCH 4/4] Fix comment format --- core/unix/ksynch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/unix/ksynch.h b/core/unix/ksynch.h index 8a642670d..ad0ccfba4 100644 --- a/core/unix/ksynch.h +++ b/core/unix/ksynch.h @@ -88,7 +88,8 @@ KSYNCH_TYPE * mutex_get_contended_event(mutex_t *lock); /* These return 0 on success and a negative value on failure. ksynch_wait - * returns -ETIMEDOUT if there was a timeout condition. */ + * returns -ETIMEDOUT if there was a timeout condition. + */ ptr_int_t ksynch_wait(KSYNCH_TYPE *var, int mustbe, int timeout_ms); -- GitLab