diff --git a/core/arch/x86/x86.asm b/core/arch/x86/x86.asm index 55ca3091a39261fb28ccaf3826c2a26756ed2aeb..4daa2756b8dfa4de925d4493c3b71e03e36343f7 100644 --- a/core/arch/x86/x86.asm +++ b/core/arch/x86/x86.asm @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2017 Google, Inc. All rights reserved. + * Copyright (c) 2011-2018 Google, Inc. All rights reserved. * Copyright (c) 2001-2010 VMware, Inc. All rights reserved. * ********************************************************** */ @@ -594,7 +594,7 @@ GLOBAL_LABEL(cleanup_and_terminate:) mov REG_XBX, PTRSZ [1*ARG_SZ + REG_XBP] /* dcontext */ SAVE_TO_DCONTEXT_VIA_REG(REG_XBX,is_exiting_OFFSET,1) CALLC1(GLOBAL_REF(is_currently_on_dstack), REG_XBX) /* xbx is callee-saved */ - cmp REG_XAX, 0 + cmp al, 0 jnz cat_save_dstack mov REG_XBX, 0 /* save 0 for dstack to avoid double-free */ jmp cat_done_saving_dstack @@ -1486,7 +1486,7 @@ GLOBAL_LABEL(master_signal_handler:) mov REG_XAX, REG_XSP /* call a C routine rather than writing everything in asm */ CALLC2(GLOBAL_REF(sig_should_swap_stack), REG_XAX, REG_XDX) - cmp REG_XAX, 0 + cmp al, 0 pop REG_XAX /* clone_and_swap_args.stack */ pop REG_XCX /* clone_and_swap_args.tos */ je no_swap diff --git a/core/dynamo.c b/core/dynamo.c index 4443502e0beffdb4b54661b25d37029ecd2e94a2..25d2725253e257480f46475824dd6cff5c369f98 100644 --- a/core/dynamo.c +++ b/core/dynamo.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2017 Google, Inc. All rights reserved. + * Copyright (c) 2010-2018 Google, Inc. All rights reserved. * Copyright (c) 2000-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -1662,6 +1662,7 @@ delete_dynamo_context(dcontext_t *dcontext, bool free_stack) if (free_stack) { ASSERT(dcontext->dstack != NULL); ASSERT(!is_currently_on_dstack(dcontext)); + LOG(GLOBAL, LOG_THREADS, 1, "Freeing DR stack "PFX"\n", dcontext->dstack); stack_free(dcontext->dstack, DYNAMORIO_STACK_SIZE); } /* else will be cleaned up by caller */ @@ -2313,8 +2314,8 @@ dynamo_thread_init(byte *dstack_in, priv_mcontext_t *mc IF_CLIENT_INTERFACE_ELSE(client_thread ? "CLIENT " : "", ""), get_thread_id(), dcontext); LOG(THREAD, LOG_TOP|LOG_THREADS, 1, - "DR stack is "PFX"-"PFX"\n", dcontext->dstack - DYNAMORIO_STACK_SIZE, - dcontext->dstack); + "DR stack is "PFX"-"PFX" (passed in "PFX")\n", + dcontext->dstack - DYNAMORIO_STACK_SIZE, dcontext->dstack, dstack_in); #endif #ifdef DEADLOCK_AVOIDANCE diff --git a/core/nudge.c b/core/nudge.c index 2449f3581a8c5e4ff15fbceb7985195b6034e655..cac29dd57252b0d9f74b5d5e7422166ed0df3a4d 100644 --- a/core/nudge.c +++ b/core/nudge.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2017 Google, Inc. All rights reserved. + * Copyright (c) 2011-2018 Google, Inc. All rights reserved. * Copyright (c) 2008-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -125,14 +125,17 @@ nudge_thread_cleanup(dcontext_t *dcontext, bool exit_process, uint exit_code) * terminate nudge threads instead of allowing them to return and exit normally. * On XP and 2k3 none of our nudge creation routines inform csrss of the new thread * (which is who typically frees the stacks). - * On Vista we don't use NtCreateThreadEx to create the nudge threads so the kernel - * doesn't free the stack. + * On Vista and Win7 we don't use NtCreateThreadEx to create the nudge threads so + * the kernel doesn't free the stack. * As such we are left with two options: free the app stack here (nudgee free) or - * have the nudge thread creator free the app stack (nudger free). Going with - * nudgee free means we leak exit race nudge stacks whereas if we go with nudger free - * for external nudges then we'll leak timed out nudge stacks (for internal nudges - * we pretty much have to do nudgee free). A nudge_arg_t flag is used to specify - * which model we use, but currently we always nudgee free. + * have the nudge thread creator free the app stack (nudger free). Going with + * nudgee free means we leak exit race nudge stacks whereas if we go with nudger free + * for external nudges then we'll leak timed out nudge stacks (for internal nudges + * we pretty much have to do nudgee free). A nudge_arg_t flag is used to specify + * which model we use, but currently we always nudgee free. + * On Win8+ we do use NtCreateThreadEx to create the nudge threads so the kernel + * does free the stack. We could use this on Vista and Win7 too -- should we? + * It requires someone to free the argument buffer (NUDGE_FREE_ARG). * * dynamo_thread_exit_common() is where the app stack is actually freed, not here. */ @@ -219,8 +222,6 @@ generic_nudge_handler(nudge_arg_t *arg_dont_use) /* if needed tell thread exit to free the application stack */ if (!TEST(NUDGE_NUDGER_FREE_STACK, safe_arg.flags)) { dcontext->free_app_stack = true; - } else { - ASSERT_NOT_TESTED(); } /* FIXME - would be nice to inform nudge creator if we need to nop the nudge. */ @@ -262,7 +263,6 @@ generic_nudge_handler(nudge_arg_t *arg_dont_use) /* Free the arg if requested. */ if (TEST(NUDGE_FREE_ARG, safe_arg.flags)) { - ASSERT_NOT_TESTED(); nt_free_virtual_memory(arg_dont_use); } @@ -488,10 +488,18 @@ nudge_internal(process_id_t pid, uint nudge_action_mask, nudge_arg.version = NUDGE_ARG_CURRENT_VERSION; nudge_arg.nudge_action_mask = nudge_action_mask; - /* we do not set NUDGE_NUDGER_FREE_STACK so the stack will be freed - * in the target process + /* We do not set NUDGE_NUDGER_FREE_STACK so the stack will be freed + * in the target process, for <=win7. */ nudge_arg.flags = (internal ? NUDGE_IS_INTERNAL : 0); +#ifdef WINDOWS + if (get_os_version() >= WINDOWS_VERSION_8) { + /* The kernel owns and frees the stack. */ + nudge_arg.flags |= NUDGE_NUDGER_FREE_STACK; + /* The arg was placed in a new kernel alloc. */ + nudge_arg.flags |= NUDGE_FREE_ARG; + } +#endif nudge_arg.client_arg = client_arg; nudge_arg.client_id = client_id; diff --git a/core/win32/ntdll.c b/core/win32/ntdll.c index cb6e9443769be297e3add65107f566685115e93e..d945c7ae1b585634ccceec63402b25bf645aaeea 100644 --- a/core/win32/ntdll.c +++ b/core/win32/ntdll.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2010-2017 Google, Inc. All rights reserved. + * Copyright (c) 2010-2018 Google, Inc. All rights reserved. * Copyright (c) 2003-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -4596,7 +4596,7 @@ create_thread_common(HANDLE hProcess, bool target_64bit, void *start_addr, res = NT_SYSCALL(CreateThread, &hthread, THREAD_ALL_ACCESS, &oa, hProcess, &cid, context, stack, (byte)TRUE); if (!NT_SUCCESS(res)) { - NTPRINT("create_thread: failed to create thread\n"); + NTPRINT("create_thread: failed to create thread: %x\n", res); return INVALID_HANDLE_VALUE; } /* Xref PR 252008 & PR 252745 - on 32-bit Windows the kernel will set esp @@ -4617,6 +4617,74 @@ create_thread_common(HANDLE hProcess, bool target_64bit, void *start_addr, return hthread; } +static HANDLE +our_create_thread_ex(HANDLE hProcess, bool target_64bit, void *start_addr, + void *arg, const void *arg_buf, size_t arg_buf_size, + uint stack_reserve, uint stack_commit, bool suspended, + thread_id_t *tid) +{ + HANDLE hthread; + OBJECT_ATTRIBUTES oa; + CLIENT_ID cid; + TEB *teb; + void *thread_arg = arg; + create_thread_info_t info = {0}; + NTSTATUS res; + /* NtCreateThreadEx doesn't exist prior to Vista. */ + ASSERT(syscalls[SYS_CreateThreadEx] != SYSCALL_NOT_PRESENT); + GET_RAW_SYSCALL(CreateThreadEx, + OUT PHANDLE ThreadHandle, + IN ACCESS_MASK DesiredAccess, + IN POBJECT_ATTRIBUTES ObjectAttributes, + IN HANDLE ProcessHandle, + IN LPTHREAD_START_ROUTINE Win32StartAddress, + IN LPVOID StartParameter, + IN BOOL CreateSuspended, + IN uint StackZeroBits, + IN SIZE_T StackCommitSize, + IN SIZE_T StackReserveSize, + INOUT create_thread_info_t *thread_info); + + InitializeObjectAttributes(&oa, NULL, OBJ_CASE_INSENSITIVE, NULL, NULL); + + if (arg_buf != NULL) { + /* XXX: Currently we leak this memory, except for nudge where the caller + * sets NUDGE_FREE_ARG. + */ + if (!NT_SUCCESS + (nt_remote_allocate_virtual_memory + (hProcess, &thread_arg, arg_buf_size, PAGE_READWRITE, MEM_COMMIT))) { + NTPRINT("create_thread: failed to allocate arg buf\n"); + return INVALID_HANDLE_VALUE; + } + if (!nt_write_virtual_memory(hProcess, thread_arg, arg_buf, + arg_buf_size, NULL)) { + NTPRINT("create_thread: failed to write arguments\n"); + return INVALID_HANDLE_VALUE; + } + } + + info.struct_size = sizeof(info); + info.client_id.flags = THREAD_INFO_ELEMENT_CLIENT_ID | THREAD_INFO_ELEMENT_UNKNOWN_2; + info.client_id.buffer_size = sizeof(cid); + info.client_id.buffer = &cid; + /* We get STATUS_INVALID_PARAMETER unless we also ask for teb. */ + info.teb.flags = THREAD_INFO_ELEMENT_TEB | THREAD_INFO_ELEMENT_UNKNOWN_2; + info.teb.buffer_size = sizeof(TEB*); + info.teb.buffer = &teb; + res = NT_RAW_SYSCALL(CreateThreadEx, &hthread, THREAD_ALL_ACCESS, &oa, hProcess, + (LPTHREAD_START_ROUTINE)convert_data_to_function(start_addr), + thread_arg, !!suspended, 0, stack_commit, stack_reserve, + &info); + if (!NT_SUCCESS(res)) { + NTPRINT("create_thread_ex: failed to create thread: %x\n", res); + return INVALID_HANDLE_VALUE; + } + if (tid != NULL) + *tid = (thread_id_t)cid.UniqueThread; + return hthread; +} + /* Creates a new stack w/ guard page */ HANDLE our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr, @@ -4631,6 +4699,14 @@ our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr, ASSERT(stack_commit + PAGE_SIZE <= stack_reserve && ALIGNED(stack_commit, PAGE_SIZE) && ALIGNED(stack_reserve, PAGE_SIZE)); + + if (get_os_version() >= WINDOWS_VERSION_8) { + /* NtCreateThread not available: use Ex where the kernel makes the stack. */ + return our_create_thread_ex(hProcess, target_64bit, start_addr, arg, arg_buf, + arg_buf_size, stack_reserve, stack_commit, + suspended, tid); + } + if (!NT_SUCCESS(nt_remote_allocate_virtual_memory(hProcess, &stack.ExpandableStackBottom, stack_reserve, PAGE_READWRITE, @@ -4638,8 +4714,7 @@ our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr, NTPRINT("create_thread: failed to allocate stack\n"); return INVALID_HANDLE_VALUE; } - /* FIXME : for failures beyond this point we don't bother deallocating the - * stack. */ + /* For failures beyond this point we don't bother deallocating the stack. */ stack.ExpandableStackBase = ((byte *)stack.ExpandableStackBottom) + stack_reserve; stack.ExpandableStackLimit = @@ -4662,13 +4737,22 @@ our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr, arg_buf, arg_buf_size, &stack, suspended, tid); } -/* Uses caller-allocated stack */ +/* Uses caller-allocated stack. hProcess must be NT_CURRENT_PROCESS for win8+. */ HANDLE our_create_thread_have_stack(HANDLE hProcess, bool target_64bit, void *start_addr, void *arg, const void *arg_buf, size_t arg_buf_size, byte *stack_base, size_t stack_size, bool suspended, thread_id_t *tid) { + if (get_os_version() >= WINDOWS_VERSION_8) { + /* FIXME i#1309: we need a wrapper function so we can use NtCreateThreadEx + * and then switch stacks. This is too hard to arrange in another process. + * NtCreateThread seems to work in some cases (on Appveyor) so until we have + * an NtCreateThreadEx solution we fall through. + */ + ASSERT(hProcess == NT_CURRENT_PROCESS && + "No support for creating a remote thread with a custom stack"); + } USER_STACK stack = {0}; stack.ExpandableStackBase = stack_base; stack.ExpandableStackLimit = stack_base - stack_size; diff --git a/core/win32/ntdll.h b/core/win32/ntdll.h index b9cc2d654209e4875658fb99cb5dab7738755cc5..7494a53b585aec62e623a410c346add6f8e2a4c5 100644 --- a/core/win32/ntdll.h +++ b/core/win32/ntdll.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2017 Google, Inc. All rights reserved. + * Copyright (c) 2011-2018 Google, Inc. All rights reserved. * Copyright (c) 2003-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -939,6 +939,7 @@ typedef enum { /* NOTE - these are speculative */ * - INOUT */ THREAD_INFO_ELEMENT_UNKNOWN_1 = 0x9, /* Unknown - ptr_uint_t sized * [ observed 1 ] - IN */ + THREAD_INFO_ELEMENT_UNKNOWN_2 = 0x10000, } thread_info_elm_buf_type_t; typedef struct _thread_info_element_t { /* NOTE - this is speculative */ @@ -2102,13 +2103,18 @@ nt_stop_profile(HANDLE profile_handle); HANDLE create_process(wchar_t *exe, wchar_t *cmdline); -/* NOTE see important usage information in ntdll.c, threads created with this - * function can NOT return from their start routine */ +/* See important usage information in ntdll.c: threads created with this + * function can NOT return from their start routine. + * On Win8+, the kernel owns the created stack; o/w, we own it. + * On Win8+, if arg_buf != NULL, it's placed in a new virtual alloc and it's + * up to the caller to free it. + */ HANDLE our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr, void *arg, const void *arg_buf, size_t arg_buf_size, uint stack_reserve, uint stack_commit, bool suspended, thread_id_t *tid); +/* Uses caller-allocated stack. hProcess must be NT_CURRENT_PROCESS for win8+. */ HANDLE our_create_thread_have_stack(HANDLE hProcess, bool target_64bit, void *start_addr, void *arg, const void *arg_buf, size_t arg_buf_size,