From 72584d703c7e1967bdb46dfa13f66b4cf9a6b02b Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Mon, 26 Sep 2022 18:01:41 -0400
Subject: [PATCH 1/4] i#5658: Add register barrier at rseq endpoints

Adds a new label DR_NOTE_REG_BARRIER at each rseq endpoint which is
read by drreg and treated as a barrier where all app values are
brought back into their registers.  This ensures the native rseq copy
has the right input values for cases such as drbbdup where with
multiple app block copies registers might not be restored until after
the final block instr.

Augments the tool.drcacheoff.rseq test to load from enough registers
(on x86 at least) to force drreg to use one of them, reproducing the
crash.  With the fix, the crash disappears.

Fixes #5658
---
 core/arch/interp.c        |  6 ++++++
 core/arch/mangle_shared.c | 10 ++++------
 core/lib/globals_api.h    |  5 +++++
 core/vmareas.c            |  2 ++
 ext/drreg/drreg.c         | 14 ++++++++++----
 suite/tests/linux/rseq.c  | 18 ++++++++++++++++--
 6 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/core/arch/interp.c b/core/arch/interp.c
index e80789603..22f2dc81b 100644
--- a/core/arch/interp.c
+++ b/core/arch/interp.c
@@ -3968,6 +3968,12 @@ build_bb_ilist(dcontext_t *dcontext, build_bb_t *bb)
     bb->end_pc = bb->cur_pc;
     BBPRINT(bb, 3, "end_pc = " PFX "\n\n", bb->end_pc);
 
+    if (TEST(FRAG_HAS_RSEQ_ENDPOINT, bb->flags)) {
+        instr_t *label = INSTR_CREATE_label(dcontext);
+        instr_set_note(label, (void *)DR_NOTE_REG_BARRIER);
+        instrlist_meta_preinsert(bb->ilist, bb->instr, label);
+    }
+
     /* We could put this in check_new_page_jmp where it already checks
      * for native_exec overlap, but selfmod ubrs don't even call that routine
      */
diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c
index 5985f08f5..f8e25fb47 100644
--- a/core/arch/mangle_shared.c
+++ b/core/arch/mangle_shared.c
@@ -931,11 +931,8 @@ mangle_rseq_insert_native_sequence(dcontext_t *dcontext, instrlist_t *ilist,
     RSTATS_INC(num_rseq_native_calls_inserted);
     instr_t *insert_at = *next_instr;
 
-    /* We assume that by making this a block end, clients will restore app state
-     * before this native invocation.
-     * TODO i#2350: Take some further action to better guarantee this in the face
-     * of future drreg optimizations, etc.  Do we need new interface features, or
-     * do we live with a fake app jump or sthg?
+    /* We've already inserted a DR_NOTE_REG_BARRIER label to ensure that clients will
+     * restore app state before this native invocation.
      */
 
     /* Create a scratch register. Use slot 1 to avoid conflict with segment
@@ -1358,7 +1355,8 @@ mangle_rseq(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr,
     int len = instr_length(dcontext, instr);
     if (pc + len >= end) {
         ilist->flags |= INSTR_RSEQ_ENDPOINT;
-        *flags |= FRAG_HAS_RSEQ_ENDPOINT;
+        /* We should already have this flag set by the bb builder. */
+        ASSERT(TEST(FRAG_HAS_RSEQ_ENDPOINT, *flags));
         if (pc + len != end) {
             REPORT_FATAL_ERROR_AND_EXIT(
                 RSEQ_BEHAVIOR_UNSUPPORTED, 3, get_application_name(),
diff --git a/core/lib/globals_api.h b/core/lib/globals_api.h
index fcccbd259..229d474e8 100644
--- a/core/lib/globals_api.h
+++ b/core/lib/globals_api.h
@@ -849,6 +849,11 @@ enum {
     /** Identifies the end of a clean call. */
     /* This is used to allow instrumentation pre-and-post a clean call for i#4128. */
     DR_NOTE_CLEAN_CALL_END,
+    /**
+     * Identifies a point at which clients should restore all registers to
+     * their application values, as required for DR's internal block mangling.
+     */
+    DR_NOTE_REG_BARRIER,
 };
 
 /**
diff --git a/core/vmareas.c b/core/vmareas.c
index 090301d79..6f9e9fc0f 100644
--- a/core/vmareas.c
+++ b/core/vmareas.c
@@ -8298,6 +8298,8 @@ check_thread_vm_area(dcontext_t *dcontext, app_pc pc, app_pc tag, void **vmlist,
             if (xfer && (entered_rseq || exited_rseq || pc == next_boundary)) {
                 LOG(THREAD, LOG_VMAREAS | LOG_INTERP, 3,
                     "Stopping bb at rseq boundary " PFX "\n", pc);
+                if (exited_rseq)
+                    *flags |= FRAG_HAS_RSEQ_ENDPOINT;
                 result = false;
             }
         }
diff --git a/ext/drreg/drreg.c b/ext/drreg/drreg.c
index 5818b4126..9ecf7c7d0 100644
--- a/ext/drreg/drreg.c
+++ b/ext/drreg/drreg.c
@@ -480,9 +480,12 @@ drreg_insert_restore_all(void *drcontext, instrlist_t *bb, instr_t *inst,
          /* Writing just a subset needs to combine with the original unwritten */
          (TESTANY(EFLAGS_WRITE_ARITH, instr_get_eflags(inst, DR_QUERY_INCLUDE_ALL)) &&
           aflags != 0 /*0 means everything is dead*/) ||
-         /* Annotation handlers require app values (XXX i#5160: Remove special case). */
+         /* Annotation handlers and rseq mangling require app values.
+          * (XXX i#5160: Refactor drreg vs core to remove special cases?)
+          */
          (instr_is_label(inst) &&
-          (ptr_uint_t)instr_get_note(inst) == DR_NOTE_ANNOTATION) ||
+          ((ptr_uint_t)instr_get_note(inst) == DR_NOTE_ANNOTATION ||
+           (ptr_uint_t)instr_get_note(inst) == DR_NOTE_REG_BARRIER)) ||
          /* DR slots are not guaranteed across app instrs */
          (pt->aflags.slot != MAX_SPILLS &&
           pt->aflags.slot >= (int)ops.num_spill_slots))) {
@@ -522,9 +525,12 @@ drreg_insert_restore_all(void *drcontext, instrlist_t *bb, instr_t *inst,
             regs_restored[GPR_IDX(reg)] = false;
         if (!pt->reg[GPR_IDX(reg)].native) {
             if (force_restore || instr_reads_from_reg(inst, reg, DR_QUERY_INCLUDE_ALL) ||
-                /* Annotations require app values (XXX i#5160: Remove special case). */
+                /* Annotation handlers and rseq mangling require app values.
+                 * (XXX i#5160: Refactor drreg vs core to remove special cases?)
+                 */
                 (instr_is_label(inst) &&
-                 (ptr_uint_t)instr_get_note(inst) == DR_NOTE_ANNOTATION) ||
+                 ((ptr_uint_t)instr_get_note(inst) == DR_NOTE_ANNOTATION ||
+                  (ptr_uint_t)instr_get_note(inst) == DR_NOTE_REG_BARRIER)) ||
                 /* Treat a partial write as a read, to restore rest of reg */
                 (instr_writes_to_reg(inst, reg, DR_QUERY_INCLUDE_ALL) &&
                  !instr_writes_to_exact_reg(inst, reg, DR_QUERY_INCLUDE_ALL)) ||
diff --git a/suite/tests/linux/rseq.c b/suite/tests/linux/rseq.c
index c71175437..fe4d22e2a 100644
--- a/suite/tests/linux/rseq.c
+++ b/suite/tests/linux/rseq.c
@@ -142,8 +142,13 @@ test_rseq_call_once(bool force_restart_in, int *completions_out, int *restarts_o
         /* Store the entry into the ptr. */
         "leaq rseq_cs_simple(%%rip), %%rax\n\t"
         "movq %%rax, %[rseq_tls]\n\t"
-        /* Test a register input to the sequence. */
+        /* Test register inputs to the sequence. */
         "movl %[cpu_id], %%eax\n\t"
+        "movq %%rsp, %%rcx\n\t"
+        "movq %%rsp, %%rdx\n\t"
+        "movq %%rsp, %%rbx\n\t"
+        "movq %%rsp, %%rdi\n\t"
+        "movq %%rsp, %%rsi\n\t"
         /* Test "falling into" the rseq region. */
 
         /* Restartable sequence. */
@@ -151,6 +156,15 @@ test_rseq_call_once(bool force_restart_in, int *completions_out, int *restarts_o
         "movl %%eax, %[id]\n\t"
         /* Test clobbering an input register. */
         "movl %[cpu_id_uninit], %%eax\n\t"
+        /* Test reading many registers to stress drreg (e.g., i#5668).
+         * This is easier in x86 where we can exhaust drreg's choices.
+         * We do not attempt on aarch64 for simplicity.
+         */
+        "movq (%%rcx), %%rax\n\t"
+        "movq (%%rdx), %%rax\n\t"
+        "movq (%%rbx), %%rax\n\t"
+        "movq (%%rdi), %%rax\n\t"
+        "movq (%%rsi), %%rax\n\t"
         /* Test a restart in the middle of the sequence via ud2a SIGILL. */
         "cmpb $0, %[force_restart]\n\t"
         "jz 7f\n\t"
@@ -186,7 +200,7 @@ test_rseq_call_once(bool force_restart_in, int *completions_out, int *restarts_o
           [force_restart_write] "=m"(force_restart)
         : [cpu_id] "m"(rseq_tls.cpu_id), [cpu_id_uninit] "i"(RSEQ_CPU_ID_UNINITIALIZED),
           [force_restart] "m"(force_restart)
-        : "rax", "memory");
+        : "rax", "rcx", "rdx", "rbx", "rdi", "rsi", "memory");
 #elif defined(AARCH64)
     __asm__ __volatile__(
         RSEQ_ADD_TABLE_ENTRY(simple, 2f, 3f, 4f)
-- 
GitLab


From 523f41d2c56023fd6145a94077fea40767fe739f Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Tue, 27 Sep 2022 23:55:21 -0400
Subject: [PATCH 2/4] Fix Windows build

---
 core/arch/interp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/core/arch/interp.c b/core/arch/interp.c
index 22f2dc81b..5c5564aaf 100644
--- a/core/arch/interp.c
+++ b/core/arch/interp.c
@@ -3968,11 +3968,13 @@ build_bb_ilist(dcontext_t *dcontext, build_bb_t *bb)
     bb->end_pc = bb->cur_pc;
     BBPRINT(bb, 3, "end_pc = " PFX "\n\n", bb->end_pc);
 
+#ifdef LINUX
     if (TEST(FRAG_HAS_RSEQ_ENDPOINT, bb->flags)) {
         instr_t *label = INSTR_CREATE_label(dcontext);
         instr_set_note(label, (void *)DR_NOTE_REG_BARRIER);
         instrlist_meta_preinsert(bb->ilist, bb->instr, label);
     }
+#endif
 
     /* We could put this in check_new_page_jmp where it already checks
      * for native_exec overlap, but selfmod ubrs don't even call that routine
-- 
GitLab


From 5d39bba4998f005ae2ebb9b2d358883d6a6896aa Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Wed, 28 Sep 2022 01:18:02 -0400
Subject: [PATCH 3/4] Propagate rseq flag to traces

---
 core/monitor.c | 5 ++++-
 core/vmareas.c | 8 +++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/core/monitor.c b/core/monitor.c
index bdf2bb47f..908de250c 100644
--- a/core/monitor.c
+++ b/core/monitor.c
@@ -1109,7 +1109,10 @@ get_and_check_add_size(dcontext_t *dcontext, fragment_t *f, uint *res_add_size,
 static inline uint
 trace_flags_from_component_flags(uint flags)
 {
-    return (flags & (FRAG_HAS_SYSCALL | FRAG_HAS_DIRECT_CTI IF_X86_64(| FRAG_32_BIT)));
+    return (flags &
+            (FRAG_HAS_SYSCALL |
+             FRAG_HAS_DIRECT_CTI IF_X86_64(
+                 | FRAG_32_BIT IF_LINUX(| FRAG_HAS_RSEQ_ENDPOINT))));
 }
 
 static inline uint
diff --git a/core/vmareas.c b/core/vmareas.c
index 6f9e9fc0f..f36e7bb3e 100644
--- a/core/vmareas.c
+++ b/core/vmareas.c
@@ -8260,11 +8260,9 @@ check_thread_vm_area(dcontext_t *dcontext, app_pc pc, app_pc tag, void **vmlist,
         if (!vmvector_empty(d_r_rseq_areas)) {
             /* XXX i#3798: While for core operation we do not need to end a block at
              * an rseq endpoint, we need clients to treat the endpoint as a barrier and
-             * restore app state.  drreg today treats a block end as a barrier.  If
-             * drreg adds optimizations that cross blocks (such as in traces), we may
-             * need to add some other feature here: a fake app cti?  That affects
-             * clients measuring app behavior, though with rseq fidelity is already not
-             * 100%.  Similarly, we don't really need to not have a block span the start
+             * restore app state (which we do have DR_NOTE_REG_BARRIER for) and we
+             * prefer to simplify the block as much as we can.
+             * Similarly, we don't really need to not have a block span the start
              * of an rseq region.  But, we need to save app values at the start, which
              * is best done prior to drreg storing them elsewhere; plus, it makes it
              * easier to turn on full_decode for simpler mangling.
-- 
GitLab


From 126a6b241bff866d85869c52d3063816a5f454ab Mon Sep 17 00:00:00 2001
From: Derek Bruening <bruening@google.com>
Date: Wed, 28 Sep 2022 23:25:40 -0400
Subject: [PATCH 4/4] Adds docs on the new label to the state preservation
 section and dr_save_reg

---
 api/docs/bt.dox        | 13 +++++++++----
 core/lib/dr_ir_utils.h |  7 ++++++-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/api/docs/bt.dox b/api/docs/bt.dox
index 84e24d5f2..4a16e2334 100644
--- a/api/docs/bt.dox
+++ b/api/docs/bt.dox
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2021 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2022 Google, Inc.  All rights reserved.
  * Copyright (c) 2007-2009 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -913,9 +913,10 @@ that affect inlining.
 \subsection sec_state State Preservation
 
 To facilitate code transformations, DynamoRIO makes available its register
-spill slots and other state preservation functionality.  It exports API
-routines for saving and restoring registers to and from thread-local spill
-slots:
+spill slots and other state preservation functionality.
+The \ref page_drreg Extension Library is recommended to manage registers.
+DynamoRIO's direct interfaces for saving and restoring registers to
+and from thread-local spill slots may also be used:
 
 \code dr_save_reg(), dr_restore_reg(), and dr_reg_spill_slot_opnd() \endcode
 
@@ -924,6 +925,10 @@ The values stored in these spill slots remain valid until the next application
 
 \code dr_read_saved_reg(), dr_write_saved_reg() \endcode
 
+When using DynamoRIO's interfaces instead of drreg, be sure to look
+for the labels #DR_NOTE_ANNOTATION and #DR_NOTE_REG_BARRIER at which
+all application values should be restored to registers.
+
 For longer term persistence DynamoRIO also provides a generic dedicated
 thread-local storage field for use by clients, making it easy to write
 thread-aware clients.  From C code, use:
diff --git a/core/lib/dr_ir_utils.h b/core/lib/dr_ir_utils.h
index dcefb559f..567bcfb5e 100644
--- a/core/lib/dr_ir_utils.h
+++ b/core/lib/dr_ir_utils.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2010-2021 Google, Inc.  All rights reserved.
+ * Copyright (c) 2010-2022 Google, Inc.  All rights reserved.
  * Copyright (c) 2002-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -108,6 +108,11 @@ DR_API
  * application) instruction. Use dr_insert_write_tls_field() and
  * dr_insert_read_tls_field() for a persistent (but more costly to access)
  * thread-local-storage location.  See also dr_raw_tls_calloc().
+ *
+ * Generally, using the \ref page_drreg Extension Library instead is recommended.
+ * When using custom spills and restores, be sure to look for the labels
+ * #DR_NOTE_ANNOTATION and #DR_NOTE_REG_BARRIER at which all application values
+ * should be restored to registers.
  */
 void
 dr_save_reg(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t reg,
-- 
GitLab