x86: Don't backtrace from trampoline asm
author Barret Rhoden <brho@cs.berkeley.edu>
Thu, 28 Jul 2016 22:11:25 +0000 (18:11 -0400)
committer Barret Rhoden <brho@cs.berkeley.edu>
Fri, 29 Jul 2016 21:43:08 +0000 (17:43 -0400)
Imagine this.  Userspace happens to load %rbp with a value that the kernel
cannot dereference.  Then it enters the kernel.  In the brief period before
the kernel sets rbp = 0 and starts its day, a profiling NMI arrives.  The
backtrace code would chase that bad %rbp and flip out.

We could use something like a safe copy_from sort of call (copy_from_user()
won't work as-is, incidentally, since it has built-in checks for user
addresses).  However, even if we made the access itself safe, the user would
have just gained the ability to read kernel memory: set rbp to a kernel
address, do a bunch of null syscalls in a loop with perf running, and
eventually you'll hit the bug.  Yeah, that's really rare.  Luckily, this did
not come up in practice - I just noticed it in passing.
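
To make that concrete, here is a rough userspace sketch of the repro -
nothing like this is in the tree.  issue_null_syscall() is a hypothetical
stand-in for whatever wrapper issues a null syscall, the address is
arbitrary, and you'd build with -fomit-frame-pointer so the compiler
tolerates the rbp clobber:

    #include <stdint.h>

    /* Hypothetical wrapper; any path into the kernel works. */
    extern void issue_null_syscall(void);

    int main(void)
    {
        /* Any address the kernel cannot safely dereference (or one whose
         * contents we'd like to see leak out through perf samples). */
        uintptr_t bogus = 0xffffffffdeadbeefUL;

        for (;;) {
            /* Load a junk frame pointer, then enter the kernel.  If a
             * profiling NMI lands before the kernel zeroes rbp, the old
             * backtrace code would chase this value. */
            asm volatile ("movq %0, %%rbp" : : "r" (bogus) : "rbp");
            issue_null_syscall();
        }
    }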

The nice thing about this approach is that the kernel should not be
backtracing beyond its entry point, regardless of the value of rbp.
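
Concretely, the patch brackets each asm trampoline with global labels and
has the backtrace walker stop as soon as the PC falls inside one of those
ranges.  A standalone toy version of the bracketing trick, with made-up
names rather than the ones the patch adds:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdbool.h>

    /* Top-level asm emits a few bytes of code bracketed by global labels. */
    asm (".text;"
         ".globl demo_region_start; demo_region_start:"
         "nop; nop; nop;"
         ".globl demo_region_end; demo_region_end:");

    /* From C, the labels are just addresses; 'extern char foo[]' lets us
     * take those addresses without implying any storage or dereference. */
    extern char demo_region_start[], demo_region_end[];

    static bool pc_in_demo_region(uintptr_t pc)
    {
        return (uintptr_t)demo_region_start <= pc &&
               pc < (uintptr_t)demo_region_end;
    }

    int main(void)
    {
        uintptr_t pc = (uintptr_t)demo_region_start + 1;

        printf("%d\n", pc_in_demo_region(pc));    /* prints 1 */
        return 0;
    }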

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/kdebug.c
kern/arch/x86/process64.c
kern/arch/x86/trapentry64.S

diff --git a/kern/arch/x86/kdebug.c b/kern/arch/x86/kdebug.c
index 139353f..2e2693b 100644
--- a/kern/arch/x86/kdebug.c
+++ b/kern/arch/x86/kdebug.c
@@ -327,6 +327,28 @@ void gen_backtrace(void (*pfunc)(void *, const char *), void *opaque)
        print_backtrace_list(pcs, nr_pcs, pfunc, opaque);
 }
 
+static bool pc_is_asm_trampoline(uintptr_t pc)
+{
+       extern char __asm_entry_points_start[], __asm_entry_points_end[];
+       extern char __asm_pop_hwtf_start[], __asm_pop_hwtf_end[];
+       extern char __asm_pop_swtf_start[], __asm_pop_swtf_end[];
+       extern char __asm_pop_vmtf_start[], __asm_pop_vmtf_end[];
+
+       if (((uintptr_t)__asm_entry_points_start <= pc) &&
+           (pc < (uintptr_t)__asm_entry_points_end))
+               return TRUE;
+       if (((uintptr_t)__asm_pop_hwtf_start <= pc) &&
+           (pc < (uintptr_t)__asm_pop_hwtf_end))
+               return TRUE;
+       if (((uintptr_t)__asm_pop_swtf_start <= pc) &&
+           (pc < (uintptr_t)__asm_pop_swtf_end))
+               return TRUE;
+       if (((uintptr_t)__asm_pop_vmtf_start <= pc) &&
+           (pc < (uintptr_t)__asm_pop_vmtf_end))
+               return TRUE;
+       return FALSE;
+}
+
 size_t backtrace_list(uintptr_t pc, uintptr_t fp, uintptr_t *pcs,
                       size_t nr_slots)
 {
@@ -334,8 +356,11 @@ size_t backtrace_list(uintptr_t pc, uintptr_t fp, uintptr_t *pcs,
 
        while (nr_pcs < nr_slots) {
                pcs[nr_pcs++] = pc;
-               if (!fp || fp < KERNBASE)
+               if (pc_is_asm_trampoline(pc))
+                       break;
+               if (!fp)
                        break;
+               assert(KERNBASE <= fp);
                printd("PC %p FP %p\n", pc, fp);
                /* We used to set PC = retaddr - 1, where the -1 would put our PC back
                 * inside the function that called us.  This was for obscure cases where
diff --git a/kern/arch/x86/process64.c b/kern/arch/x86/process64.c
index dd31ddb..d122748 100644
--- a/kern/arch/x86/process64.c
+++ b/kern/arch/x86/process64.c
@@ -19,7 +19,9 @@ static void __attribute__((noreturn)) proc_pop_hwtf(struct hw_trapframe *tf)
                write_gsbase(tf->tf_gsbase);
                write_fsbase(tf->tf_fsbase);
        }
-       asm volatile ("movq %0, %%rsp;          "
+       asm volatile (".globl __asm_pop_hwtf_start;"
+                     "__asm_pop_hwtf_start:    "
+                     "movq %0, %%rsp;          "
                      "popq %%rax;              "
                      "popq %%rbx;              "
                      "popq %%rcx;              "
@@ -36,7 +38,9 @@ static void __attribute__((noreturn)) proc_pop_hwtf(struct hw_trapframe *tf)
                      "popq %%r14;              "
                      "popq %%r15;              "
                      "addq $0x10, %%rsp;       "
-                     "iretq                    "
+                     "iretq;                   "
+                     ".globl __asm_pop_hwtf_end;"
+                     "__asm_pop_hwtf_end:      "
                      : : "g" (&tf->tf_rax) : "memory");
        panic("iretq failed");
 }
@@ -52,7 +56,9 @@ static void __attribute__((noreturn)) proc_pop_swtf(struct sw_trapframe *tf)
        /* We need to 0 out any registers that aren't part of the sw_tf and that we
         * won't use/clobber on the out-path.  While these aren't part of the sw_tf,
         * we also don't want to leak any kernel register content. */
-       asm volatile ("movq %0, %%rsp;          "
+       asm volatile (".globl __asm_pop_swtf_start;"
+                     "__asm_pop_swtf_start:    "
+                     "movq %0, %%rsp;          "
                      "movq $0, %%rax;          "
                      "movq $0, %%rdx;          "
                      "movq $0, %%rsi;          "
@@ -69,7 +75,9 @@ static void __attribute__((noreturn)) proc_pop_swtf(struct sw_trapframe *tf)
                      "movq %1, %%r11;          "
                      "popq %%rcx;              "
                      "popq %%rsp;              "
-                     "rex.w sysret             "
+                     "rex.w sysret;            "
+                     ".globl __asm_pop_swtf_end;"
+                     "__asm_pop_swtf_end:      "
                      : : "g"(&tf->tf_rbx), "i"(FL_IF) : "memory");
        panic("sysret failed");
 }
@@ -126,7 +134,9 @@ static void __attribute__((noreturn)) proc_pop_vmtf(struct vm_trapframe *tf)
         * Likewise, we don't want to lose rbp via the clobber list.
         *
         * Partial contexts have already been launched, so we resume them. */
-       asm volatile ("testl $"STRINGIFY(VMCTX_FL_PARTIAL)", %c[flags](%0);"
+       asm volatile (".globl __asm_pop_vmtf_start;"
+                     "__asm_pop_vmtf_start:     "
+                     "testl $"STRINGIFY(VMCTX_FL_PARTIAL)", %c[flags](%0);"
                      "pushq %%rbp;              "      /* save in case we fail */
                      "movq %c[rbx](%0), %%rbx;  "
                      "movq %c[rcx](%0), %%rcx;  "
@@ -148,6 +158,8 @@ static void __attribute__((noreturn)) proc_pop_vmtf(struct vm_trapframe *tf)
                      "jmp 2f;                   "
                      "1: "ASM_VMX_VMRESUME";    "      /* partials get resumed */
                      "2: popq %%rbp;            "      /* vmlaunch failed */
+                     ".globl __asm_pop_vmtf_end;"
+                     "__asm_pop_vmtf_end:       "
                      :
                      : "a" (tf),
                        [rax]"i"(offsetof(struct vm_trapframe, tf_rax)),
diff --git a/kern/arch/x86/trapentry64.S b/kern/arch/x86/trapentry64.S
index 0128331..7c4175d 100644
--- a/kern/arch/x86/trapentry64.S
+++ b/kern/arch/x86/trapentry64.S
        .quad name;                                                     \
        .long num
 
+# Set data for the trap_tbl symbol.  Set text for the entry_points.  The various
+# trap/irq handler macros will toggle data on and off as needed.
 .data
 .globl trap_tbl
 trap_tbl:
+.text
+.globl __asm_entry_points_start
+__asm_entry_points_start:
 
 /* Generate entry points for the different traps.  Note that all of these bounce
  * off the corresponding trap.c function, such as handle_irqs, and that the name
@@ -895,3 +900,6 @@ vmexit_handler:
        call handle_vmexit
 vmexit_spin:
        jmp vmexit_spin
+
+.globl __asm_entry_points_end
+__asm_entry_points_end: