Moves backtrace_kframe()
[akaros.git] / kern / arch / x86 / process64.c
1 #include <arch/arch.h>
2 #include <trap.h>
3 #include <process.h>
4 #include <pmap.h>
5 #include <smp.h>
6
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10
11 void proc_pop_ctx(struct user_context *ctx)
12 {
13         disable_irq();
14         /* for both HW and SW, note we pass an offset into the TF, beyond the fs and
15          * gs bases */
16         if (ctx->type == ROS_HW_CTX) {
17                 struct hw_trapframe *tf = &ctx->tf.hw_tf;
18                 write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
19                 write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
20                 asm volatile ("movq %0, %%rsp;          "
21                               "popq %%rax;              "
22                               "popq %%rbx;              "
23                               "popq %%rcx;              "
24                               "popq %%rdx;              "
25                               "popq %%rbp;              "
26                               "popq %%rsi;              "
27                               "popq %%rdi;              "
28                               "popq %%r8;               "
29                               "popq %%r9;               "
30                               "popq %%r10;              "
31                               "popq %%r11;              "
32                               "popq %%r12;              "
33                               "popq %%r13;              "
34                               "popq %%r14;              "
35                               "popq %%r15;              "
36                               "addq $0x10, %%rsp;       "
37                               "iretq                    "
38                               : : "g" (&tf->tf_rax) : "memory");
39                 panic("iretq failed");
40         } else {
41                 struct sw_trapframe *tf = &ctx->tf.sw_tf;
42                 write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
43                 write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
44                 /* We need to 0 out any registers that aren't part of the sw_tf and that
45                  * we won't use/clobber on the out-path.  While these aren't part of the
46                  * sw_tf, we also don't want to leak any kernel register content. */
47                 asm volatile ("movq %0, %%rsp;          "
48                               "movq $0, %%rax;          "
49                                           "movq $0, %%rdx;          "
50                                           "movq $0, %%rsi;          "
51                                           "movq $0, %%rdi;          "
52                                           "movq $0, %%r8;           "
53                                           "movq $0, %%r9;           "
54                                           "movq $0, %%r10;          "
55                               "popq %%rbx;              "
56                               "popq %%rbp;              "
57                               "popq %%r12;              "
58                               "popq %%r13;              "
59                               "popq %%r14;              "
60                               "popq %%r15;              "
61                                           "movq %1, %%r11;          "
62                               "popq %%rcx;              "
63                               "popq %%rsp;              "
64                               "rex.w sysret             "
65                               : : "g"(&tf->tf_rbx), "i"(FL_IF) : "memory");
66                 panic("sysret failed");
67         }
68         panic("Unknown context type!\n");
69 }
70
/* Helper: poisons *addr unless it is a canonical user address, i.e. unless
 * bits 47 and above are all clear.  Meant for values that the hardware requires
 * to be canonical (like MSR_FS_BASE).
 *
 * addr: in/out.  Left untouched when already a canonical user address,
 * otherwise overwritten with the poison value 0x5a5a5a5a. */
static void enforce_user_canon(uintptr_t *addr)
{
        uintptr_t upper_bits = *addr >> 47;

        if (upper_bits)
                *addr = 0x5a5a5a5a;
}
78
79 void proc_init_ctx(struct user_context *ctx, uint32_t vcoreid, uintptr_t entryp,
80                    uintptr_t stack_top, uintptr_t tls_desc)
81 {
82         struct sw_trapframe *sw_tf = &ctx->tf.sw_tf;
83         /* zero the entire structure for any type, prevent potential disclosure */
84         memset(ctx, 0, sizeof(struct user_context));
85         ctx->type = ROS_SW_CTX;
86         /* Stack pointers in a fresh stackframe need to be such that adding or
87          * subtracting 8 will result in 16 byte alignment (AMD64 ABI).  The reason
88          * is so that input arguments (on the stack) are 16 byte aligned.  The
89          * extra 8 bytes is the retaddr, pushed on the stack.  Compilers know they
90          * can subtract 8 to get 16 byte alignment for instructions like movaps. */
91         sw_tf->tf_rsp = ROUNDDOWN(stack_top, 16) - 8;
92         sw_tf->tf_rip = entryp;
93         sw_tf->tf_rbp = 0;      /* for potential backtraces */
94         sw_tf->tf_mxcsr = 0x00001f80;   /* x86 default mxcsr */
95         sw_tf->tf_fpucw = 0x037f;               /* x86 default FP CW */
96         /* Coupled closely with user's entry.S.  id is the vcoreid, which entry.S
97          * uses to determine what to do.  vcoreid == 0 is the main core/context. */
98         sw_tf->tf_rbx = vcoreid;
99         sw_tf->tf_fsbase = tls_desc;
100         proc_secure_ctx(ctx);
101 }
102
103 void proc_secure_ctx(struct user_context *ctx)
104 {
105         if (ctx->type == ROS_SW_CTX) {
106                 struct sw_trapframe *tf = &ctx->tf.sw_tf;
107                 enforce_user_canon(&tf->tf_gsbase);
108                 enforce_user_canon(&tf->tf_fsbase);
109                 enforce_user_canon(&tf->tf_rip);
110         } else {
111                 /* If we aren't SW, we're assuming (and forcing) a HW ctx.  If this is
112                  * somehow fucked up, userspace should die rather quickly. */
113                 struct hw_trapframe *tf = &ctx->tf.hw_tf;
114                 ctx->type = ROS_HW_CTX;
115                 enforce_user_canon(&tf->tf_gsbase);
116                 enforce_user_canon(&tf->tf_fsbase);
117                 /* GD_UD is the user data segment selector in the GDT, and
118                  * GD_UT is the user text segment selector (see inc/memlayout.h).
119                  * The low 2 bits of each segment register contains the
120                  * Requestor Privilege Level (RPL); 3 means user mode. */
121                 tf->tf_ss = GD_UD | 3;
122                 tf->tf_cs = GD_UT | 3;
123                 tf->tf_rflags |= FL_IF;
124         }
125 }
126
127 /* Called when we are currently running an address space on our core and want to
128  * abandon it.  We need a known good pgdir before releasing the old one.  We
129  * decref, since current no longer tracks the proc (and current no longer
130  * protects the cr3). */
131 void __abandon_core(void)
132 {
133         struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
134         lcr3(boot_cr3);
135         proc_decref(pcpui->cur_proc);
136         pcpui->cur_proc = 0;
137 }