#include <arch/arch.h>
#include <trap.h>
#include <process.h>
#include <pmap.h>
#include <smp.h>

#include <string.h>
#include <assert.h>
#include <stdio.h>

void proc_pop_ctx(struct user_context *ctx)
{
        disable_irq();
        /* for both HW and SW, note we pass an offset into the TF, beyond the fs and
         * gs bases */
        if (ctx->type == ROS_HW_CTX) {
                struct hw_trapframe *tf = &ctx->tf.hw_tf;
                write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
                write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
                asm volatile ("movq %0, %%rsp;          "
                              "popq %%rax;              "
                              "popq %%rbx;              "
                              "popq %%rcx;              "
                              "popq %%rdx;              "
                              "popq %%rbp;              "
                              "popq %%rsi;              "
                              "popq %%rdi;              "
                              "popq %%r8;               "
                              "popq %%r9;               "
                              "popq %%r10;              "
                              "popq %%r11;              "
                              "popq %%r12;              "
                              "popq %%r13;              "
                              "popq %%r14;              "
                              "popq %%r15;              "
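                              /* The 16 bytes skipped next are assumed to be
                               * the trapframe's trap number and error code
                               * slots, which sit between r15 and the rip/cs/
                               * rflags/rsp/ss frame that iretq consumes. */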
                              "addq $0x10, %%rsp;       "
                              "iretq                    "
                              : : "g" (&tf->tf_rax) : "memory");
                panic("iretq failed");
        } else {
                struct sw_trapframe *tf = &ctx->tf.sw_tf;
                write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
                write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
                /* We need to 0 out any registers that aren't part of the sw_tf and that
                 * we won't use/clobber on the out-path.  While these aren't part of the
                 * sw_tf, we also don't want to leak any kernel register content. */
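                /* Reading aid (not the authoritative layout): the pops below
                 * assume the sw_trapframe stores its callee-saved fields in
                 * the order rbx, rbp, r12-r15, followed by rip and rsp; see
                 * the arch trapframe header for the real definition. */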
                asm volatile ("movq %0, %%rsp;          "
                              "movq $0, %%rax;          "
                              "movq $0, %%rdx;          "
                              "movq $0, %%rsi;          "
                              "movq $0, %%rdi;          "
                              "movq $0, %%r8;           "
                              "movq $0, %%r9;           "
                              "movq $0, %%r10;          "
                              "popq %%rbx;              "
                              "popq %%rbp;              "
                              "popq %%r12;              "
                              "popq %%r13;              "
                              "popq %%r14;              "
                              "popq %%r15;              "
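                              /* sysret jumps to the rip in rcx and reloads
                               * rflags from r11, so pop tf_rip into rcx and
                               * load FL_IF into r11: the user resumes with
                               * interrupts enabled. */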
                              "movq %1, %%r11;          "
                              "popq %%rcx;              "
                              "popq %%rsp;              "
                              "rex.w sysret             "
                              : : "g"(&tf->tf_rbx), "i"(FL_IF) : "memory");
                panic("sysret failed");
        }
        panic("Unknown context type!\n");
}

/* Helper: if *addr isn't a canonical user address, poison it.  Use this when
 * you need a canonical address (like MSR_FS_BASE) */
static void enforce_user_canon(uintptr_t *addr)
{
        if (*addr >> 47 != 0)
                *addr = 0x5a5a5a5a;
}
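
/* Example (sketch): a user pointer like 0x00007f0000001000 has bits 63:47
 * clear and passes through unchanged, while anything in the kernel half of
 * the address space, e.g. 0xffff800000000000, gets rewritten to the
 * 0x5a5a5a5a poison value. */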

void proc_init_ctx(struct user_context *ctx, uint32_t vcoreid, uintptr_t entryp,
                   uintptr_t stack_top, uintptr_t tls_desc)
{
        struct sw_trapframe *sw_tf = &ctx->tf.sw_tf;
        /* zero the entire structure for any type, to prevent potential disclosure */
        memset(ctx, 0, sizeof(struct user_context));
        ctx->type = ROS_SW_CTX;
        /* Stack pointers in a fresh stack frame need to be 16 byte aligned
         * (AMD64 ABI).  If we call this function from within load_elf(), it
         * should already be aligned properly, but we round again here for good
         * measure.  We used to subtract an extra 8 bytes here to allow us to
         * write our _start() function in C instead of assembly.  This was
         * necessary to account for a preamble inserted by the compiler, which
         * assumed a return address was pushed on the stack.  Now that we
         * properly pass our arguments on the stack, we will have to rewrite
         * our _start() function in assembly to handle things properly. */
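        /* Worked example (sketch): a stack_top of 0x00007f7f12345678 rounds
         * down to 0x00007f7f12345670, which is 16-byte aligned. */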
        sw_tf->tf_rsp = ROUNDDOWN(stack_top, 16);
        sw_tf->tf_rip = entryp;
        sw_tf->tf_rbp = 0;      /* for potential backtraces */
        sw_tf->tf_mxcsr = 0x00001f80;   /* x86 default mxcsr */
        sw_tf->tf_fpucw = 0x037f;       /* x86 default FP CW */
        /* Coupled closely with user's entry.S.  id is the vcoreid, which entry.S
         * uses to determine what to do.  vcoreid == 0 is the main core/context. */
        sw_tf->tf_rbx = vcoreid;
        sw_tf->tf_fsbase = tls_desc;
        proc_secure_ctx(ctx);
}
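
/* Minimal caller sketch (hypothetical values; real callers live in the
 * arch-independent proc code):
 *
 *      struct user_context ctx;
 *
 *      proc_init_ctx(&ctx, 0, elf_entry_point, ustack_top, 0);
 *      proc_pop_ctx(&ctx);     // does not return
 *
 * Here elf_entry_point and ustack_top stand in for whatever entry point and
 * stack top the caller has prepared. */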

void proc_secure_ctx(struct user_context *ctx)
{
        if (ctx->type == ROS_SW_CTX) {
                struct sw_trapframe *tf = &ctx->tf.sw_tf;
                enforce_user_canon(&tf->tf_gsbase);
                enforce_user_canon(&tf->tf_fsbase);
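                /* The SW context returns to userspace via sysret, which can
                 * fault in the kernel on a non-canonical rip (with the user's
                 * rsp already loaded on some CPUs), so sanitize rip here as
                 * well. */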
                enforce_user_canon(&tf->tf_rip);
        } else {
                /* If we aren't SW, we're assuming (and forcing) a HW ctx.  If this is
                 * somehow fucked up, userspace should die rather quickly. */
                struct hw_trapframe *tf = &ctx->tf.hw_tf;
                ctx->type = ROS_HW_CTX;
                enforce_user_canon(&tf->tf_gsbase);
                enforce_user_canon(&tf->tf_fsbase);
                /* GD_UD is the user data segment selector in the GDT, and
                 * GD_UT is the user text segment selector (see inc/memlayout.h).
                 * The low 2 bits of each segment register contain the
                 * Requestor Privilege Level (RPL); 3 means user mode. */
                tf->tf_ss = GD_UD | 3;
                tf->tf_cs = GD_UT | 3;
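                /* For example (hypothetical selector value): if GD_UD were
                 * 0x20, then GD_UD | 3 == 0x23, i.e. GDT index 4 with
                 * RPL = 3. */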
                tf->tf_rflags |= FL_IF;
        }
}

/* Called when we are currently running an address space on our core and want to
 * abandon it.  We need a known good pgdir before releasing the old one.  We
 * decref, since current no longer tracks the proc (and current no longer
 * protects the cr3). */
void __abandon_core(void)
{
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
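        /* Switch to the kernel's boot page tables before dropping our ref:
         * once we decref, the old pgdir may be freed out from under us. */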
        lcr3(boot_cr3);
        proc_decref(pcpui->cur_proc);
        pcpui->cur_proc = 0;
}