x86_64: save/pop kernel contexts
[akaros.git] / kern / arch / x86 / process64.c
1 #include <arch/arch.h>
2 #include <trap.h>
3 #include <process.h>
4 #include <pmap.h>
5 #include <smp.h>
6
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10
/* Restores a user context on this core and leaves the kernel through it.
 *
 * ctx->type selects the exit path: a ROS_HW_CTX frame has its registers popped
 * one by one and is left with iretq; any other type is treated as a SW context
 * and is left with sysret.  Neither path is expected to come back here: each
 * asm block is followed by a panic() in case it does. */
11 void proc_pop_ctx(struct user_context *ctx)
12 {
           /* IRQs go off before rsp is repointed into the trapframe below.
            * NOTE(review): presumably so nothing can push onto that "stack"
            * while it is being popped -- confirm. */
13         disable_irq();
14         /* for both HW and SW, note we pass an offset into the TF, beyond the fs and
15          * gs bases */
16         if (ctx->type == ROS_HW_CTX) {
17                 struct hw_trapframe *tf = &ctx->tf.hw_tf;
                   /* Load the GS and FS base MSRs from the values saved in the
                    * trapframe; they are not part of what the asm pops. */
18                 write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
19                 write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
                   /* Point rsp at tf_rax and pop the general purpose registers
                    * in the order listed.  The addq then steps over 16 more
                    * bytes so that iretq finds its frame at the top of the stack.
                    * NOTE(review): assumes struct hw_trapframe lays its fields
                    * out in exactly this pop order and has two 8-byte slots
                    * (presumably trap number and error code) between tf_r15 and
                    * tf_rip -- confirm against the struct definition. */
20                 asm volatile ("movq %0, %%rsp;          "
21                               "popq %%rax;              "
22                               "popq %%rbx;              "
23                               "popq %%rcx;              "
24                               "popq %%rdx;              "
25                               "popq %%rbp;              "
26                               "popq %%rsi;              "
27                               "popq %%rdi;              "
28                               "popq %%r8;               "
29                               "popq %%r9;               "
30                               "popq %%r10;              "
31                               "popq %%r11;              "
32                               "popq %%r12;              "
33                               "popq %%r13;              "
34                               "popq %%r14;              "
35                               "popq %%r15;              "
36                               "addq $0x10, %%rsp;       "
37                               "iretq                    "
38                               : : "g" (&tf->tf_rax) : "memory");
                   /* Only reached if control falls out of the asm block. */
39                 panic("iretq failed");
40         } else {
41                 struct sw_trapframe *tf = &ctx->tf.sw_tf;
                   /* Same as the HW path: the segment bases come from the
                    * trapframe via MSR writes. */
42                 write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase);
43                 write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase);
44                 /* We need to 0 out any registers that aren't part of the sw_tf and that
45                  * we won't use/clobber on the out-path.  While these aren't part of the
46                  * sw_tf, we also don't want to leak any kernel register content. */
                   /* Point rsp at tf_rbx, zero rax/rdx/rsi/rdi/r8/r9/r10, then pop
                    * rbx, rbp and r12-r15 from the trapframe.  r11 is loaded with
                    * FL_IF and rcx is popped next because sysret takes the return
                    * rip from rcx and rflags from r11.  rsp is popped last, since
                    * after that pop the stack pointer no longer refers to the
                    * trapframe.
                    * NOTE(review): assumes struct sw_trapframe stores rbx, rbp,
                    * r12-r15, rip, rsp contiguously in that order starting at
                    * tf_rbx -- confirm against the struct definition. */
47                 asm volatile ("movq %0, %%rsp;          "
48                               "movq $0, %%rax;          "
49                                           "movq $0, %%rdx;          "
50                                           "movq $0, %%rsi;          "
51                                           "movq $0, %%rdi;          "
52                                           "movq $0, %%r8;           "
53                                           "movq $0, %%r9;           "
54                                           "movq $0, %%r10;          "
55                               "popq %%rbx;              "
56                               "popq %%rbp;              "
57                               "popq %%r12;              "
58                               "popq %%r13;              "
59                               "popq %%r14;              "
60                               "popq %%r15;              "
61                                           "movq %1, %%r11;          "
62                               "popq %%rcx;              "
63                               "popq %%rsp;              "
64                               "rex.w sysret             "
65                               : : "g"(&tf->tf_rbx), "i"(FL_IF) : "memory");
                   /* Only reached if control falls out of the asm block. */
66                 panic("sysret failed");
67         }
68 }
69
70 /* TODO: consider using a SW context */
71 void proc_init_ctx(struct user_context *ctx, uint32_t vcoreid, uintptr_t entryp,
72                    uintptr_t stack_top)
73 {
74         struct hw_trapframe *tf = &ctx->tf.hw_tf;
75         ctx->type = ROS_HW_CTX;
76         memset(tf, 0, sizeof(*tf));
77         /* Set up appropriate initial values for the segment registers.
78          * GD_UD is the user data segment selector in the GDT, and
79          * GD_UT is the user text segment selector (see inc/memlayout.h).
80          * The low 2 bits of each segment register contains the
81          * Requestor Privilege Level (RPL); 3 means user mode. */
82         tf->tf_ss = GD_UD | 3;
83         tf->tf_rsp = stack_top-64;
84         tf->tf_cs = GD_UT | 3;
85         /* set the env's EFLAGSs to have interrupts enabled */
86         tf->tf_rflags |= 0x00000200; // bit 9 is the interrupts-enabled
87         tf->tf_rip = entryp;
88         /* Coupled closely with user's entry.S.  id is the vcoreid, which entry.S
89          * uses to determine what to do.  vcoreid == 0 is the main core/context. */
90         tf->tf_rax = vcoreid;
91 }
92
93 /* TODO: handle both HW and SW contexts.  Though I think this is only ever
94  * called on HW ctxs (we never try to pop a SW ctx that userspace had access
95  * to). */
96 void proc_secure_ctx(struct user_context *ctx)
97 {
98         struct hw_trapframe *tf = &ctx->tf.hw_tf;
99         ctx->type = ROS_HW_CTX;
100         tf->tf_ss = GD_UD | 3;
101         tf->tf_cs = GD_UT | 3;
102         tf->tf_rflags |= 0x00000200; // bit 9 is the interrupts-enabled
103 }
104
105 /* Called when we are currently running an address space on our core and want to
106  * abandon it.  We need a known good pgdir before releasing the old one.  We
107  * decref, since current no longer tracks the proc (and current no longer
108  * protects the cr3). */
109 void __abandon_core(void)
110 {
111         struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
112         lcr3(boot_cr3);
113         proc_decref(pcpui->cur_proc);
114         pcpui->cur_proc = 0;
115 }