parlib: Improve panic() and assert()
[akaros.git] / kern / arch / x86 / trap.c
index f818cae..6b365d5 100644 (file)
@@ -85,9 +85,7 @@ const char *x86_trapname(int trapno)
 }
 
 /* Set stacktop for the current core to be the stack the kernel will start on
- * when trapping/interrupting from userspace.  Don't use this til after
- * smp_percpu_init().  We can probably get the TSS by reading the task register
- * and then the GDT.  Still, it's a pain. */
+ * when trapping/interrupting from userspace. */
 void set_stack_top(uintptr_t stacktop)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
@@ -102,11 +100,7 @@ uintptr_t get_stack_top(void)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
        uintptr_t stacktop;
-       /* so we can check this in interrupt handlers (before smp_boot()) */
-       /* TODO: These are dangerous - it assumes we're on a one-page stack.  If we
-        * change it to KSTKSIZE, then we assume stacks are KSTKSIZE-aligned */
-       if (!pcpui->tss)
-               return ROUNDUP(read_sp(), PGSIZE);
+
        stacktop = x86_get_stacktop_tss(pcpui->tss);
        if (stacktop != ROUNDUP(read_sp(), PGSIZE))
                panic("Bad stacktop: %p esp one is %p\n", stacktop,
@@ -156,15 +150,17 @@ void idt_init(void)
        idt[T_BRKPT].gd_dpl = 3;
        /* Send NMIs to their own stack (IST1 in every core's TSS) */
        idt[T_NMI].gd_ist = 1;
+       /* Send double faults to their own stack (IST2 in every core's TSS) */
+       idt[T_DBLFLT].gd_ist = 2;
+
+       /* The sooner we set this, the sooner we can use set/get_stack_top. */
+       per_cpu_info[0].tss = &ts;
+       per_cpu_info[0].gdt = gdt;
 
        /* Set up our kernel stack when changing rings */
        /* Note: we want 16 byte aligned kernel stack frames (AMD 2:8.9.3) */
-       x86_set_stacktop_tss(&ts, (uintptr_t)bootstacktop);
-       x86_sysenter_init((uintptr_t)bootstacktop);
-
-#ifdef CONFIG_KTHREAD_POISON
-       *kstack_bottom_addr((uintptr_t)bootstacktop) = 0xdeadbeef;
-#endif /* CONFIG_KTHREAD_POISON */
+       x86_sysenter_init();
+       set_stack_top((uintptr_t)bootstacktop);
 
        /* Initialize the TSS field of the gdt.  The size of the TSS desc differs
         * between 64 and 32 bit, hence the pointer acrobatics */
@@ -208,7 +204,7 @@ void idt_init(void)
        register_irq(I_KERNEL_MSG, handle_kmsg_ipi, NULL, MKBUS(BusIPI, 0, 0, 0));
 }
 
-static void handle_fperr(struct hw_trapframe *hw_tf)
+static void print_fperr(struct hw_trapframe *hw_tf)
 {
        uint16_t fpcw, fpsw;
        uint32_t mxcsr;
@@ -240,8 +236,6 @@ static void handle_fperr(struct hw_trapframe *hw_tf)
                printk("\tNumeric Underflow\n");
        if (fpsw & ~fpcw & FP_EXCP_PE)
                printk("\tInexact result (precision)\n");
-       printk("Killing the process.\n");
-       proc_destroy(current);
 }
 
 static bool __handler_user_page_fault(struct hw_trapframe *hw_tf,
@@ -268,6 +262,11 @@ static bool __handler_kernel_page_fault(struct hw_trapframe *hw_tf,
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
        int err;
 
+       /* The only thing an NMI handler that faults can do is a fixup */
+       if (pcpui->nmi_status != NMI_NORMAL_OPN) {
+               assert(in_kernel(hw_tf));
+               return try_handle_exception_fixup(hw_tf);
+       }
        /* In general, if there's no cur_proc, a KPF is a bug. */
        if (!pcpui->cur_proc) {
                /* This only runs from test_uaccess(), where it is expected to fail. */
@@ -535,21 +534,30 @@ void handle_nmi(struct hw_trapframe *hw_tf)
        assert(0);
 }
 
+void handle_double_fault(struct hw_trapframe *hw_tf)
+{      /* Reports the trapframe and a backtrace for hw_tf, then panics. */
+       print_trapframe(hw_tf); /* dump the trapframe we were handed */
+       backtrace_hwtf(hw_tf);  /* presumably walks the stack from hw_tf */
+       panic("Double fault!  Check the kernel stack pointer; you likely ran off the end of the stack.");
+}
+
 /* Certain traps want IRQs enabled, such as the syscall.  Others can't handle
  * it, like the page fault handler.  Turn them on on a case-by-case basis. */
 static void trap_dispatch(struct hw_trapframe *hw_tf)
 {
        struct per_cpu_info *pcpui;
-       bool handled = TRUE;
+       bool handled = FALSE;
        unsigned long aux = 0;
        uintptr_t fixup_ip;
 
        // Handle processor exceptions.
        switch(hw_tf->tf_trapno) {
                case T_BRKPT:
-                       enable_irq();
-                       monitor(hw_tf);
-                       disable_irq();
+                       if (!in_kernel(hw_tf))
+                               backtrace_user_ctx(current, current_ctx);
+                       else
+                               monitor(hw_tf);
+                       handled = TRUE;
                        break;
                case T_ILLOP:
                {
@@ -573,7 +581,8 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
                            *(uint8_t*)(ip + 2) == 0xf9) {
                                x86_fake_rdtscp(hw_tf);
                                pcpui->__lock_checking_enabled++;       /* for print debugging */
-                               return;
+                               handled = TRUE;
+                               break;
                        }
                        enable_irq();
                        monitor(hw_tf);
@@ -584,10 +593,9 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
                case T_PGFLT:
                        handled = __handle_page_fault(hw_tf, &aux);
                        break;
+               case T_GPFLT:
                case T_FPERR:
                        handled = try_handle_exception_fixup(hw_tf);
-                       if (!handled)
-                               handle_fperr(hw_tf);
                        break;
                case T_SYSCALL:
                        enable_irq();
@@ -599,21 +607,17 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
                                      (struct syscall*)x86_get_systrap_arg0(hw_tf),
                                                  (unsigned int)x86_get_systrap_arg1(hw_tf));
                        disable_irq();
+                       handled = TRUE;
                        break;
-               default:
-                       if (hw_tf->tf_cs == GD_KT) {
-                               handled = try_handle_exception_fixup(hw_tf);
-                               if (!handled) {
-                                       print_trapframe(hw_tf);
-                                       panic("Damn Damn!  Unhandled trap in the kernel!");
-                               }
-                       } else {
-                               handled = FALSE;
-                       }
        }
 
-       if (!handled)
+       if (!handled) {
+               if (in_kernel(hw_tf)) {
+                       print_trapframe(hw_tf);
+                       panic("Damn Damn!  Unhandled trap in the kernel!");
+               }
                reflect_unhandled_trap(hw_tf->tf_trapno, hw_tf->tf_err, aux);
+       }
 }
 
 /* Helper.  For now, this copies out the TF to pcpui.  Eventually, we should
@@ -880,23 +884,41 @@ static bool handle_vmexit_cpuid(struct vm_trapframe *tf)
 {
        uint32_t eax, ebx, ecx, edx;
 
-       /* 0x4000000 is taken from Linux; it is not documented but it signals the
-        * use of KVM. */
-       if (tf->tf_rax == 0x40000000) {
-               /* Pretend to be KVM: Return the KVM signature by placing the following
-                * constants in RAX, RBX, RCX and RDX. RAX is set to 0, while RBX to
-                * RDX forms the string "KVMKVMKVMKVM\0\0\0". This can be placed in
-                * 0x100 offsets from 0x40000000 to 0x40010000. */
-               eax = 0;
-               ebx = 0x4b4d564b;
-               ecx = 0x564b4d56;
-               edx = 0x0000004d;
-       } else {
-               cpuid(tf->tf_rax, tf->tf_rcx, &eax, &ebx, &ecx, &edx);
-               if (tf->tf_rax == 1) {
+       if (tf->tf_rax == 0x0B)
+               return FALSE;   // Handle in userspace.
+
+       cpuid(tf->tf_rax, tf->tf_rcx, &eax, &ebx, &ecx, &edx);
+       switch (tf->tf_rax) {
+               case 0x01:
                        /* Set the hypervisor bit to let the guest know it is virtualized */
                        ecx |= 1 << 31;
-               }
+                       /* Unset the vmx capability bit so that the guest does not try
+                        * to turn it on. */
+                       ecx &= ~(1 << 5);
+                       /* Unset the perf capability bit so that the guest does not try
+                        * to turn it on. */
+                       ecx &= ~(1 << 15);
+
+                       /* Set the guest pcore id into the apic ID field in CPUID. */
+                       ebx &= 0x0000ffff;
+                       ebx |= (current->vmm.nr_guest_pcores & 0xff) << 16;
+                       ebx |= (tf->tf_guest_pcoreid & 0xff) << 24;
+                       break;
+               case 0x0A:
+                       eax = 0;
+                       ebx = 0;
+                       ecx = 0;
+                       edx = 0;
+                       break;
+               /* Signal the use of KVM: ebx/ecx/edx spell "KVMKVMKVM". */
+               case 0x40000000:
+                       eax = 0;
+                       ebx = 0x4b4d564b;
+                       ecx = 0x564b4d56;
+                       edx = 0x0000004d;
+                       break;
+               default:
+                       break;
        }
        tf->tf_rax = eax;
        tf->tf_rbx = ebx;
@@ -945,7 +967,7 @@ bool handle_vmexit_msr(struct vm_trapframe *tf)
 {
        bool ret;
 
-       ret = vmm_emulate_msr(&tf->tf_rcx, &tf->tf_rdx, &tf->tf_rax,
+       ret = vmm_emulate_msr(tf,
                              (tf->tf_exit_reason == EXIT_REASON_MSR_READ
                                                   ? VMM_MSR_EMU_READ : VMM_MSR_EMU_WRITE));
        if (ret)
@@ -1039,7 +1061,8 @@ static void vmexit_dispatch(struct vm_trapframe *tf)
         * do it for external IRQs - the irq_dispatch code will handle it. */
        switch (tf->tf_exit_reason) {
        case EXIT_REASON_VMCALL:
-               if (current->vmm.flags & VMM_VMCALL_PRINTF) {
+               if (current->vmm.flags & VMM_VMCALL_PRINTF &&
+                   tf->tf_rax == VMCALL_PRINTC) {
                        printk("%c", tf->tf_rdi);
                        tf->tf_rip += 3;
                        handled = TRUE;
@@ -1108,10 +1131,59 @@ void handle_vmexit(struct vm_trapframe *tf)
        proc_restartcore();
 }
 
-void x86_finalize_vmtf(struct vm_trapframe *tf)
+/* Partial contexts for HW and SW TFs have the user's gs in MSR_KERNEL_GS_BASE,
+ * while the kernel's gs is loaded into gs.  Finalizing saves the user's
+ * {GS,FS}_BASE into their TF so it can run on another core, and puts the
+ * kernel's gs into KERNEL_GS_BASE so the core is ready to run another full
+ * context.  GS itself stays loaded with the current GS (the kernel's). */
+static void x86_finalize_hwtf(struct hw_trapframe *tf)
+{
+       tf->tf_gsbase = read_kern_gsbase();     /* user's gs; read before overwrite */
+       write_kern_gsbase(read_gsbase());       /* kernel's gs -> KERNEL_GS_BASE */
+       tf->tf_fsbase = read_fsbase();          /* save the FS base into the TF */
+       x86_hwtf_clear_partial(tf);     /* by name: TF no longer partial - confirm */
+}
+
+static void x86_finalize_swtf(struct sw_trapframe *tf)
+{      /* Saves the {GS,FS} base values into a partial SW TF, then clears partial. */
+       tf->tf_gsbase = read_kern_gsbase();     /* must be read before the next line */
+       write_kern_gsbase(read_gsbase());       /* loaded gs base -> KERNEL_GS_BASE */
+       tf->tf_fsbase = read_fsbase();          /* save the FS base into the TF */
+       x86_swtf_clear_partial(tf);     /* by name: TF no longer partial - confirm */
+}
+
+static void x86_finalize_vmtf(struct vm_trapframe *tf) /* finalize a partial VM TF */
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
 
        x86_vmtf_clear_partial(tf);
-       unload_guest_pcore(pcpui->cur_proc, pcpui->guest_pcoreid);
+       unload_guest_pcore(pcpui->owning_proc, pcpui->guest_pcoreid);
+}      /* the unload uses this core's owning_proc and guest_pcoreid from pcpui */
+
+/* Makes sure that the user context is fully saved into ctx and not split across
+ * the struct and HW, meaning it is not a "partial context".
+ *
+ * Be careful to zero out any part of the ctx struct not in use, to avoid
+ * leaking information from other processes. */
+void arch_finalize_ctx(struct user_context *ctx)
+{
+       if (!arch_ctx_is_partial(ctx))
+               return; /* nothing to do: ctx is not a partial context */
+       switch (ctx->type) {    /* NOTE(review): no default; other types untouched */
+       case ROS_HW_CTX:
+               x86_finalize_hwtf(&ctx->tf.hw_tf);
+               memset((uint8_t*)&ctx->tf + sizeof(struct hw_trapframe), 0,
+                          sizeof(ctx->tf) - sizeof(struct hw_trapframe)); /* zero tail */
+               break;
+       case ROS_SW_CTX:
+               x86_finalize_swtf(&ctx->tf.sw_tf);
+               memset((uint8_t*)&ctx->tf + sizeof(struct sw_trapframe), 0,
+                          sizeof(ctx->tf) - sizeof(struct sw_trapframe)); /* zero tail */
+               break;
+       case ROS_VM_CTX:
+               x86_finalize_vmtf(&ctx->tf.vm_tf);
+               memset((uint8_t*)&ctx->tf + sizeof(struct vm_trapframe), 0,
+                          sizeof(ctx->tf) - sizeof(struct vm_trapframe)); /* zero tail */
+               break;
+       }
 }