x86: vmm: Track state for handling vmexits as KERNEL
[akaros.git] / kern / arch / x86 / trap.c
index 2a6134c..9f168fa 100644 (file)
@@ -162,10 +162,6 @@ void idt_init(void)
        x86_sysenter_init();
        set_stack_top((uintptr_t)bootstacktop);
 
-#ifdef CONFIG_KTHREAD_POISON
-       *kstack_bottom_addr((uintptr_t)bootstacktop) = 0xdeadbeef;
-#endif /* CONFIG_KTHREAD_POISON */
-
        /* Initialize the TSS field of the gdt.  The size of the TSS desc differs
         * between 64 and 32 bit, hence the pointer acrobatics */
        syssegdesc_t *ts_slot = (syssegdesc_t*)&gdt[GD_TSS >> 3];
@@ -557,9 +553,10 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
        // Handle processor exceptions.
        switch(hw_tf->tf_trapno) {
                case T_BRKPT:
-                       enable_irq();
-                       monitor(hw_tf);
-                       disable_irq();
+                       if (!in_kernel(hw_tf))
+                               backtrace_user_ctx(current, current_ctx);
+                       else
+                               monitor(hw_tf);
                        handled = TRUE;
                        break;
                case T_ILLOP:
@@ -583,13 +580,8 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
                            *(uint8_t*)(ip + 1) == 0x01,
                            *(uint8_t*)(ip + 2) == 0xf9) {
                                x86_fake_rdtscp(hw_tf);
-                               pcpui->__lock_checking_enabled++;       /* for print debugging */
                                handled = TRUE;
-                               break;
                        }
-                       enable_irq();
-                       monitor(hw_tf);
-                       disable_irq();
                        pcpui->__lock_checking_enabled++;               /* for print debugging */
                        break;
                }
@@ -887,17 +879,28 @@ static bool handle_vmexit_cpuid(struct vm_trapframe *tf)
 {
        uint32_t eax, ebx, ecx, edx;
 
+       if (tf->tf_rax == 0x0B)
+               return FALSE;   // Handle in userspace.
+
        cpuid(tf->tf_rax, tf->tf_rcx, &eax, &ebx, &ecx, &edx);
        switch (tf->tf_rax) {
                case 0x01:
                        /* Set the hypervisor bit to let the guest know it is virtualized */
                        ecx |= 1 << 31;
+                       /* Unset the monitor capability bit so that the guest does not try
+                        * to use monitor/mwait. */
+                       ecx &= ~(1 << 3);
                        /* Unset the vmx capability bit so that the guest does not try
                         * to turn it on. */
                        ecx &= ~(1 << 5);
                        /* Unset the perf capability bit so that the guest does not try
                         * to turn it on. */
                        ecx &= ~(1 << 15);
+
+                       /* Set the guest pcore id into the apic ID field in CPUID. */
+                       ebx &= 0x0000ffff;
+                       ebx |= (current->vmm.nr_guest_pcores & 0xff) << 16;
+                       ebx |= (tf->tf_guest_pcoreid & 0xff) << 24;
                        break;
                case 0x0A:
                        eax = 0;
@@ -912,6 +915,12 @@ static bool handle_vmexit_cpuid(struct vm_trapframe *tf)
                        ecx = 0x564b4d56;
                        edx = 0x0000004d;
                        break;
+               /* Hypervisor Features. */
+               case 0x40000003:
+                       /* Unset the monitor capability bit so that the guest does not try
+                        * to use monitor/mwait. */
+                       edx &= ~(1 << 0);
+                       break;
                default:
                        break;
        }
@@ -932,11 +941,14 @@ static bool handle_vmexit_ept_fault(struct vm_trapframe *tf)
        prot |= tf->tf_exit_qual & VMX_EPT_FAULT_WRITE ? PROT_WRITE : 0;
        prot |= tf->tf_exit_qual & VMX_EPT_FAULT_INS ? PROT_EXEC : 0;
        ret = handle_page_fault(current, tf->tf_guest_pa, prot);
-       if (ret) {
-               /* TODO: maybe put ret in the TF somewhere */
-               return FALSE;
-       }
-       return TRUE;
+       if (ret == 0)
+               return TRUE;
+
+       // Mirror uthread behavior: on -EAGAIN, tell userspace to try again.
+       if (ret == -EAGAIN)
+               tf->tf_flags |= VMCTX_FL_EPT_VMR_BACKED;
+
+       return FALSE;
 }
 
 /* Regarding NMI blocking,
@@ -962,7 +974,7 @@ bool handle_vmexit_msr(struct vm_trapframe *tf)
 {
        bool ret;
 
-       ret = vmm_emulate_msr(&tf->tf_rcx, &tf->tf_rdx, &tf->tf_rax,
+       ret = vmm_emulate_msr(tf,
                              (tf->tf_exit_reason == EXIT_REASON_MSR_READ
                                                   ? VMM_MSR_EMU_READ : VMM_MSR_EMU_WRITE));
        if (ret)
@@ -1056,7 +1068,8 @@ static void vmexit_dispatch(struct vm_trapframe *tf)
         * do it for external IRQs - the irq_dispatch code will handle it. */
        switch (tf->tf_exit_reason) {
        case EXIT_REASON_VMCALL:
-               if (current->vmm.flags & VMM_VMCALL_PRINTF) {
+               if (current->vmm.flags & VMM_VMCALL_PRINTF &&
+                   tf->tf_rax == VMCALL_PRINTC) {
                        printk("%c", tf->tf_rdi);
                        tf->tf_rip += 3;
                        handled = TRUE;
@@ -1117,6 +1130,7 @@ void handle_vmexit(struct vm_trapframe *tf)
        tf->tf_guest_pa = vmcs_read(GUEST_PHYSICAL_ADDRESS);
 
        set_current_ctx_vm(pcpui, tf);
+       __set_cpu_state(pcpui, CPU_STATE_KERNEL);
        tf = &pcpui->cur_ctx->tf.vm_tf;
        vmexit_dispatch(tf);
        /* We're either restarting a partial VM ctx (vmcs was launched, loaded on
@@ -1132,16 +1146,16 @@ void handle_vmexit(struct vm_trapframe *tf)
  * loaded with the current GS (the kernel's). */
 static void x86_finalize_hwtf(struct hw_trapframe *tf)
 {
-       tf->tf_gsbase = read_msr(MSR_KERNEL_GS_BASE);
-       write_msr(MSR_KERNEL_GS_BASE, read_gsbase());
+       tf->tf_gsbase = read_kern_gsbase();
+       write_kern_gsbase(read_gsbase());
        tf->tf_fsbase = read_fsbase();
        x86_hwtf_clear_partial(tf);
 }
 
 static void x86_finalize_swtf(struct sw_trapframe *tf)
 {
-       tf->tf_gsbase = read_msr(MSR_KERNEL_GS_BASE);
-       write_msr(MSR_KERNEL_GS_BASE, read_gsbase());
+       tf->tf_gsbase = read_kern_gsbase();
+       write_kern_gsbase(read_gsbase());
        tf->tf_fsbase = read_fsbase();
        x86_swtf_clear_partial(tf);
 }
@@ -1151,7 +1165,7 @@ static void x86_finalize_vmtf(struct vm_trapframe *tf)
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
 
        x86_vmtf_clear_partial(tf);
-       unload_guest_pcore(pcpui->cur_proc, pcpui->guest_pcoreid);
+       unload_guest_pcore(pcpui->owning_proc, pcpui->guest_pcoreid);
 }
 
 /* Makes sure that the user context is fully saved into ctx and not split across