x86_64: pop non-sysenter TFs, fixes segmentation
author Barret Rhoden <brho@cs.berkeley.edu>
Wed, 26 Jun 2013 00:35:32 +0000 (17:35 -0700)
committer Barret Rhoden <brho@cs.berkeley.edu>
Wed, 26 Jun 2013 05:20:24 +0000 (22:20 -0700)
Apparently we do need some data segmentation, at least for the SS, when popping
via iretq into userspace.  I haven't needed it for the kernel yet.  Note the
segment limit/base are ignored (they can't express 64 bits), but things like
the DPL and the W bit seem to be checked.

kern/arch/x86/entry64.S
kern/arch/x86/pmap64.c
kern/arch/x86/process64.c
kern/arch/x86/ros/mmu64.h

index cfc3278..fb52b47 100644 (file)
@@ -142,12 +142,14 @@ spin:     jmp     spin
        .p2align        2               # force 4 byte alignment
 .globl gdt64
 gdt64:
+       # keep the number of these in sync with SEG_COUNT
        SEG_NULL
        SEG_CODE_64(0)          # kernel code segment
+       SEG_DATA_64(0)          # kernel data segment
        SEG_CODE_64(3)          # user code segment
+       SEG_DATA_64(3)          # user data segment
        SEG_NULL                        # these two nulls are a placeholder for the TSS
        SEG_NULL                        # these two nulls are a placeholder for the TSS
-       SEG_DATA_64                     # generic flat data segment (debugging for now)
 .globl gdt64desc
 gdt64desc:
        .word   (gdt64desc - gdt64 - 1)         # sizeof(gdt64) - 1
index f39949c..56bfd9c 100644 (file)
@@ -544,6 +544,6 @@ void debug_print_pgdir(pte_t *pgdir)
        printk("Printing the entire page table set for %p, DFS\n", pgdir);
        /* Need to be careful we avoid VPT/UVPT, o/w we'll recurse */
        pml_for_each(pgdir, 0, UVPT, print_pte, 0);
-       pml_for_each(pgdir, ULIM, VPT - ULIM, print_pte, 0);
+       pml_for_each(pgdir, KERNBASE, VPT - KERNBASE, print_pte, 0);
        pml_for_each(pgdir, VPT_TOP, MAX_VADDR - VPT_TOP, print_pte, 0);
 }
index 7c96bcb..acbfeb7 100644 (file)
@@ -36,26 +36,31 @@ void proc_pop_ctx(struct user_context *ctx)
        /* In case they are enabled elsewhere.  We can't take an interrupt in these
         * routines, due to how they play with the kernel stack pointer. */
        disable_irq();
-       /*
-        * If the process entered the kernel via sysenter, we need to leave via
+       /* If the process entered the kernel via sysenter, we need to leave via
         * sysexit.  sysenter trapframes have 0 for a CS, which is pushed in
-        * sysenter_handler.
-        */
-       if(tf->tf_cs) {
-               /*
-                * Restores the register values in the Trapframe with the 'iret'
-                * instruction.  This exits the kernel and starts executing some
-                * environment's code.  This function does not return.
-                */
-//             asm volatile ("movl %0,%%esp;           "
-//                           "popal;                   "
-//                           "popl %%gs;               "
-//                           "popl %%fs;               "
-//                           "popl %%es;               "
-//                           "popl %%ds;               "
-//                           "addl $0x8,%%esp;         "
-//                           "iret                     "
-//                           : : "g" (tf) : "memory");
+        * sysenter_handler. */
+       if (tf->tf_cs) {
+               asm volatile ("movq %0, %%rsp;          "
+                             "popq %%rax;              "
+                             "popq %%rbx;              "
+                             "popq %%rcx;              "
+                             "popq %%rdx;              "
+                             "popq %%rbp;              "
+                             "popq %%rsi;              "
+                             "popq %%rdi;              "
+                             "popq %%r8;               "
+                             "popq %%r9;               "
+                             "popq %%r10;              "
+                             "popq %%r11;              "
+                             "popq %%r12;              "
+                             "popq %%r13;              "
+                             "popq %%r14;              "
+                             "popq %%r15;              "
+                             "movw 0x4(%%rsp), %%gs;   "
+                             "movw 0x6(%%rsp), %%fs;   "
+                             "addq $0x10, %%rsp;       "
+                             "iretq                    "
+                             : : "g" (tf) : "memory");
                panic("iret failed");  /* mostly to placate the compiler */
        } else {
                /* Return path of sysexit.  See sysenter_handler's asm for details.
index d61c2b9..360fa87 100644 (file)
@@ -304,17 +304,14 @@ typedef unsigned long pde_t;
 /* Global descriptor numbers */
 #define GD_NULL                        0x00    /* NULL descriptor */
 #define GD_KT                  0x08    /* kernel text */
-#define GD_UT                  0x10    /* user text */
-#define GD_TSS                 0x18    /* Task segment selector */
-#define GD_TSS2                        0x20    /* Placeholder, TSS is 2-descriptors wide */
+#define GD_KD                  0x10    /* kernel data */
+#define GD_UT                  0x18    /* user text */
+#define GD_UD                  0x20    /* user data */
+#define GD_TSS                 0x28    /* Task segment selector */
+#define GD_TSS2                        0x30    /* Placeholder, TSS is 2-descriptors wide */
 /* These two aren't in the GDT yet (might never be) */
-#define GD_LDT                 0x28    /* Local descriptor table */
-#define GD_LDT2                        0x30    /* Placeholder */
-
-/* Kept around to help compile, will remove */
-#define GD_KD     0x10     // kernel data
-#define GD_UD     0x20     // user data
-
+#define GD_LDT                 0x38    /* Local descriptor table */
+#define GD_LDT2                        0x40    /* Placeholder */
 
 #ifdef __ASSEMBLER__
 
@@ -334,11 +331,12 @@ typedef unsigned long pde_t;
 
 /* 64 bit data segment.  These are pretty much completely ignored (except if we
  * use them for fs/gs, or compatibility mode */
-#define SEG_DATA_64                                                         \
-       .word 0, 0;                                                             \
+#define SEG_DATA_64(dpl)                                                    \
+       .word 0xffff, 0;                                                        \
        .byte 0;                                                                \
-       .byte 0x90;                                                             \
-       .word 0;
+       .byte (0x92 | ((dpl) << 5));                                            \
+       .byte 0x8f;                                                             \
+       .byte 0;
 
 /* System segments (TSS/LDT) are twice as long as usual (16 bytes). */
 #define SEG_SYS_64(type, base, lim, dpl)                                       \
@@ -504,7 +502,7 @@ typedef struct Pseudodesc {
 #define STS_IG32       0xE             /* 64-bit Interrupt Gate */
 #define STS_TG32       0xF             /* 64-bit Trap Gate */
 
-#define SEG_COUNT      6               /* Number of GDT segments */
+#define SEG_COUNT      7               /* Number of GDT segments */
 /* TODO: Probably won't use this */
 #define LDT_SIZE       (8192 * sizeof(segdesc_t))