FP save/restore on x86
author Barret Rhoden <brho@cs.berkeley.edu>
Fri, 30 Apr 2010 23:24:06 +0000 (16:24 -0700)
committer Kevin Klues <klueska@cs.berkeley.edu>
Thu, 3 Nov 2011 00:35:46 +0000 (17:35 -0700)
Saves and restores FP regs.  This will also save SSE registers, but
supposedly only if the appropriate bit is set in cr4.  This might not be
logically correct, especially with the _S state, but it will save and
restore them for preemptions, which should be good enough for OSDI.  We
still need to think through the best places to put them and under what
conditions we need to save/restore.

The pthread library still needs to handle its own save and restore (on
both sparc and x86).

You need to reinstall your kernel headers (or at least x86's
trapframe.h).

kern/arch/i686/ros/trapframe.h
kern/arch/i686/trap.c
kern/arch/i686/trap.h
kern/src/process.c
kern/src/resource.c

index 759ae53..ba574fd 100644 (file)
@@ -40,12 +40,101 @@ typedef struct trapframe {
 } trapframe_t;
 
 /* TODO: consider using a user-space specific trapframe, since they don't need
- * all of this information.  Will do that eventually, but til then: */
+ * all of this information.  Might do that eventually, but til then: */
 #define user_trapframe trapframe
 
-/* FP state and whatever else the kernel won't muck with automatically */
+/* FP state and whatever else the kernel won't muck with automatically.  For
+ * now, it's the Non-64-bit-mode layout of FP and XMM registers, as used by
+ * FXSAVE and FXRSTOR.  Other modes will require a union on a couple entries.
+ * See SDM 2a 3-451. */
+/* Header for the non-64-bit mode FXSAVE map */
+struct fp_header_non_64bit {
+       uint16_t                fcw;
+       uint16_t                fsw;
+       uint8_t                 ftw;
+       uint8_t                 padding0;
+       uint16_t                fop;
+       uint32_t                fpu_ip;
+       uint16_t                cs;
+       uint16_t                padding1;
+       uint32_t                fpu_dp;
+       uint16_t                ds;
+       uint16_t                padding2;
+       uint32_t                mxcsr;
+       uint32_t                mxcsr_mask;
+};
+
+/* Header for the 64-bit mode FXSAVE map with promoted operand size */
+struct fp_header_64bit_promoted {
+       uint16_t                fcw;
+       uint16_t                fsw;
+       uint8_t                 ftw;
+       uint8_t                 padding0;
+       uint16_t                fop;
+       uint64_t                fpu_ip;
+       uint64_t                fpu_dp;
+       uint32_t                mxcsr;
+       uint32_t                mxcsr_mask;
+};
+
+/* Header for the 64-bit mode FXSAVE map with default operand size */
+struct fp_header_64bit_default {
+       uint16_t                fcw;
+       uint16_t                fsw;
+       uint8_t                 ftw;
+       uint8_t                 padding0;
+       uint16_t                fop;
+       uint32_t                fpu_ip;
+       uint16_t                cs;
+       uint16_t                padding1;
+       uint32_t                fpu_dp;
+       uint16_t                ds;
+       uint16_t                padding2;
+       uint32_t                mxcsr;
+       uint32_t                mxcsr_mask;
+};
+
+/* Just for storage space, not for real use    */
+typedef struct {
+       unsigned int stor[4];
+} __uint128_t;
+
 typedef struct ancillary_state {
-       uint32_t silly; // remove this when you actually use this struct
-} ancillary_state_t;
+       union { /* whichever header used depends on the mode */
+               struct fp_header_non_64bit                      fp_head_n64;
+               struct fp_header_64bit_promoted         fp_head_64p;
+               struct fp_header_64bit_default          fp_head_64d;
+       };
+       __uint128_t             st0_mm0;        /* 128 bits: 80 for the st0, 48 rsv */
+       __uint128_t             st1_mm1;
+       __uint128_t             st2_mm2;
+       __uint128_t             st3_mm3;
+       __uint128_t             st4_mm4;
+       __uint128_t             st5_mm5;
+       __uint128_t             st6_mm6;
+       __uint128_t             st7_mm7;
+       __uint128_t             xmm0;
+       __uint128_t             xmm1;
+       __uint128_t             xmm2;
+       __uint128_t             xmm3;
+       __uint128_t             xmm4;
+       __uint128_t             xmm5;
+       __uint128_t             xmm6;
+       __uint128_t             xmm7;
+       __uint128_t             xmm8;           /* xmm8 and below only for 64-bit-mode */
+       __uint128_t             xmm9;
+       __uint128_t             xmm10;
+       __uint128_t             xmm11;
+       __uint128_t             xmm12;
+       __uint128_t             xmm13;
+       __uint128_t             xmm14;
+       __uint128_t             xmm15;
+       __uint128_t             reserv0;
+       __uint128_t             reserv1;
+       __uint128_t             reserv2;
+       __uint128_t             reserv3;
+       __uint128_t             reserv4;
+       __uint128_t             reserv5;
+} __attribute__((aligned(16))) ancillary_state_t;
 
 #endif /* !ROS_INCLUDE_ARCH_TRAPFRAME_H */
index 82097ab..f4f8c35 100644 (file)
@@ -215,14 +215,6 @@ trap_dispatch(trapframe_t *tf)
        return;
 }
 
-void save_fp_state(struct ancillary_state *silly)
-{
-}
-
-void restore_fp_state(struct ancillary_state *silly)
-{
-}
-
 void
 env_push_ancillary_state(env_t* e)
 {
index 42e6fb2..fd00892 100644 (file)
@@ -73,6 +73,17 @@ static inline bool in_kernel(struct trapframe *tf)
        return (tf->tf_cs & ~3) == GD_KT;
 }
 
+/* TODO: (HSS)  FXSAVE stores the FP/SSE state into *silly: output operand. */
+static inline void save_fp_state(struct ancillary_state *silly)
+{
+       asm volatile("fxsave %0" : "=m"(*silly));
+}
+
+static inline void restore_fp_state(struct ancillary_state *silly)
+{
+       asm volatile("fxrstor %0" : : "m"(*silly));
+}
+
 #endif /* !__ASSEMBLER__ */
 
 #endif /* !ROS_INC_ARCH_TRAP_H */
index 84c7a9c..97fbd66 100644 (file)
@@ -645,8 +645,10 @@ static void __proc_startcore(struct proc *p, trapframe_t *tf)
         * different context.
         * for now, we load this silly state here. (TODO) (HSS)
         * We also need this to be per trapframe, and not per process...
-        */
-       env_pop_ancillary_state(p);
+        * For now / OSDI, only load it when in _S mode.  _M mode was handled in
+        * __startcore.  */
+       if (p->state == PROC_RUNNING_S)
+               env_pop_ancillary_state(p);
        env_pop_tf(tf);
 }
 
@@ -867,7 +869,7 @@ void proc_yield(struct proc *SAFE p, bool being_nice)
        switch (p->state) {
                case (PROC_RUNNING_S):
                        p->env_tf= *current_tf;
-                       env_push_ancillary_state(p);
+                       env_push_ancillary_state(p); // TODO: (HSS)
                        __proc_set_state(p, PROC_RUNNABLE_S);
                        schedule_proc(p);
                        break;
index b169c3b..0cf07eb 100644 (file)
@@ -56,7 +56,7 @@ ssize_t core_request(struct proc *p)
                assert(p->state == PROC_RUNNING_M); // TODO: (ACR) async core req
                // save the context, to be restarted in _S mode
                p->env_tf = *current_tf;
-               env_push_ancillary_state(p);
+               env_push_ancillary_state(p); // TODO: (HSS)
                proc_set_syscall_retval(&p->env_tf, ESUCCESS);
                /* sending death, since it's not our job to save contexts or anything in
                 * this case.  also, if this returns true, we will not return down