/* akaros/kern/arch/x86/ros/mmu64.h */
   1#pragma once
   2
   3#ifndef ROS_INC_ARCH_MMU_H
   4#error "Do not include include ros/arch/mmu64.h directly"
   5#endif
   6
   7#ifndef __ASSEMBLER__
   8#include <ros/common.h>
   9typedef unsigned long kpte_t;
  10typedef unsigned long epte_t;
  11
  12typedef kpte_t* pte_t;
  13
  14typedef struct x86_pgdir {
  15        kpte_t  *kpte;
  16        uint64_t eptp;
  17} pgdir_t;
  18#endif
  19
  20/* Virtual memory map:                                  Virt Addresses
  21 *                                                      perms: kernel/user
  22 *
  23 *                     +------------------------------+ 0xffffffffffffffff -+
  24 *                     |                              |                     |
  25 *                     |   Mapped to lowmem, unused   | RW/--               |
  26 *                     |                              |                     |
  27 *  "end" symbol  -->  +------------------------------+        PML3_PTE_REACH
  28 *                     |                              |                     |
  29 *                     |  Kernel link/load location   |                     |
  30 *                     |    (mapped to 0, physical)   |                     |
  31 *                     |                              |                     |
  32 * KERN_LOAD_ADDR -->  +------------------------------+ 0xffffffffc0000000 -+
  33 *                     |                              |
  34 *                     |            IOAPIC            | RW/--  APIC_SIZE (1MB)
  35 *                     |                              |
  36 *   IOAPIC_BASE  -->  +------------------------------+ 0xffffffffbff00000
  37 *                     :                              :
  38 *                     |          Unmapped            | --/--
  39 *                     |                              |
  40 *                     |  Kernel static linking limit |
  41 *   KERN_DYN_TOP -->  +------------------------------+ 0xffffffff80000000
  42 *                     |   Kernel Dynamic Mappings    |
  43 *                     |              .               |
  44 *                     :              .               :
  45 *                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RW/--
  46 *                     |              .               |
  47 *                     |              .               |
  48 *                     |              .               |
  49 *   KERN_DYN_BOT -->  +------------------------------+ 0xfffffff000000000
  50 *                     |                              |
  51 *                     |                              |
  52 *                     |                              |
  53 *  VPT_TOP    ----->  +------------------------------+ 0xffffff0000000000 -+
  54 *                     |                              |                     |
  55 *                     |                              |                     |
  56 *                     |  Cur. Page Table (Kern. RW)  | RW/--  PML4_PTE_REACH
  57 *                     |                              |                     |
  58 *                     |                              |                     |
  59 *    VPT,     ----->  +------------------------------+ 0xfffffe8000000000 -+
  60 *  KERN_VMAP_TOP      |                              |
  61 *                     :              .               :
  62 *                     :              .               :
  63 *                     :              .               :
  64 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| RW/--
  65 *                     |                              | RW/--
  66 *                     |                              | RW/--
  67 *                     |                              | RW/--
  68 *                     |   Remapped Physical Memory   | RW/--
  69 *                     |                              | RW/--
  70 *                     |                              | RW/--
  71 *                     |                              | RW/--
  72 *    KERNBASE  ---->  +------------------------------+ 0xffff800000000000
  73 *                     |                              |
  74 *                     |                              |
  75 *                     |                              |
  76 *                     |   Non-canonical addresses    |
  77 *                     |         (unusable)           |
  78 *                     |                              |
  79 *                     |                              |
  80 * ULIM (not canon) -> +------------------------------+ 0x0000800000000000 -+
  81 *                     +     Highest user address     + 0x00007fffffffffff  |
  82 *                     |                              |                     |
  83 *                     |  Cur. Page Table (User R-)   | R-/R-  PML4_PTE_REACH
  84 *                     |                              |                     |
  85 *    UVPT      ---->  +------------------------------+ 0x00007f8000000000 -+
  86 *                     | Unmapped (expandable region) |                     |
  87 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|                     |
  88 *                     |       Global R/O Info        | R-/R-  PML2_PTE_REACH
  89 *                     |       (proc_glb_info)        |                     |
  90 *    UGINFO    ---->  +------------------------------+ 0x00007f7fffe00000 -+
  91 *                     | Unmapped (expandable region) |                     |
  92 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|                     |
  93 *                     |     Per-Process R/O Info     | R-/R-  PML2_PTE_REACH
  94 *                     |         (procinfo)           |                     |
  95 * UWLIM, UINFO ---->  +------------------------------+ 0x00007f7fffc00000 -+
  96 *                     | Unmapped (expandable region) |                     |
  97 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|                     |
  98 *                     |     Per-Process R/W Data     | RW/RW  PML2_PTE_REACH
  99 *                     |         (procdata)           |                     |
 100 *    UDATA     ---->  +------------------------------+ 0x00007f7fffa00000 -+
 101 *                     |                              |
 102 *                     |    Global Shared R/W Data    | RW/RW  PGSIZE
 103 *                     |                              |
 104 * UMAPTOP, UGDATA ->  +------------------------------+ 0x00007f7fff9ff000
 105 *    USTACKTOP        |                              |
 106 *                     |      Normal User Stack       | RW/RW 256 * PGSIZE
 107 *                     |                              |
 108 *                     +------------------------------+ 0x00007f7fff8ff000
 109 *                     |                              |
 110 *                     |        Empty Memory          |
 111 *                     |                              |
 112 *                     .                              .
 113 *                     .                              .
 114 *    BRK_END   ---->  +------------------------------+ 0x0000300000000000
 115 *                     .                              .
 116 *                     .                              .
 117 *                     |                              |
 118 *                     |         SBRK Heap            |
 119 *                     |                              |
 120 *    BRK_START ---->  +------------------------------+ 0x0000100000000000
 121 *                     .                              .
 122 *                     .                              .
 123 *                     |                              |
 124 *                     |        Empty Memory          |
 125 *                     |                              |
 126 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
 127 *                     |                              |
 128 *                     |     Program, BSS, & Data     |
 129 *                     |                              |
 130 *                     +------------------------------+ 0x0000000000400000
 131 *                     .                              .
 132 *                     .                              .
 133 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
 134 *                     |                              |
 135 *                     |       ld.so (Dynamic)        |
 136 *                     |                              |
 137 * MMAP_LD_FIXED_VA    +------------------------------+ 0x0000000000100000
 138 *                     |                              |
 139 *                     |                              |
 140 *                     |                              |
 141 * MMAP_LOWEST_VA      +------------------------------+ 0x0000000000001000
 142 *                     |                              |
 143 *                     |       Empty Memory (*)       |
 144 *                     |                              |
 145 *                     +------------------------------+ 0x0000000000000000
 146 */
 147
 148/* Physical Mapping symbols:
 149 * At IOPHYSMEM (640K) there is a 384K hole for I/O.  From the kernel,
 150 * IOPHYSMEM can be addressed at KERNBASE + IOPHYSMEM.  The hole ends
 151 * at physical address EXTPHYSMEM. */
 152#define IOPHYSMEM               0x0A0000
 153#define VGAPHYSMEM              0x0A0000
 154#define DEVPHYSMEM              0x0C0000
 155#define BIOSPHYSMEM             0x0F0000
 156#define EXTPHYSMEM              0x100000
 157
 158/* Kernel Virtual Memory Mapping */
 159
 160/* The kernel needs to be loaded in the top 2 GB of memory, since we compile it
 161 * with -mcmodel=kernel (helps with relocations).  We're actually loading it in
 162 * the top 1 GB. */
 163#define KERN_LOAD_ADDR          0xffffffffc0000000
 164/* Static kernel mappings */
 165#define APIC_SIZE               0x100000
 166#define IOAPIC_BASE             (KERN_LOAD_ADDR - APIC_SIZE)
 167/* This is the range of the dynamic virtual mappings. */
 168#define KERN_DYN_TOP            0xffffffff80000000
 169#define KERN_DYN_BOT            0xfffffff000000000
 170
 171/* Virtual page table.  Every PML4 has a PTE at the slot (PML4(VPT))
 172 * corresponding to VPT that points to that PML4's base.  In essence, the 512
 173 * GB chunk of the VA space from VPT..VPT_TOP is a window into the paging
 174 * structure.
 175 *
 176 * The VPT needs to be aligned on 39 bits.
 177 *
 178 * Ex: Say the VPT's entry in 9 bits is "9v".  If you construct a VA from:
 179 * 9v9v9v9v000, the paging hardware will recurse 4 times, with the end result
 180 * being the PML4.  That virtual address will map to the PML4 itself.
 181 *
 182 * If you want to see a specific PML3, figure out which entry it is in the
 183 * PML4 (using PML3(va)), say 9 bits = "9X".  The VA 9v9v9v9X000 will map to
 184 * that PML3. */
 185#define VPT_TOP                 0xffffff0000000000
 186#define VPT                     (VPT_TOP - PML4_PTE_REACH)
 187/* Helper to return the current outer pgdir via the VPT mapping. */
 188#define PML4_VIA_VPT (VPT + ((VPT & 0x0000ffffffffffff) >> 9) +                \
 189                     ((VPT & 0x0000ffffffffffff) >> 18) +                      \
 190                     ((VPT & 0x0000ffffffffffff) >> 27))
 191
 192/* Top of the kernel virtual mapping area (KERNBASE) */
 193#define KERN_VMAP_TOP   (VPT)
 194/* Base of the physical memory map. This maps from 0 physical to max_paddr */
 195#define KERNBASE                0xffff800000000000
 196
 197/* Highest user address: 0x00007fffffffffff: 1 zero, 47 ones, sign extended.
 198 * From here down to UWLIM is User Read-only */
 199#define ULIM                    0x0000800000000000
 200/* Same as VPT but read-only for users */
 201#define UVPT                    (ULIM - PML4_PTE_REACH)
 202/* Arbitrary boundary between the break and the start of
 203 * memory returned by calls to mmap with addr = 0 */
 204#define BRK_END                 0x0000300000000000
 205/* Arbitrary boundary where the break (glibc's heap) starts.  You can safely
 206 * mmap with MAP_FIXED below this address. */
 207#define BRK_START               0x0000100000000000
 208
 209/* **************************************** */
 210/* Page table constants, macros, etc */
 211
 212/* A linear address 'la' has a five-part structure as follows:
 213 *
 214 * +-----9------+-----9------+-----9------+-----9------+---------12----------+
 215 * | PML4 bits  | PML3 bits  | PML2 bits  | PML1 bits  |     Page offset     |
 216 * |   offset   |   offset   |   offset   |   offset   |                     |
 217 * +------------+------------+------------+------------+---------------------+
 218 *  \ PML4(la) / \ PML3(la) / \ PML2(la) / \ PML1(la) / \---- PGOFF(la) ----/
 219 *  \------------------ LA2PPN(la) -------------------/
 220 *
 221 * The PMLx, PGOFF, and LA2PPN macros decompose linear addresses as shown.
 222 * To construct a linear address la from these, use:
 223 * PGADDR(PML4(la), PML3(la), PML2(la), PML1(la), PGOFF(la)).
 224 * Careful, that's arch- and bit-specific.
 225 *
 226 * I'd somewhat like it if we started counting from the outer-most PT, though
 227 * amd coined the term PML4 for the outermost, instead of PML1.  Incidentally,
 228 * they also don't use numbers other than PML4, sticking with names like PDP. */
 229
 230#define PML4_SHIFT              39
 231#define PML3_SHIFT              30
 232#define PML2_SHIFT              21
 233#define PML1_SHIFT              12
 234#define BITS_PER_PML            9
 235
 236/* PTE reach is the amount of VM an entry can map, either as a jumbo or as
 237 * further page tables.  I'd like to write these as shifts, but I can't please
 238 * both the compiler and the assembler. */
 239#define PML4_PTE_REACH  (0x0000008000000000)    /* No jumbos available */
 240#define PML3_PTE_REACH  (0x0000000040000000)    /* 1 GB jumbos available */
 241#define PML2_PTE_REACH  (0x0000000000200000)    /* 2 MB jumbos available */
 242#define PML1_PTE_REACH  (0x0000000000001000)    /* aka, PGSIZE */
 243
 244/* Reach is the amount of VM a table can map, counting all of its entries.
 245 * Note that a PML(n)_PTE is a PML(n-1) table. */
 246#define PML4_REACH              (512ULL * PML4_PTE_REACH)
 247#define PML3_REACH              (PML4_PTE_REACH)
 248#define PML2_REACH              (PML3_PTE_REACH)
 249#define PML1_REACH              (PML2_PTE_REACH)
 250
 251/* PMLx(la, shift) gives the 9 bits specifying the la's entry in the PML
 252 * corresponding to shift.  PMLn(la) gives the 9 bits for PML4, etc. */
 253#define PMLx(la, shift) (((uintptr_t)(la) >> (shift)) & 0x1ff)
 254#define PML4(la)                PMLx(la, PML4_SHIFT)
 255#define PML3(la)                PMLx(la, PML3_SHIFT)
 256#define PML2(la)                PMLx(la, PML2_SHIFT)
 257#define PML1(la)                PMLx(la, PML1_SHIFT)
 258
 259/* Common kernel helpers */
 260#define PGSHIFT                 PML1_SHIFT
 261#define PGSIZE                  PML1_PTE_REACH
 262#define LA2PPN(la)              ((uintptr_t)(la) >> PGSHIFT)
 263#define PTE2PPN(pte)            LA2PPN(pte)
 264#define PGOFF(la)               ((uintptr_t)(la) & (PGSIZE - 1))
 265#define NPTENTRIES              512
 266
 267/* This is used in places (procinfo) meaning "size of smallest jumbo page" */
 268#define PTSIZE PML2_PTE_REACH
 269
 270/* Page table/directory entry flags. */
 271
 272/* Some things to be careful of:  Global and PAT only apply to the last PTE in
 273 * a chain: so either a PTE in PML1, or a Jumbo PTE in PML2 or 3.  When PAT
 274 * applies, which bit we use depends on whether we are jumbo or not.  For PML1,
 275 * PAT is bit 7.  For jumbo PTEs (and only when they are for a jumbo page), we
 276 * use bit 12. */
 277#define PTE_P                   (1 << 0)        /* Present */
 278#define PTE_W                   (1 << 1)        /* Writeable */
 279#define PTE_U                   (1 << 2)        /* User */
 280#define __PTE_PWT               (1 << 3)        /* Write-Through */
 281#define __PTE_PCD               (1 << 4)        /* Cache-Disable */
 282#define PTE_A                   (1 << 5)        /* Accessed */
 283#define PTE_D                   (1 << 6)        /* Dirty */
 284#define PTE_PS                  (1 << 7)        /* Page Size */
 285#define __PTE_PAT               (1 << 7)        /* Page attribute table */
 286#define PTE_G                   (1 << 8)        /* Global Page */
 287#define __PTE_JPAT              (1 << 12)       /* Jumbo PAT */
 288#define PTE_XD                  (1 << 63)       /* Execute disabled */
 289#define PTE_NOCACHE             (__PTE_PWT | __PTE_PCD)
 290#define PTE_WRITECOMB           (__PTE_PCD)
 291
 292/* Permissions fields and common access modes.  These should be read as 'just
 293 * kernel or user too' and 'RO or RW'.  USER_RO means read-only for everyone. */
 294#define PTE_PERM                (PTE_W | PTE_U | PTE_P)
 295#define PTE_KERN_RW             (PTE_W | PTE_P)
 296#define PTE_KERN_RO             PTE_P
 297#define PTE_USER_RW             (PTE_W | PTE_U | PTE_P)
 298#define PTE_USER_RO             (PTE_U | PTE_P)
 299#define PTE_NONE                0
 300
 301/* The PTE/translation part of a PTE/virtual(linear) address.  It's used
 302 * frequently to be the page address of a virtual address.  Note this doesn't
 303 * work on jumbo PTEs due to the reserved bits.  Jumbo's don't have a PTE_ADDR
 304 * in them - just a base address of wherever they point. */
 305#define PTE_ADDR(pte)   ((physaddr_t) (pte) & ~(PGSIZE - 1))
 306/* More meaningful macro, same as PTE_ADDR */
 307#define PG_ADDR(la)     ((uintptr_t)(la) & ~(PGSIZE - 1))
 308
 309/* we must guarantee that for any PTE, exactly one of the following is true */
 310#define PAGE_PRESENT(pte) ((pte) & PTE_P)
 311#define PAGE_UNMAPPED(pte) ((pte) == 0)
 312#define PAGE_PAGED_OUT(pte) (0) /* Need to use an OS reserved PTE bit */
 313
 314
 315/* **************************************** */
 316/* Segmentation */
 317
 318/* Global descriptor numbers */
 319#define GD_NULL                 0x00    /* NULL descriptor */
 320#define GD_KT                   0x08    /* kernel text */
 321#define GD_KD                   0x10    /* kernel data */
 322/* syscall/sysret wants UD before UT, but KT before KD.  it really wants UT32,
 323 * UD, UT64.  anyways... */
 324#define GD_UD                   0x18    /* user data */
 325#define GD_UT                   0x20    /* user text */
 326#define GD_TSS                  0x28    /* Task segment selector */
 327#define GD_TSS2                 0x30    /* Placeholder, TSS is 2-descriptors wide */
 328/* These two aren't in the GDT yet (might never be) */
 329#define GD_LDT                  0x38    /* Local descriptor table */
 330#define GD_LDT2                 0x40    /* Placeholder */
 331
 332#ifdef __ASSEMBLER__
 333
 334/* Macros to build GDT entries in assembly. */
 335#define SEG_NULL                                                \
 336        .word 0, 0;                                             \
 337        .byte 0, 0, 0, 0
 338
 339/* 64 bit code segment.  This is for long mode, no compatibility.  If we want
 340 * to support 32 bit apps later, we'll want to adjust this. */
 341#define SEG_CODE_64(dpl)                                                    \
 342        .word 0, 0;                                                         \
 343        .byte 0;                                                            \
 344        .byte (((1/*p*/) << 7) | ((dpl) << 5) | 0x18 | ((0/*c*/) << 2));    \
 345        .byte (((0/*d*/) << 6) | ((1/*l*/) << 5));                          \
 346        .byte 0;
 347
 348/* 64 bit data segment.  These are pretty much completely ignored (except if we
 349 * use them for fs/gs, or compatibility mode */
 350#define SEG_DATA_64(dpl)                                                    \
 351        .word 0xffff, 0;                                                    \
 352        .byte 0;                                                            \
 353        .byte (0x92 | ((dpl) << 5));                                        \
 354        .byte 0x8f;                                                         \
 355        .byte 0;
 356
 357/* System segments (TSS/LDT) are twice as long as usual (16 bytes). */
 358#define SEG_SYS_64(type, base, lim, dpl)                                       \
 359        .word ((lim) & 0xffff);                                                \
 360        .word ((base) & 0xffff);                                               \
 361        .byte (((base) >> 16) & 0xff);                                         \
 362        .byte ((1 << 7) | ((dpl) << 5) | (type));                              \
 363        .byte (((1/*g*/) << 7) | (((lim) >> 16) & 0xf));                       \
 364        .byte (((base) >> 24) & 0xff);                                         \
 365        .quad ((base) >> 32);                                                  \
 366        .quad 0;
 367
 368/* Default segment (32 bit style).  Would work for fs/gs, if needed */
 369#define SEG(type, base, lim)                                                \
 370        .word (((lim) >> 12) & 0xffff);                                     \
 371        .word ((base) & 0xffff);                                            \
 372        .byte (((base) >> 16) & 0xff);                                      \
 373        .byte (0x90 | (type));                                              \
 374        .byte (0xC0 | (((lim) >> 28) & 0xf));                               \
 375        .byte (((base) >> 24) & 0xff)
 376
 377#else   // not __ASSEMBLER__
 378
 379/* Legacy Segment Descriptor (used for 64 bit data and code) */
 380typedef struct Segdesc {
 381        unsigned sd_lim_15_0 : 16;  // Low bits of segment limit
 382        unsigned sd_base_15_0 : 16; // Low bits of segment base address
 383        unsigned sd_base_23_16 : 8; // Middle bits of segment base address
 384        unsigned sd_type : 4;       // Segment type (see STS_ constants)
 385        unsigned sd_s : 1;          // 0 = system, 1 = application
 386        unsigned sd_dpl : 2;        // Descriptor Privilege Level
 387        unsigned sd_p : 1;          // Present
 388        unsigned sd_lim_19_16 : 4;  // High bits of segment limit
 389        unsigned sd_avl : 1;        // Unused (available for software use)
 390        unsigned sd_rsv1 : 1;       // Reserved
 391        unsigned sd_db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
 392        unsigned sd_g : 1;          // Granularity: limit scaled by 4K when set
 393        unsigned sd_base_31_24 : 8; // High bits of segment base address
 394} segdesc_t;
 395
 396/* Lower half is similar (more ignored, etc)  to a legacy system descriptor */
 397struct x86_sysseg64 {
 398        unsigned sd_lim_15_0 : 16;      /* Low bits of segment limit */
 399        unsigned sd_base_15_0 : 16;     /* Low bits of segment base address */
 400        unsigned sd_base_23_16 : 8;     /* Middle bits of segment base address */
 401        unsigned sd_type : 4;           /* Segment type (see STS_ constants) */
 402        unsigned sd_s : 1;              /* 0 = system, 1 = application */
 403        unsigned sd_dpl : 2;            /* Descriptor Privilege Level */
 404        unsigned sd_p : 1;              /* Present */
 405        unsigned sd_lim_19_16 : 4;      /* High bits of segment limit */
 406        unsigned sd_avl : 1;            /* Unused (available for software ) */
 407        unsigned sd_rsv2 : 2;           /* Reserved */
 408        unsigned sd_g : 1;      /* Granularity: limit scaled by 4K when set */
 409        unsigned sd_base_31_24 : 8;     /* 24-31 bits of segment base address */
 410        unsigned sd_base_63_32;         /* top 32 bits of the base */
 411        unsigned sd_reserved;   /* some parts must be zero, just zero it all */
 412};
 413typedef struct x86_sysseg64 syssegdesc_t;
 414
 415/* G(ranularity) determines if the limit is shifted */
 416#define __SEG_SYS64(type, base, lim, dpl, g)                                   \
 417{ ((lim) >> ((g) * 12)) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,      \
 418    type, 0, dpl, 1, (unsigned) (lim) >> 28, 0, 0, (g),                        \
 419    ((unsigned) (base) >> 24) & 0xff,                                          \
 420    ((unsigned long) (base) >> 32), 0 }
 421
 422/* Normal system segment descriptor (LDT or TSS). (limit is scaled by 4k) */
 423#define SEG_SYS64(type, base, lim, dpl)                                        \
 424        __SEG_SYS64(type, base, lim, dpl, 1)
 425
 426/* Smaller system segment descriptor (LDT or TSS). */
 427#define SEG_SYS64_SMALL(type, base, lim, dpl)                                  \
 428        __SEG_SYS64(type, base, lim, dpl, 0)
 429
 430#define SEG_SYS_SMALL(type, base, lim, dpl) \
 431        SEG_SYS64_SMALL(type, base, lim, dpl)
 432
 433/* 64 bit task state segment (AMD 2:12.2.5) */
 434typedef struct taskstate {
 435        uint32_t                ts_rsv1;        /* reserved / ignored */
 436        uint64_t                ts_rsp0;        /* stack ptr in ring 0 */
 437        uint64_t                ts_rsp1;        /* stack ptr in ring 1 */
 438        uint64_t                ts_rsp2;        /* stack ptr in ring 2 */
 439        uint64_t                ts_rsv2;        /* reserved / ignored */
 440        uint64_t                ts_ist1;        /* IST: unconditional rsp */
 441        uint64_t                ts_ist2;        /* check AMD 2:8.9.4 for info */
 442        uint64_t                ts_ist3;
 443        uint64_t                ts_ist4;
 444        uint64_t                ts_ist5;
 445        uint64_t                ts_ist6;
 446        uint64_t                ts_ist7;
 447        uint64_t                ts_rsv3;        /* reserved / ignored */
 448        uint16_t                ts_rsv4;        /* reserved / ignored */
 449        uint16_t                ts_iobm;        /* IO base map (offset) */
 450} __attribute__((packed)) taskstate_t;
 451
 452/* 64 bit gate descriptors for interrupts and traps */
 453typedef struct Gatedesc {
 454        unsigned gd_off_15_0 : 16;      /* low 16 bits of offset in segment */
 455        unsigned gd_ss : 16;            /* segment selector */
 456        unsigned gd_ist : 3;    /* interrupt stack table selector (0 = none) */
 457        unsigned gd_rsv1 : 5;           /* ignored */
 458        unsigned gd_type : 4;           /* type(STS_{TG,IG32,TG32}) */
 459        unsigned gd_s : 1;              /* must be 0 (system) */
 460        unsigned gd_dpl : 2;    /* DPL - highest ring allowed to use this */
 461        unsigned gd_p : 1;              /* Present */
 462        unsigned gd_off_31_16 : 16;     /* 16-31 bits of offset in segment */
 463        unsigned gd_off_63_32;          /* top 32 bits of offset */
 464        unsigned gd_rsv2;               /* reserved / unsused */
 465} gatedesc_t;
 466
 467/* Set up an IST-capable 64 bit interrupt/trap gate descriptor.  IST selects a
 468 * stack pointer from the interrupt-stack table (in TSS) that will be loaded
 469 * unconditionally when we hit this gate  - regardless of privelege change. */
 470#define SETGATE64(gate, istrap, sel, off, dpl, ist)                            \
 471{                                                                              \
 472        (gate).gd_off_15_0 = (uintptr_t) (off) & 0xffff;                       \
 473        (gate).gd_ss = (sel);                                                  \
 474        (gate).gd_ist = (ist);                                                 \
 475        (gate).gd_rsv1 = 0;                                                    \
 476        (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32;                       \
 477        (gate).gd_s = 0;                                                       \
 478        (gate).gd_dpl = (dpl);                                                 \
 479        (gate).gd_p = 1;                                                       \
 480        (gate).gd_off_31_16 = (uintptr_t) (off) >> 16;                         \
 481        (gate).gd_off_63_32 = (uintptr_t) (off) >> 32;                         \
 482        (gate).gd_rsv2 = 0;                                                    \
 483}
 484
 485/* Set up a normal, 64 bit interrupt/trap gate descriptor.
 486 * - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
 487 *   - interrupt gates automatically disable interrupts (cli)
 488 * - sel: Code segment selector for interrupt/trap handler
 489 * - off: Offset in code segment for interrupt/trap handler (address)
 490 * - dpl: Descriptor Privilege Level -
 491 *        the privilege level required for software to invoke
 492 *        this interrupt/trap gate explicitly using an int instruction. */
 493#define SETGATE(gate, istrap, sel, off, dpl)                                   \
 494        SETGATE64(gate, istrap, sel, off, dpl, 0)
 495
 496// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
 497typedef struct Pseudodesc {
 498        uint16_t pd_lim;                // Limit
 499        uintptr_t pd_base;              // Base address
 500} __attribute__ ((packed)) pseudodesc_t;
 501
 502#endif /* !__ASSEMBLER__ */
 503
 504// Application segment type bits
 505#define STA_X           0x8         // Executable segment
 506#define STA_E           0x4         // Expand down (non-executable segments)
 507#define STA_C           0x4         // Conforming code segment (executable only)
 508#define STA_W           0x2         // Writeable (non-executable segments)
 509#define STA_R           0x2         // Readable (executable segments)
 510#define STA_A           0x1         // Accessed
 511
 512/* System segment type bits.  All other types are reserved/illegal.  The '32' is
 513 * mostly a legacy naming - the bits work for both 64 and 32. */
 514#define STS_LDT         0x2             /* 64-bit Local Descriptor Table  */
 515#define STS_T32A        0x9             /* Available 64-bit TSS */
 516#define STS_T32B        0xB             /* Busy 64-bit TSS */
 517#define STS_CG32        0xC             /* 64-bit Call Gate */
 518#define STS_IG32        0xE             /* 64-bit Interrupt Gate */
 519#define STS_TG32        0xF             /* 64-bit Trap Gate */
 520
 521#define SEG_COUNT       7               /* Number of GDT segments */
 522/* TODO: Probably won't use this */
 523#define LDT_SIZE        (8192 * sizeof(segdesc_t))
 524
 525/* TLS 'syscall', coupled to trapentry64.S.  Needed a non-canon 'addr' */
 526#define FASTCALL_SETFSBASE 0xf0f0000000000001
 527